diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h new file mode 100644 index 0000000000000000000000000000000000000000..0085e1ea934f4ee29d9f4d6a20bd6f56d8eeac60 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenGeneral.h @@ -0,0 +1,8 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h new file mode 100644 index 0000000000000000000000000000000000000000..44d0a3ae4365b7d938a46f4704ce11bd41e46d7d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATenOpList.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10 { +struct OperatorName; +} + +namespace at { + +// check if an op is a custom op (i.e. did not come from native_functions.yaml) +TORCH_API bool is_custom_op(const c10::OperatorName& opName); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h new file mode 100644 index 0000000000000000000000000000000000000000..68d4b7e2e14f5851a790961d049406a6b3d6940c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_fwd.h @@ -0,0 +1,51 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// Forward declarations of core ATen types used in dispatch functions +namespace c10 { + +template +class List; +template +class IListRef; +class Stream; +class Scalar; +class SymInt; +class SymIntList; +struct Storage; +struct TensorOptions; +template +class ArrayRef; +template +class OptionalArrayRef; + +} // namespace c10 + +namespace at { + +class Tensor; +class OptionalTensorRef; +struct Dimname; +struct Generator; +using TensorList = c10::ArrayRef; +using ITensorListRef = c10::IListRef; +using IOptTensorListRef = c10::IListRef; +using DimnameList = c10::ArrayRef; +using IntArrayRef = c10::ArrayRef; +using OptionalIntArrayRef = c10::OptionalArrayRef; +using OptionalSymIntArrayRef = c10::OptionalArrayRef; + +using c10::Stream; +using c10::Storage; +using c10::QScheme; +using c10::Scalar; +using c10::SymInt; +using c10::SymIntList; +using c10::TensorOptions; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e7728b4125f4dd36bcec4d0d910bb3e28e3ec42 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ATen_pch.h @@ -0,0 +1,166 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// This global header must not depend on native_functions.yaml or +// incremental builds will be next to useless +#pragma push_macro("TORCH_ASSERT_NO_OPERATORS") +#define TORCH_ASSERT_NO_OPERATORS + +#include + +// This list of headers was generated using a script that finds +// high-impact headers and then manually tweaked to remove OS specific +// or duplicate headers (e.g. and ) and to remove +// "impl" headers (e.g BFloat16-inl.h or complex_math.h in c10). + +// To generate the initial list: +// 1. Build pytorch from scratch with all build caching disabled +// 2. Generate a build trace with ninjatracing (https://github.com/nico/ninjatracing) +// $ ninjatracing /path/to/pytorch/build/.ninja_log > trace_all.json +// 3. Run pch_gen.py from https://github.com/peterbell10/build_analysis/ +// $ python pch_gen.py --threshold .80 --target torch_cpu --build_dir /path/to/pytorch/build --trace trace_all.json +// Where the threshold can be tweaked until c10 and some of ATen +// core are included but TORCH_ASSERT_NO_OPERATORS still passes. 
+ +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#pragma pop_macro("TORCH_ASSERT_NO_OPERATORS") + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Array.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Array.h new file mode 100644 index 0000000000000000000000000000000000000000..16e370f826e940e770461d7e6c6ca1e8017ec7fd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Array.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// A fixed-size array type usable from both host and +// device code. + +#include +#include + +namespace at::detail { + +template +struct Array { + // NOLINTNEXTLINE(*c-array*) + T data[size_]; + + C10_HOST_DEVICE T operator[](int i) const { + return data[i]; + } + C10_HOST_DEVICE T& operator[](int i) { + return data[i]; + } +#if defined(USE_ROCM) + C10_HOST_DEVICE Array() = default; + C10_HOST_DEVICE Array(const Array&) = default; + C10_HOST_DEVICE Array& operator=(const Array&) = default; + C10_HOST_DEVICE Array(Array&&) = default; + C10_HOST_DEVICE Array& operator=(Array&&) = default; + C10_HOST_DEVICE ~Array() = default; +#else + Array() = default; + Array(const Array&) = default; + Array& operator=(const Array&) = default; + Array(Array&&) noexcept = default; + Array& operator=(Array&&) noexcept = default; + ~Array() = default; +#endif + static constexpr int size() { + return size_; + } + // Fill the array with x. + C10_HOST_DEVICE Array(T x) { + for (int i = 0; i < size_; i++) { + data[i] = x; + } + } +}; + +} // namespace at::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h new file mode 100644 index 0000000000000000000000000000000000000000..ae30d57c820c35c3d00aa8d6390e79d3497721ad --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Backtrace.h @@ -0,0 +1,7 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..597e2b3720e30dc92d5ce2dfb14c51aa6778c824 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CachingHostAllocator.h @@ -0,0 +1,800 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") +namespace at { + +using c10::CachingAllocator::Stat; +using c10::CachingAllocator::DurationStat; + +/** + * HostBlock is typically a fundamental memory block used in pinned memory. It + * is likely related to Event and Stream of device runtime. It is probably a + * base struct or interface that can be inherited and extended by each backend. 
+ */ +template +struct HostBlock { + // constructor for search key + HostBlock(size_t size) : size_(size) {} + + HostBlock(size_t size, void* ptr) : size_(size), ptr_(ptr) {} + + std::mutex mutex_; + size_t size_{0}; // block size in bytes + void* ptr_{nullptr}; // memory address + bool allocated_{false}; // in-use flag + size_t event_count_{0}; // number of related events + ska::flat_hash_set streams_; // streams on which the block was used +}; + +template +struct alignas(hardware_destructive_interference_size) FreeBlockList { + std::mutex mutex_; + std::deque list_; +}; + +namespace { + // Max cached block sizes: (1 << MAX_SIZE_INDEX) bytes + // NOLINTNEXTLINE(misc-definitions-in-headers) + constexpr size_t MAX_SIZE_INDEX = 64; +} + +// A large reserved pinned memory segment that is created in advance which is used +// to allocate small pinned memory requests to avoid calling into expensive APIs. +// We never free this memory and move up the pointer as we allocate new blocks +// and when blocks are freed, they are cached in the free lists. +struct PinnedReserveSegment { + PinnedReserveSegment(void *start, size_t size) : start_(start), size_(size), + current_ptr_(start_), initialized_(true) {} + + PinnedReserveSegment() : start_(nullptr), size_(0), current_ptr_(nullptr), initialized_(false) {} + + bool initialized() { + return initialized_; + } + + void* allocate(size_t bytes) { + std::lock_guard guard(mutex_); + + // Round up the requested size to 4KB boundary for all including the small ones. 
+ size_t rounded_bytes = (bytes + 4096 - 1) & ~(4096 - 1); + + if (((uint8_t*)current_ptr_ + rounded_bytes) > ((uint8_t*)start_ + size_)) { + return nullptr; + } + + void* ptr = current_ptr_; + current_ptr_ = (uint8_t*)current_ptr_ + rounded_bytes; + return ptr; + } + + bool owns(void* ptr) { + return ptr >= start_ && ptr < (uint8_t*)start_ + size_; + } + + std::mutex mutex_; + void* start_; + size_t size_; + void* current_ptr_; + bool initialized_; +}; + +// Struct containing memory allocator summary statistics for host. +struct TORCH_API HostStats { + // COUNT: total allocations (active) + Stat active_requests; + // SUM: bytes allocated/reserved by this memory allocator. (active) + Stat active_bytes; + // COUNT: total allocations (active + free) + Stat allocations; + // SUM: bytes allocated/reserved by this memory allocator. This accounts + // for both free and in-use blocks. + Stat allocated_bytes; + + // SUM: time spent in cudaHostAlloc/cudaHostRegister in microseconds + DurationStat host_alloc_time; + + // SUM: time spent in cudaHostFree/cudaHostUnregister in microseconds + DurationStat host_free_time; + + // COUNT: number of times cudaHostAlloc/cudaHostRegister was called because + // the request could not be satisfied from existing free blocks. + int64_t num_host_alloc = 0; // This is derived from segment or timing + + // COUNT: number of times cudaHostFree/cudaHostUnregister was called. + int64_t num_host_free = 0; // This is derived from segment or timing + + // Count of cudaHostAlloc/cudaHostRegister per bucket + std::vector bucket_allocation = std::vector(MAX_SIZE_INDEX); +}; + +// Struct containing memory allocator summary statistics for host, as they +// are staged for reporting. This is a temporary struct that is used to +// avoid locking the allocator while collecting stats. 
+struct alignas(hardware_destructive_interference_size) HostStatsStaged { + std::mutex timing_mutex_; + // COUNT: total allocations (active + free) + // LOCK: access to this stat is protected by the allocator's blocks_mutex_ + Stat allocations; + // SUM: bytes allocated/reserved by this memory allocator. This accounts + // for both free and in-use blocks. + Stat allocated_bytes; + // COUNT: number of allocations per bucket (active) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector active_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: bytes of allocation per bucket (active) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector active_bytes_bucket_stats = std::vector(MAX_SIZE_INDEX); + // COUNT: number of allocations per bucket (active + free) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector allocation_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: bytes of allocation per bucket (active + free) + // LOCK: access to this stat is protected by the per bucket free_list_[index].mutex_ + std::vector allocated_bytes_bucket_stats = std::vector(MAX_SIZE_INDEX); + // SUM: time spent in cudaHostAlloc/cudaHostRegister + // LOCK: access to this stat is protected by the timing_mutex_ + DurationStat host_alloc_time; + // SUM: time spent in cudaHostFree/cudaHostUnregister + // LOCK: access to this stat is protected by the timing_mutex_ + DurationStat host_free_time; +}; + +/** + * Note [HostAllocator design] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * We have three key data structures - the free list which stores blocks that + * are not currently used, the block list which stores all blocks that have been + * allocated, and the event queue which stores runtime events and their + * corresponding blocks. + * + * Each of these are protected by a separate mutex. 
The key design principles + * are to 1) only hold each mutex for the minimal amount of time possible, 2) + * never perform any potentially expensive operations (such as CUDA runtime API calls) + * while holding the lock. + * + * There are four public methods: allocate, free, record_event and empty_cache. + * 1) In the allocate path, we first check to see if we can service our + * request from the free list, and otherwise we create a new block with + * allocate_host_memory. + * 2) In the free path, we insert events (if required) into the event queue, + * and if possible insert our block back into the free list. In allocate, we + * first eagerly query events until we find one that is not ready, and insert + * the corresponding block onto the free list if all the events recorded for a + * block are ready. + * 3) In the record_event path, we simply insert the given stream into the set + * of streams tracked by the specified block. This set of streams is then + * consumed in the free path. + * 4) In the empty_cache path, we flush any available blocks into the free + * list. We then remove all elements from the free list, remove them from the block list, and + * release the associated pinned memory allocation via free_block. + * + * We generalize the caching host allocator into two parts: interface and + * implementation. For any new backend looking to integrate with the host allocator + * and reuse the caching mechanism, both parts must be specialized. + * + * For the implementation, we provide a CachingHostAllocatorImpl struct + * to abstract the caching mechanism. Any backend needs to provide a customized + * implementation by specializing its own public functions and the related + * runtime functions. Its template parameter S represents runtime Stream, E + * denotes runtime Event, B indicates the fundamental memory block. + * + * For the interface, we provide a CachingHostAllocatorInterface struct as an + * interface. 
Any backend needs to derive its own host allocator from this + * interface. Its template parameter T refers to an implementation that + * inherits from CachingHostAllocatorImpl. + * + * This design shares the caching mechanism across backends while + * providing flexibility to each backend. A backend can choose to follow this + * implementation as is, or reuse it by extending and overriding parts of it as necessary. + * Taking CUDA as an example, it specializes runtime-related functions to reuse + * the caching mechanism. Additionally, it extends the allocator's functionality + * by adding the allocWithCudaHostRegister function to support page-locking the + * memory range used by CUDA. Of course, you can also refer to + * XPUCachingHostAllocator, which is a host caching allocator supported on the XPU + * backend, to implement a basic host caching allocator. + * + * Some of the invariants here are less strict than they could be - for example, + * we do not enforce that free(Block* block) => block->event_count == 0. This is + * for compatibility reasons, and we can explore enforcing these in subsequent + * versions. + * + * Note that this caching host allocator does not split larger allocations into + * smaller blocks, unlike the caching device allocator. + * + * In order to gather statistics about the caching host allocator while minimally + * impacting performance, we use a HostStatsStaged struct to stage the stats + * before reporting them. This is done to avoid adding new locks to the allocator. + * Collecting stats is carefully done under existing locks, and then the staged + * stats are converted to the final stats when getStats is called. At that time + * we hold the same locks as empty_cache, to ensure the fidelity of the stats. 
+ */ + +template < + typename S, + typename E, + typename B = HostBlock> +struct CachingHostAllocatorImpl { + virtual ~CachingHostAllocatorImpl() { + if (active_) { + active_ = false; + getBackgroundThreadPool()->waitWorkComplete(); + } + } + + public: + // return data_ptr and block pair. + virtual std::pair allocate(size_t size) { + if (size == 0) { + return {nullptr, nullptr}; + } + + // If we are using background threads, we can process events in the + // background. + if (!pinned_use_background_threads()) { + process_events(); + } + + // Round up the allocation to the nearest power of two to improve reuse. + // These power of two sizes are also used to index into the free list. + size_t roundSize = c10::llvm::PowerOf2Ceil(size); + + // First, try to allocate from the free list + auto* block = get_free_block(roundSize); + if (block) { + return {block->ptr_, reinterpret_cast(block)}; + } + + // Check in the recently freed blocks with pending events to see if we + // can reuse them. Call get_free_block again after processing events + if (pinned_use_background_threads()) { + // Launch the background thread and process events in a loop. + static bool background_thread_flag [[maybe_unused]] = [this] { + active_ = true; + getBackgroundThreadPool()->run([&]() { + while (active_) { + process_events(); + std::this_thread::sleep_for(std::chrono::microseconds(100)); + } + }); + return true; + }(); + } + + // Slow path: if we can't allocate from the cached free list, we need + // to create a new block. + void* ptr = nullptr; + allocate_host_memory(roundSize, &ptr); + + // Then, create a new block. + block = new B(roundSize, ptr); + block->allocated_ = true; + + add_allocated_block(block); + return {block->ptr_, reinterpret_cast(block)}; + } + + virtual void free(void* ctx) { + if (!ctx) { + return; + } + + // Note: we can assume that free is correctly paired with alloc, and thus we + // do not need to look up the ctx in blocks_. 
+ auto* block = reinterpret_cast(ctx); + + std::optional> events; + ska::flat_hash_set streams; + { + std::lock_guard g(block->mutex_); + block->allocated_ = false; + if (block->streams_.empty()) { + TORCH_INTERNAL_ASSERT(block->event_count_ == 0); + } else { + events = std::vector(); + events->reserve(block->streams_.size()); + block->event_count_ += block->streams_.size(); + // Move out streams to avoid holding the mutex during event recording + streams = std::move(block->streams_); + block->streams_.clear(); + } + } + + // Event recording must be done outside the mutex to avoid potential + // deadlocks (e.g., when Python GIL is involved) + for (auto stream : streams) { + record_stream(events, stream); + } + + if (!events) { + auto index = size_index(block->size_); + std::lock_guard g(free_list_[index].mutex_); + free_list_[index].list_.push_back(block); + } else { + // restore these events that record by used streams. + std::lock_guard g(events_mutex_); + for (auto&& event : *events) { + events_.emplace_front(std::move(event), block); + } + } + } + + virtual bool record_event(void* ptr, void* ctx, c10::Stream s) { + S stream = S(s); + auto* block = reinterpret_cast(ctx); + + // Note: we need to check if the passed-in `ctx` is valid. This is because + // `record_event` (via `CachingHostAllocator_recordEvent`) can be invoked on + // an arbitrary tensor, and is not guaranteed to correspond to a pinned + // memory allocation. Therefore, we need to check that `ctx` is valid before + // proceeding. + { + std::lock_guard g(blocks_mutex_); + if (blocks_.find(block) != blocks_.end()) { + // Now we know this object is safe to access. 
+ std::lock_guard gb(block->mutex_); + TORCH_INTERNAL_ASSERT(block->allocated_); + block->streams_.insert(stream); + return true; + } + auto it = ptr_to_block_.find(ptr); + if (it != ptr_to_block_.end()) { + block = it->second; + std::lock_guard g(block->mutex_); + TORCH_INTERNAL_ASSERT(block->allocated_); + block->streams_.insert(stream); + return true; + } + } + + return false; + } + + virtual void empty_cache() { + // Flush any available blocks into the free_list. + process_events(); + + // Remove all elements from the free list, remove them from the blocks + // list, and free the associated pinned memory allocation. This requires + // concurrently holding both the free list mutexes and the blocks mutex, and + // is the only function that concurrently holds multiple mutexes. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + std::vector blocks_to_remove(free_list_[i].list_.begin(), free_list_[i].list_.end()); + free_list_[i].list_.clear(); + + for (auto* block : blocks_to_remove) { + blocks_.erase(block); + ptr_to_block_.erase(block->ptr_); + auto index = size_index(block->size_); + free_block(block); + stats_.allocations.decrease(1); + stats_.allocated_bytes.decrease(block->size_); + stats_.allocation_bucket_stats[index].decrease(1); + stats_.allocated_bytes_bucket_stats[index].decrease(block->size_); + delete block; + } + } + } + + inline size_t size_index(size_t size) { + return c10::llvm::Log2_64_Ceil(size); + } + + virtual bool pinned_use_background_threads() { + return c10::CachingAllocator::AcceleratorAllocatorConfig:: + pinned_use_background_threads(); + } + + virtual void copy_data(void* dest [[maybe_unused]], const void* src [[maybe_unused]], std::size_t count [[maybe_unused]]) const { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for copy_data"); + } + + HostStats getStats() { 
+ HostStats stats; + + // To keep getStats lightweight we do *not* flush any available blocks + // into the free_list. This may skew the stats a bit. + + auto add_bucket_stats = [](Stat& accumulator, const Stat& other) { + accumulator.allocated += other.allocated; + accumulator.current += other.current; + accumulator.freed += other.freed; + // Since peaks are measured per bucket independently, we add them up + // to estimate the total peak. This is not strictly correct, but it is + // the best approximation we can get after the fact. + accumulator.peak += other.peak; + }; + + // Accurate reading of memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + // We collect the slow-path stats only once, since they are not collected + // per bucket (we pick index 0 arbitrarily). These are also all the host + // allocations, not taking into account caching and free lists. + if (i == 0) { + stats.allocations = stats_.allocations; + stats.allocated_bytes = stats_.allocated_bytes; + stats.num_host_alloc = stats.allocations.allocated; + stats.num_host_free = stats.allocations.freed; + } + + // Bucket stats need to be merged with the slow-path stats. We do this in + // a best effort manner, since we can't really replay the cached events per bucket. 
+ add_bucket_stats(stats.active_requests, stats_.active_bucket_stats[i]); + add_bucket_stats(stats.active_bytes, stats_.active_bytes_bucket_stats[i]); + stats.bucket_allocation[i] = stats_.allocation_bucket_stats[i].allocated; + } + + // Get the timing stats + { + std::lock_guard g(stats_.timing_mutex_); + + stats.host_alloc_time = stats_.host_alloc_time; + stats.host_free_time = stats_.host_free_time; + } + + return stats; + } + + void resetAccumulatedStats() { + // Resetting accumulated memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. + for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + if (i == 0) { + stats_.allocations.reset_accumulated(); + stats_.allocated_bytes.reset_accumulated(); + } + stats_.active_bucket_stats[i].reset_accumulated(); + stats_.active_bytes_bucket_stats[i].reset_accumulated(); + stats_.allocation_bucket_stats[i].reset_accumulated(); + stats_.allocated_bytes_bucket_stats[i].reset_accumulated(); + } + + // Also reset timing stats + { + std::lock_guard g(stats_.timing_mutex_); + stats_.host_alloc_time.reset_accumulated(); + stats_.host_free_time.reset_accumulated(); + } + } + + void resetPeakStats() { + // Resetting peak memory stats requires concurrently holding both the + // free list mutexes and the blocks mutex. Previously, this was only done in + // empty_cache function. 
+ for (size_t i = 0; i < free_list_.size(); ++i) { + std::lock(free_list_[i].mutex_, blocks_mutex_); + std::lock_guard gf(free_list_[i].mutex_, std::adopt_lock); + std::lock_guard gb(blocks_mutex_, std::adopt_lock); + + if (i == 0) { + stats_.allocations.reset_peak(); + stats_.allocated_bytes.reset_peak(); + } + stats_.active_bucket_stats[i].reset_peak(); + stats_.active_bytes_bucket_stats[i].reset_peak(); + stats_.allocation_bucket_stats[i].reset_peak(); + stats_.allocated_bytes_bucket_stats[i].reset_peak(); + } + + // Also reset timing stats + { + std::lock_guard g(stats_.timing_mutex_); + stats_.host_alloc_time.reset_peak(); + stats_.host_free_time.reset_peak(); + } + } + + private: + virtual void add_allocated_block(B* block) { + std::lock_guard g(blocks_mutex_); + blocks_.insert(block); + stats_.allocations.increase(1); + stats_.allocated_bytes.increase(block->size_); + ptr_to_block_.insert({block->ptr_, block}); + + // Unfortunately, we have to, on the slow path, quickly + // lock the bucket to record the allocation. This should + // be a rare event once the cache is warmed up. + auto size = block->size_; + auto index = size_index(size); + { + std::lock_guard g(free_list_[index].mutex_); + stats_.allocation_bucket_stats[index].increase(1); + stats_.allocated_bytes_bucket_stats[index].increase(size); + stats_.active_bucket_stats[index].increase(1); + stats_.active_bytes_bucket_stats[index].increase(size); + } + } + + virtual B* get_free_block(size_t size) { + auto index = size_index(size); + std::lock_guard g(free_list_[index].mutex_); + if (!free_list_[index].list_.empty()) { + B* block = free_list_[index].list_.back(); + free_list_[index].list_.pop_back(); + block->allocated_ = true; + stats_.active_bucket_stats[index].increase(1); + stats_.active_bytes_bucket_stats[index].increase(size); + return block; + } + return nullptr; + } + + virtual void process_events() { + // process all events until the last unready event, not for specific size. 
+ process_events_for_specific_size(-1); + } + + // If size is -1, process all events from backwards until the last unready + // event. Otherwise, process events for a specific size and on first ready block + // is found, add it to the free list and return. + virtual void process_events_for_specific_size(int64_t size) { + size_t event_count = 0; + size_t max_events = 0; + { + std::lock_guard g(events_mutex_); + max_events = events_.size(); + } + + while (true) { + // Avoid calling cudaEventDestroy while holding a mutex, so move + // intermediate events out of the lock into this object. + // process the last event + std::optional> processed; + { + std::lock_guard g(events_mutex_); + if (!events_.empty()) { + processed = std::move(events_.back()); + events_.pop_back(); + } + } + + if (!processed) { + return; + } + + if (size != -1) { + if (event_count++ > max_events) { + { + std::lock_guard g(events_mutex_); + events_.push_front(std::move(*processed)); + } + return; + } + if (size != (int64_t)processed->second->size_) { + // if we are processing a specific size, and the size of the block + // doesn't match, we can't use it. + { + std::lock_guard g(events_mutex_); + events_.push_front(std::move(*processed)); + } + continue; + } + } + + // otherwise, query the event + { + // now, see if we can handle this element + auto& event = processed->first; + if (!query_event(event)) { + // push the event onto the back if it's not ready. + { + std::lock_guard g(events_mutex_); + if (size == -1) { + events_.push_back(std::move(*processed)); + return; + } else { + events_.push_front(std::move(*processed)); + continue; + } + } + } + } + + // Process the events. 
+ TORCH_INTERNAL_ASSERT(processed); + auto* block = processed->second; + bool available = false; + { + std::lock_guard g(block->mutex_); + TORCH_INTERNAL_ASSERT(!block->allocated_) + block->event_count_--; + if (block->event_count_ == 0) { + available = true; + } + } + + if (available) { + auto index = size_index(block->size_); + std::lock_guard g(free_list_[index].mutex_); + free_list_[index].list_.push_back(block); + stats_.active_bucket_stats[index].decrease(1); + stats_.active_bytes_bucket_stats[index].decrease(size); + if (size != -1) { + return; + } + } + } + } + + TaskThreadPool* getBackgroundThreadPool() { + static TaskThreadPool* pool = new TaskThreadPool(1); + return pool; + } + + /* These following functions are runtime-related. */ + + // Allocate page-locked memory on the host. + virtual void allocate_host_memory(size_t size, void** ptr) { + TORCH_CHECK_NOT_IMPLEMENTED( + false, "Not implemented for allocate_host_memory"); + } + + // Free block and release the pointer contained in block. + virtual void free_block(B* block) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for free_block"); + } + + // Record an event on stream and store event into events. + virtual void record_stream(std::optional>& events, S stream) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for record_stream"); + } + + // Query event if it is completed. + virtual bool query_event(E& event) { + TORCH_CHECK_NOT_IMPLEMENTED(false, "Not implemented for query_event"); + } + + alignas(hardware_destructive_interference_size) std::mutex blocks_mutex_; + ska::flat_hash_set blocks_; // block list + ska::flat_hash_map ptr_to_block_; + + // We keep free list as a vector of free lists, one for each power of two + // size. This allows us to quickly find a free block of the right size. + // We use deque to store per size free list and guard the list with its own + // mutex. 
+  alignas(hardware_destructive_interference_size) std::vector<FreeBlockList<B>>
+      free_list_{MAX_SIZE_INDEX};
+
+  alignas(hardware_destructive_interference_size) std::mutex events_mutex_;
+  std::deque<std::pair<E, B*>> events_; // event queue paired with block
+
+  // Indicates whether the event-processing thread pool is active.
+  // Set to false in the destructor to signal background threads to stop.
+  std::atomic<bool> active_{false};
+protected:
+  alignas(hardware_destructive_interference_size) HostStatsStaged stats_;
+};
+
+// Abstract interface for host (pinned-memory) allocators: extends
+// at::Allocator with stream tracking, cache control and statistics.
+struct TORCH_API HostAllocator : public at::Allocator {
+  // Associates the pinned memory allocation with a stream to track
+  // dependencies. This ensures the memory won't be reused until the stream's
+  // operations complete
+  virtual bool record_event(void* ptr, void* ctx, c10::Stream stream) = 0;
+
+  // Frees all cached pinned memory and returns it to the system, clearing the
+  // allocator's internal cache
+  virtual void empty_cache() = 0;
+
+  // Returns comprehensive statistics about the allocator's memory usage,
+  // allocation patterns, and timing metrics
+  virtual HostStats get_stats() = 0;
+
+  // Resets the cumulative allocation statistics
+  virtual void reset_accumulated_stats() = 0;
+
+  // Resets the peak memory usage metrics
+  virtual void reset_peak_stats() = 0;
+};
+
+// Implements HostAllocator by forwarding every call to an owned
+// implementation object `impl_` of type T. `deleteFunc` is the deleter
+// attached to each DataPtr returned by allocate().
+template <typename T, c10::DeleterFnPtr deleteFunc>
+struct CachingHostAllocatorInterface : public HostAllocator {
+  CachingHostAllocatorInterface() : impl_(std::make_unique<T>()) {}
+
+  // Wraps the (pointer, context) pair returned by impl_->allocate() in a
+  // DataPtr tagged with the CPU device type.
+  at::DataPtr allocate(size_t size) override {
+    auto ptr_and_ctx = impl_->allocate(size);
+    return {
+        ptr_and_ctx.first,
+        ptr_and_ctx.second,
+        deleteFunc, // Use the template parameter deleter function
+        at::DeviceType::CPU};
+  }
+
+  // Forwards the context pointer to impl_->free(); this is what the deleter
+  // generated by DECLARE_HOST_ALLOCATOR calls.
+  void free(void* ctx) {
+    impl_->free(ctx);
+  }
+
+  bool record_event(void* ptr, void* ctx, c10::Stream stream) override {
+    return impl_->record_event(ptr, ctx, stream);
+  }
+
+  void empty_cache() override {
+    impl_->empty_cache();
+  }
+
+  void copy_data(void* dest, const void* src, std::size_t count)
+      const override {
+    impl_->copy_data(dest, src, count);
+  }
+
+  HostStats get_stats() override {
+    return impl_->getStats();
+  }
+
+  void reset_accumulated_stats() override {
+    impl_->resetAccumulatedStats();
+  }
+
+  void reset_peak_stats() override {
+    impl_->resetPeakStats();
+  }
+
+  std::unique_ptr<T> impl_;
+};
+
+// Declares the deleter function `deleter`, a final allocator type `name`
+// derived from CachingHostAllocatorInterface<impl, deleter>, and a static
+// object `instance` of that type, then defines `deleter` to call
+// instance.free(ptr).
+#define DECLARE_HOST_ALLOCATOR(name, impl, deleter, instance)       \
+  void deleter(void* ptr);                                          \
+  struct name final                                                 \
+      : public at::CachingHostAllocatorInterface<impl, deleter> {}; \
+  static name instance;                                             \
+  void deleter(void* ptr) {                                         \
+    instance.free(ptr);                                             \
+  }
+
+/**
+ * Set the host allocator for DeviceType `device_type`. This allocator manages
+ * pinned memory on the host that can be accessed efficiently by the specified
+ * device type. Note that this function is not thread-safe.
+ */
+TORCH_API void setHostAllocator(
+    at::DeviceType device_type,
+    at::HostAllocator* allocator,
+    uint8_t priority = 0);
+
+TORCH_API at::HostAllocator* getHostAllocator(at::DeviceType device_type);
+
+// Helper whose constructor calls setHostAllocator(device_type, allocator),
+// so defining an object of this type performs the registration.
+template <DeviceType device_type>
+struct HostAllocatorRegistry {
+  explicit HostAllocatorRegistry(HostAllocator* allocator) {
+    at::setHostAllocator(device_type, allocator);
+  }
+};
+
+// Defines a HostAllocatorRegistry<device_type> object in an anonymous
+// namespace, constructed with `allocator`.
+#define REGISTER_HOST_ALLOCATOR(device_type, allocator) \
+  namespace {                                           \
+  static at::HostAllocatorRegistry<device_type>         \
+      g_host_allocator_registry_instance(allocator);    \
+  }
+
+} // namespace at
+C10_DIAGNOSTIC_POP()
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h
new file mode 100644
index 0000000000000000000000000000000000000000..c02296ff570367ef1fdf811195b8e54116979b38
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/CheckMemoryFormat.h
@@ -0,0 +1,36 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+#include <c10/core/TensorOptions.h>
+
+namespace c10::impl {
+
+// Validates `options` and resolves which memory format to use.
+//
+// Raises (via TORCH_CHECK) if `options` has requires_grad set to true, or if
+// a memory format is supplied both in `options` and in `memory_format`.
+// Returns `memory_format` when it holds a value, otherwise
+// `options.memory_format_opt()`.
+inline std::optional<MemoryFormat>
+check_tensor_options_and_extract_memory_format(
+    const TensorOptions& options,
+    std::optional<MemoryFormat> memory_format) {
+  TORCH_CHECK(
+      options.requires_grad_opt() != true,
+      "Operators taking TensorOptions cannot take a TensorOptions with "
+      "options.requires_grad set as true. This isn't implemented yet.");
+  TORCH_CHECK(
+      !(options.has_memory_format() && memory_format.has_value()),
+      "Cannot set memory_format both in TensorOptions and explicit argument; please delete "
+      "the redundant setter.");
+  if (memory_format.has_value()) {
+    return memory_format;
+  } else {
+    return options.memory_format_opt();
+  }
+}
+
+} // namespace c10::impl
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h new file mode 100644 index 0000000000000000000000000000000000000000..2ea6e095b68839211938f983c70e06b6f2088f25 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypeProperties.h @@ -0,0 +1,144 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + + +namespace at { + +class Tensor; + +// This class specifies a Backend and a ScalarType. Currently, it primarily +// serves as a replacement return value for Tensor::type(). Previously, +// Tensor::type() returned Type&, but we are changing Type to not be +// dtype-specific. +class TORCH_API DeprecatedTypeProperties { + public: + DeprecatedTypeProperties(Backend backend, ScalarType scalar_type) + : backend_(backend), scalar_type_(scalar_type) {} + + Backend backend() const { + return backend_; + } + + Layout layout() const { + return layout_from_backend(backend_); + } + + bool is_sparse() const { + return layout_from_backend(backend()) == kSparse; + } + + bool is_sparse_csr() const { + return layout_from_backend(backend()) == kSparseCsr; + } + + c10::DeviceType device_type() const { + return backendToDeviceType(backend_); + } + + bool is_cuda() const { + return backendToDeviceType(backend_) == kCUDA; + } + + ScalarType scalarType() const { + return scalar_type_; + } + + caffe2::TypeMeta typeMeta() const { + return scalarTypeToTypeMeta(scalar_type_); + } + + bool operator==(const DeprecatedTypeProperties& other) const { + return backend_ == other.backend() && scalar_type_ == other.scalarType(); + } + + bool operator!=(const DeprecatedTypeProperties& other) 
const { + return !(*this == other); + } + + std::string toString() const { + std::string base_str; + if (backend_ == Backend::Undefined || scalar_type_ == ScalarType::Undefined) { + base_str = "UndefinedType"; + } else { + base_str = std::string(at::toString(backend_)) + at::toString(scalar_type_) + "Type"; + } + return base_str; + } + + DeprecatedTypeProperties & toBackend(Backend b) const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + b, scalar_type_); + } + + DeprecatedTypeProperties & toScalarType(ScalarType s) const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + backend_, s); + } + + DeprecatedTypeProperties & cpu() const { + return toBackend(Backend::CPU); + } + + DeprecatedTypeProperties & cuda() const { + return toBackend(Backend::CUDA); + } + + DeprecatedTypeProperties & hip() const { + return toBackend(Backend::HIP); + } + + DeprecatedTypeProperties & privateUser1() const { + return toBackend(Backend::PrivateUse1); + } + + /// Constructs the `TensorOptions` from a type and a `device_index`. + TensorOptions options(int16_t device_index = -1) const { + return TensorOptions().dtype(typeMeta()) + .device(device_type(), static_cast(device_index)) + .layout(layout()); + } + + /// Constructs the `TensorOptions` from a type and a Device. Asserts that + /// the device type matches the device type of the type. 
+ TensorOptions options(std::optional device_opt) const { + if (!device_opt.has_value()) { + return options(-1); + } else { + Device device = device_opt.value(); + AT_ASSERT(device.type() == device_type()); + return options(device.index()); + } + } + + operator TensorOptions() const { + return options(); + } + + int64_t id() const { + return static_cast(backend()) * + static_cast(ScalarType::NumOptions) + + static_cast(scalarType()); + } + + Tensor unsafeTensorFromTH(void * th_pointer, bool retain) const; + Storage unsafeStorageFromTH(void * th_pointer, bool retain) const; + Tensor copy(const Tensor & src, bool non_blocking=false, std::optional to_device={}) const; + + private: + Backend backend_; + ScalarType scalar_type_; +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h new file mode 100644 index 0000000000000000000000000000000000000000..0119d4c13efc0c17609553b5f8581a1a782eed00 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DeprecatedTypePropertiesRegistry.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// In order to preserve bc, we make DeprecatedTypeProperties instances unique +// just like they are for Type. 
+ +#include +#include +#include + +namespace at { + +class DeprecatedTypeProperties; + +struct TORCH_API DeprecatedTypePropertiesDeleter { + void operator()(DeprecatedTypeProperties * ptr); +}; + +class TORCH_API DeprecatedTypePropertiesRegistry { + public: + DeprecatedTypePropertiesRegistry(); + + DeprecatedTypeProperties& getDeprecatedTypeProperties(Backend p, ScalarType s) const; + +private: + // NOLINTNEXTLINE(*c-array*) + std::unique_ptr registry + [static_cast(Backend::NumOptions)] + [static_cast(ScalarType::NumOptions)]; +}; + +TORCH_API DeprecatedTypePropertiesRegistry& globalDeprecatedTypePropertiesRegistry(); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h new file mode 100644 index 0000000000000000000000000000000000000000..35b829519081d74bd90b36ba41fd79f8e5d86cdf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict.h @@ -0,0 +1,401 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +struct IValue; +template class Dict; +struct Type; + +namespace impl { + +using valid_dict_key_types = guts::typelist::typelist< + int64_t, + std::string, + double, + c10::complex, + bool, + at::Tensor +>; +} + +namespace detail { + +struct DictKeyHash { + size_t operator()(const IValue& ivalue) const; +}; + +struct DictKeyEqualTo { + bool operator()(const IValue& lhs, const IValue& rhs) const; +}; + +struct DictImpl final : public c10::intrusive_ptr_target { + using dict_map_type = ska_ordered::order_preserving_flat_hash_map; + 
struct DictElementTypes final { + TypePtr keyType; + TypePtr valueType; + }; + + explicit DictImpl(dict_map_type dict_, DictElementTypes elementTypes_) + : dict(std::move(dict_)) + , elementTypes(std::move(elementTypes_)) {} + dict_map_type dict; + + DictElementTypes elementTypes; + + intrusive_ptr copy() const; + friend TORCH_API bool operator==(const DictImpl& lhs, const DictImpl& rhs); +}; + +} + +namespace impl { +template class DictIterator; + +/** + * A reference to an entry in the Dict. + * Use the `key()` and `value()` methods to read the element. + */ +template +class DictEntryRef final { +public: + explicit DictEntryRef(Iterator iterator) + : iterator_(std::move(iterator)) {} + + decltype(auto) key() const { + return iterator_->first.template to(); + } + + decltype(auto) value() const { + return iterator_->second.template to(); + } + + template + void setValue(Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the value argument of setValue()"); + iterator_->second = Value(std::forward(value)); + } + ~DictEntryRef() = default; + +private: + // allow copying and moving, but only our friends (i.e. the Dict class) can do + // it. Copying/moving this reference wrapper would be too ambiguous to allow it + // in the public API. + DictEntryRef(const DictEntryRef&) = default; + DictEntryRef& operator=(const DictEntryRef&) = default; + DictEntryRef(DictEntryRef&&) noexcept = default; + DictEntryRef& operator=(DictEntryRef&& rhs) & noexcept = default; + + Iterator iterator_; + friend class DictIterator; + friend class Dict; +}; + +// this wraps map_type::iterator to make sure user code can't rely +// on it being the type of the underlying map. 
+template +class DictIterator final { +public: + // C++17 friendly std::iterator implementation + using iterator_category = std::forward_iterator_tag; + using value_type = DictEntryRef; + using difference_type = std::ptrdiff_t; + using pointer = value_type*; + using reference = value_type&; + + explicit DictIterator() = default; + ~DictIterator() = default; + + DictIterator(const DictIterator& rhs): entryRef_(rhs.entryRef_) {} + DictIterator(DictIterator&& rhs) noexcept: entryRef_(std::move(rhs.entryRef_)) {} + DictIterator& operator=(const DictIterator& rhs) = default; + DictIterator& operator=(DictIterator&& rhs) noexcept { + entryRef_ = std::move(rhs.entryRef_); + return *this; + } + + DictIterator& operator++() { + ++entryRef_.iterator_; + return *this; + } + + DictIterator operator++(int) { + DictIterator copy(*this); + ++*this; + return copy; + } + + const DictEntryRef& operator*() const { + return entryRef_; + } + + const DictEntryRef* operator->() const { + return &entryRef_; + } + + friend difference_type operator-(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.entryRef_.iterator_ - rhs.entryRef_.iterator_; + } + +private: + explicit DictIterator(Iterator iterator): entryRef_(std::move(iterator)) {} + + const Iterator& get_iterator_() const { + return entryRef_.iterator_; + } + + friend bool operator==(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() == rhs.get_iterator_(); + } + + friend bool operator!=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() != rhs.get_iterator_(); + } + + friend bool operator<(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() < rhs.get_iterator_(); + } + + friend bool operator<=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() <= rhs.get_iterator_(); + } + + friend bool operator>(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() > rhs.get_iterator_(); + } 
+ + friend bool operator>=(const DictIterator& lhs, const DictIterator& rhs) { + return lhs.get_iterator_() >= rhs.get_iterator_(); + } + + DictEntryRef entryRef_; + + friend class DictIterator; + friend class Dict; +}; + +template Dict toTypedDict(Dict dict); +template Dict toGenericDict(Dict dict); +} + +/** + * An object of this class stores a map from Key to Value. + * + * This is a pointer type. After a copy, both Dicts + * will share the same storage: + * + * > Dict a; + * > Dict b = a; + * > b.insert(3, "three"); + * > ASSERT("three" == a.at(3)); + * + * We use this class in the PyTorch kernel API because that + * allows us to do optimizations and switch out the underlying + * map implementation without breaking backwards compatibility + * for the kernel API. + */ +template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class Dict final { +private: + static_assert((std::is_same_v && std::is_same_v) || guts::typelist::contains::value, "Invalid Key type for Dict. We only support int64_t, double, bool, and string."); + + // impl_ stores the underlying map as a ska_ordered::order_preserving_flat_hash_map. + // We intentionally don't offer conversion from/to + // order_preserving_flat_hash_map, return references to it or something like that, + // because such operations would get expensive if we switch out + // the actual map implementation. + // This is an intrusive_ptr because Dict is a pointer type. + // Invariant: This will never be a nullptr, there will always be a valid + // DictImpl. + c10::intrusive_ptr impl_; + + explicit Dict(c10::intrusive_ptr&& impl); + friend struct IValue; + template friend Dict impl::toTypedDict(Dict); + template friend Dict impl::toGenericDict(Dict); + +public: + using key_type = Key; + using mapped_type = Value; + using size_type = typename detail::DictImpl::dict_map_type::size_type; + using iterator = impl::DictIterator; + + /** + * Creates an empty dict. 
+ */ + explicit Dict(); + + /** + * Create a generic dict with runtime type information. + * This only works for c10::impl::GenericDict and is not part of the public API + * but only supposed to be used internally by PyTorch. + */ + explicit Dict(TypePtr keyType, TypePtr valueType); + + ~Dict() = default; + + Dict(const Dict&) = default; + Dict& operator=(const Dict&) = default; + + /** + * Create a new Dict pointing to a deep copy of the same data. + * The Dict returned is a new dict with separate storage. + * Changes in it are not reflected in the original dict or vice versa. + */ + Dict copy() const; + + /** + * Returns an iterator to the first element of the container. + * If the container is empty, the returned iterator will be equal to end(). + */ + iterator begin() const; + + /** + * Returns an iterator to the element following the last element of the container. + * This element acts as a placeholder; attempting to access it results in undefined behavior. + */ + iterator end() const; + + /** + * Checks if the container has no elements. + */ + bool empty() const; + + /** + * Returns the number of elements in the container. + */ + size_type size() const; + + /** + * Erases all elements from the container. After this call, size() returns zero. + * Invalidates any references, pointers, or iterators referring to contained elements. May also invalidate past-the-end iterators. + */ + void clear() const; + + /** + * Inserts element(s) into the container, if the container doesn't already contain an element with an equivalent key. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return A pair consisting of an iterator to the inserted element (or to the element that prevented the insertion) and a bool denoting whether the insertion took place. + */ + template + std::pair insert(Key_&& key, Value_&& value) const; + + /** + * If an element with the given key already exists, it is overwritten with the given value. 
+ * Otherwise, a new element with the given key and value are inserted. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return The bool component is true if the insertion took place and false if the assignment took place. The iterator component is pointing at the element that was inserted or updated. + */ + template + std::pair insert_or_assign(Key_&& key, Value_&& value) const; + + /** + * Removes the element pointed to by iter. + * May invalidate any references, pointers, or iterators referring to contained elements. + * The iterator iter must be valid and dereferenceable. Thus the end() iterator (which is valid, but is not dereferenceable) cannot be used as a value for iter. + */ + void erase(iterator iter) const; + + /** + * Removes the element with the given key, if it exists. + * May invalidate any references, pointers, or iterators referring to contained elements. + * + * @return The number of elements removed. This is either '1' if an element with the key existed, or '0' if it didn't. + */ + [[nodiscard]] size_t erase(const Key& key) const; + + /** + * Returns the mapped value of the element with key equivalent to key. + * If no such element exists, an exception of type std::out_of_range is thrown. + */ + Value at(const Key& key) const; + + /** + * Finds an element with key equivalent to key. + * + * @return Iterator to an element with key equivalent to key. + * If no such element is found, past-the-end (see end()) iterator is returned. + */ + iterator find(const Key& key) const; + + /** + * Checks if there is an element with key equivalent to key in the container. + * + * @return true if there is such an element, otherwise false. + */ + bool contains(const Key& key) const; + + /** + * Increase the capacity so that at least count elements can be stored without + * having to reallocate or rehash. + */ + void reserve(size_type count) const; + + /** + * Value equality comparison. 
This function implements Python-like semantics for + * equality: two dicts with the same identity (e.g. same pointer) trivially + * compare equal, otherwise each element is compared for equality. + */ + template + friend bool operator==( + const Dict& lhs, + const Dict& rhs); + template + friend bool operator!=( + const Dict& lhs, + const Dict& rhs); + + /** + * Identity comparison. Returns true if and only if `rhs` represents the same + * Dict object as `this`. + */ + bool is(const Dict& rhs) const; + + // private API for now because the return type will change to TypePtr + // instead of std::optional once types are mandatory. + TypePtr keyType() const; + TypePtr valueType() const; + + // [unsafe set type] + // These functions mutate the tagged type of this dictionary in place. + // There is no checking that the members of the dictionary are instances + // of the new types, nor is there a check that other IValues which + // hold references to this dictionary have the right static type. + // This functionality is used only in the unpickler, where at + // creation type the real type of the dictionary is unknown, but + // then later recovered from the static type information of the + // unpickled object. + void unsafeSetKeyType(TypePtr t); + void unsafeSetValueType(TypePtr t); +}; + +namespace impl { +// GenericDict is how IValue stores dicts. It is, however, not part of the +// public API. Kernels should use Dicts with concrete Key, Value types instead +// (maybe except for some internal prim ops). +using GenericDict = Dict; + +} +} + +namespace torch { + template using Dict = c10::Dict; +} + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..d80e69ae61f6d15c082c04213c7a4938b928ecfa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dict_inl.h @@ -0,0 +1,213 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { +namespace detail { +inline bool DictKeyEqualTo::operator()(const IValue& lhs, const IValue& rhs) const { + if (lhs.isTensor() && rhs.isTensor()) { + // for tensors, we compare only by identity (following how it's done in Python). + return lhs.is(rhs); + } + // Otherwise, we first compare by identity for efficiency, then by value (see: + // [container equality]) + return _fastEqualsForContainer(lhs, rhs); +} +} + +template decltype(auto) getTypePtr(); +std::string toString(const Type& type); + +namespace impl { + +template +Dict toTypedDict(GenericDict dict) { + TORCH_INTERNAL_ASSERT(*getTypePtr() == *dict.impl_->elementTypes.keyType, "Tried to cast a Dict<", toString(*dict.impl_->elementTypes.keyType), ", ", toString(*dict.impl_->elementTypes.valueType) ,"> to a Dict<", toString(*getTypePtr()), ", ", toString(*getTypePtr()), ">. Key types mismatch."); + TORCH_INTERNAL_ASSERT(*getTypePtr() == *dict.impl_->elementTypes.valueType, "Tried to cast a Dict<", toString(*dict.impl_->elementTypes.keyType), ", ", toString(*dict.impl_->elementTypes.valueType) ,"> to a Dict<", toString(*getTypePtr()), ", ", toString(*getTypePtr()), ">. 
Value types mismatch."); + + return Dict(std::move(dict.impl_)); +} + +template +GenericDict toGenericDict(Dict dict) { + return GenericDict(std::move(dict.impl_)); +} +} + +namespace detail { + +inline size_t DictKeyHash::operator()(const IValue& ivalue) const { + if (ivalue.isInt()) { + return std::hash()(ivalue.toInt()); + } else if (ivalue.isString()) { + return std::hash()(ivalue.toStringView()); + } else if (ivalue.isDouble()) { + return std::hash()(ivalue.toDouble()); + } else if (ivalue.isComplexDouble()) { + return c10::hash>()(ivalue.toComplexDouble()); + } else if (ivalue.isBool()) { + return std::hash()(ivalue.toBool()); + } else if (ivalue.isTensor()) { + return std::hash()(ivalue.toTensor().unsafeGetTensorImpl()); + } else if (ivalue.isDevice()) { + return std::hash()(ivalue.toDevice()); + } else { + TORCH_CHECK(false, "Can't hash IValues with tag '", ivalue.tagKind(), "'"); + } +} + +inline intrusive_ptr DictImpl::copy() const { + return make_intrusive(dict, elementTypes); +} + +} + +template +Dict::Dict() + :Dict(make_intrusive( + detail::DictImpl::dict_map_type(), + detail::DictImpl::DictElementTypes{getTypePtr(), getTypePtr()})) { + static_assert(!std::is_same_v, "This constructor is not valid for Dict. Please use c10::impl::GenericDict(keyType, valueType) instead."); + static_assert(!std::is_same_v, "This constructor is not valid for Dict<_, IValue>. 
Please use c10::impl::GenericDict(keyType, valueType) instead."); +} + +template +Dict::Dict(TypePtr keyType, TypePtr valueType) +: Dict(make_intrusive( + detail::DictImpl::dict_map_type(), + detail::DictImpl::DictElementTypes {std::move(keyType), std::move(valueType)})) { + static_assert(std::is_same_v, "This constructor is only valid for c10::impl::GenericDict."); + static_assert(std::is_same_v, "This constructor is only valid for c10::impl::GenericDict."); +} + +template +Dict::Dict(c10::intrusive_ptr&& impl): impl_(std::move(impl)) {} + +template +Dict Dict::copy() const { + return Dict(impl_->copy()); +} + +template +typename Dict::iterator Dict::begin() const { + return iterator{impl_->dict.begin()}; +} + +template +typename Dict::iterator Dict::end() const { + return iterator{impl_->dict.end()}; +} + +template +bool Dict::empty() const { + return impl_->dict.empty(); +} + +template +typename Dict::size_type Dict::size() const { + return impl_->dict.size(); +} + +template +void Dict::clear() const { + impl_->dict.clear(); +} + +template +template +std::pair::iterator, bool> Dict::insert(Key_&& key, Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the key argument of Dict::insert"); + static_assert(std::is_constructible_v, "Wrong type for the value argument of Dict::insert"); + auto inserted = impl_->dict.emplace( + Key(std::forward(key)), + Value(std::forward(value))); + return {iterator{inserted.first}, inserted.second}; +} + +template +template +std::pair::iterator, bool> Dict::insert_or_assign(Key_&& key, Value_&& value) const { + static_assert(std::is_constructible_v, "Wrong type for the key argument of Dict::insert_or_assign"); + static_assert(std::is_constructible_v, "Wrong type for the value argument of Dict::insert_or_assign"); + auto inserted = impl_->dict.insert_or_assign( + Key(std::forward(key)), + Value(std::forward(value))); + return {iterator{inserted.first}, inserted.second}; +} + +template +void 
Dict::erase(iterator iter) const { + impl_->dict.erase(iter.entryRef_.iterator_); +} + +template +[[nodiscard]] size_t Dict::erase(const Key& key) const { + return impl_->dict.erase(key); +} + +template +Value Dict::at(const Key& key) const { + return impl_->dict.at(key).template to(); +} + +template +typename Dict::iterator Dict::find(const Key& key) const { + return iterator{impl_->dict.find(key)}; +} + +template +bool Dict::contains(const Key& key) const { + return end() != find(key); +} + +template +void Dict::reserve(size_type count) const { + impl_->dict.reserve(count); +} + +template +TypePtr Dict::keyType() const { + return impl_->elementTypes.keyType; +} + +template +TypePtr Dict::valueType() const { + return impl_->elementTypes.valueType; +} +template +void Dict::unsafeSetKeyType(TypePtr t) { + impl_->elementTypes.keyType = std::move(t); +} + +template +void Dict::unsafeSetValueType(TypePtr t) { + impl_->elementTypes.valueType = std::move(t); +} + +template +bool operator==(const Dict& lhs, const Dict& rhs) { + // Dicts with the same identity trivially compare equal. + if (lhs.impl_ == rhs.impl_) { + return true; + } + + // Otherwise compare the values + return *lhs.impl_ == *rhs.impl_; +} + +template +bool operator!=(const Dict& lhs, const Dict& rhs) { + return !(lhs == rhs); +} + +template +bool Dict::is(const Dict& rhs) const { + return this->impl_ == rhs.impl_; +} +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h new file mode 100644 index 0000000000000000000000000000000000000000..39f8179ba188a26c8742df484be9a4d4eeaf69c0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DimVector.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at { + +// Redeclaring 'DimVector' type and size inside 'at' namespace. +// This is done to avoid modifying every use into their 'c10' +// equivalent. + +using c10::kDimVectorStaticSize; +using c10::DimVector; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h new file mode 100644 index 0000000000000000000000000000000000000000..eb8a834037843b434ccb60907b507d9c38acabb0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Dimname.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at { + +enum class NameType: uint8_t { BASIC, WILDCARD }; + +struct TORCH_API Dimname { + static Dimname fromSymbol(Symbol name); + static Dimname wildcard(); + static bool isValidName(const std::string& name); + + NameType type() const { return type_; } + Symbol symbol() const { return name_; } + + bool isBasic() const { return type_ == NameType::BASIC; } + bool 
isWildcard() const { return type_ == NameType::WILDCARD; } + + bool matches(Dimname other) const; + std::optional unify(Dimname other) const; + + private: + Dimname(Symbol name) + : name_(name), type_(NameType::BASIC) {} + Dimname(Symbol name, NameType type) + : name_(name), type_(type) {} + + Symbol name_; + NameType type_; +}; + +using DimnameList = c10::ArrayRef; + +TORCH_API std::ostream& operator<<(std::ostream& out, const Dimname& dimname); + +inline bool operator==(const Dimname& lhs, const Dimname& rhs) { + return lhs.symbol() == rhs.symbol(); +} + +inline bool operator!=(const Dimname& lhs, const Dimname& rhs) { + return !(lhs == rhs); +} + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h new file mode 100644 index 0000000000000000000000000000000000000000..1313f2f1a72e797b66257c81b302e14b05339302 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/DistributionsHelper.h @@ -0,0 +1,337 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +/** + * Distributions kernel adapted from THRandom.cpp + * The kernels try to follow std::random distributions signature + * For instance: in ATen + * auto gen = at::detail::createCPUGenerator(); + * at::uniform_real_distribution uniform(0, 1); + * auto sample = uniform(gen.get()); + * + * vs std::random + * + * std::mt19937 gen; + * std::uniform_real_distribution uniform(0, 1); + * auto sample = uniform(gen); + */ + + +namespace at { +namespace { + +/** + * Samples a 
discrete uniform distribution in the range [base, base+range) of type T + */ +template +struct uniform_int_from_to_distribution { + + C10_HOST_DEVICE inline uniform_int_from_to_distribution(uint64_t range, int64_t base) : range_(range), base_(base) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { +#ifdef FBCODE_CAFFE2 + if (( + std::is_same_v || + std::is_same_v || + std::is_same_v || + std::is_same_v) && range_ >= 1ULL << 32) +#else + if (range_ >= 1ULL << 28) // allow approx 5% skew in uniform int generation using % +#endif + { + return transformation::uniform_int_from_to(generator->random64(), range_, base_); + } else { + return transformation::uniform_int_from_to(generator->random(), range_, base_); + } + } + + private: + uint64_t range_; + int64_t base_; +}; + +/** + * Samples a discrete uniform distribution in the range [min_value(int64_t), max_value(int64_t)] + */ +template +struct uniform_int_full_range_distribution { + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + return transformation::uniform_int_full_range(generator->random64()); + } + +}; + +/** + * Samples a discrete uniform distribution in the range [0, max_value(T)] for integral types + * and [0, 2^mantissa] for floating-point types. 
+ */ +template +struct uniform_int_distribution { + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + if constexpr (std::is_same_v || std::is_same_v) { + return transformation::uniform_int(generator->random64()); + } else { + return transformation::uniform_int(generator->random()); + } + } + +}; + +/** + * Samples a uniform distribution in the range [from, to) of type T + */ +template +struct uniform_real_distribution { + + C10_HOST_DEVICE inline uniform_real_distribution(T from, T to) : from_(from), to_(to) { + TORCH_CHECK_IF_NOT_ON_CUDA(from <= to); + TORCH_CHECK_IF_NOT_ON_CUDA(to - from <= std::numeric_limits::max()); + } + + template + C10_HOST_DEVICE inline dist_acctype operator()(RNG generator){ + if constexpr (std::is_same_v) { + return transformation::uniform_real(generator->random64(), from_, to_); + } else { + return transformation::uniform_real(generator->random(), from_, to_); + } + } + + private: + T from_; + T to_; +}; + +// The SFINAE checks introduced in #39816 looks overcomplicated and must revisited +// https://github.com/pytorch/pytorch/issues/40052 +#define DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(member) \ +template \ +struct has_member_##member \ +{ \ + typedef char yes; \ + typedef long no; \ + template static yes test(decltype(&U::member)); \ + template static no test(...); \ + static constexpr bool value = sizeof(test(0)) == sizeof(yes); \ +} + +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_double_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_double_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(next_float_normal_sample); +DISTRIBUTION_HELPER_GENERATE_HAS_MEMBER(set_next_float_normal_sample); + +#define DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(TYPE) \ + \ +template ::value && \ + has_member_set_next_##TYPE##_normal_sample::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* generator, ret_type* ret) { \ + if 
(generator->next_##TYPE##_normal_sample()) { \ + *ret = *(generator->next_##TYPE##_normal_sample()); \ + generator->set_next_##TYPE##_normal_sample(std::optional()); \ + return true; \ + } \ + return false; \ +} \ + \ +template ::value || \ + !has_member_set_next_##TYPE##_normal_sample::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline bool maybe_get_next_##TYPE##_normal_sample(RNG* /*generator*/, ret_type* /*ret*/) { \ + return false; \ +} \ + \ +template ::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* generator, ret_type cache) { \ + generator->set_next_##TYPE##_normal_sample(cache); \ +} \ + \ +template ::value \ + ), int> = 0> \ +C10_HOST_DEVICE inline void maybe_set_next_##TYPE##_normal_sample(RNG* /*generator*/, ret_type /*cache*/) { \ +} + +DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(double) +DISTRIBUTION_HELPER_GENERATE_NEXT_NORMAL_METHODS(float) + +/** + * Samples a normal distribution using the Box-Muller method + * Takes mean and standard deviation as inputs + * Note that Box-muller method returns two samples at a time. + * Hence, we cache the "next" sample in the CPUGeneratorImpl class. 
+ */ +template +struct normal_distribution { + + C10_HOST_DEVICE inline normal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in >= 0, "stdv_in must be positive: ", stdv_in); + } + + template + C10_HOST_DEVICE inline dist_acctype operator()(RNG generator){ + dist_acctype ret; + // return cached values if available + if constexpr (std::is_same_v) { + if (maybe_get_next_double_normal_sample(generator, &ret)) { + return transformation::normal(ret, mean, stdv); + } + } else { + if (maybe_get_next_float_normal_sample(generator, &ret)) { + return transformation::normal(ret, mean, stdv); + } + } + // otherwise generate new normal values + uniform_real_distribution uniform(0.0, 1.0); + const dist_acctype u1 = uniform(generator); + const dist_acctype u2 = uniform(generator); + const dist_acctype r = ::sqrt(static_cast(-2.0) * ::log1p(-u2)); + const dist_acctype theta = static_cast(2.0) * c10::pi * u1; + if constexpr (std::is_same_v) { + maybe_set_next_double_normal_sample(generator, r * ::sin(theta)); + } else { + maybe_set_next_float_normal_sample(generator, r * ::sin(theta)); + } + ret = r * ::cos(theta); + return transformation::normal(ret, mean, stdv); + } + + private: + T mean; + T stdv; +}; + +template +struct DiscreteDistributionType { using type = float; }; + +template <> struct DiscreteDistributionType { using type = double; }; + +/** + * Samples a bernoulli distribution given a probability input + */ +template +struct bernoulli_distribution { + + C10_HOST_DEVICE inline bernoulli_distribution(T p_in) : p(p_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(p_in >= 0 && p_in <= 1); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::bernoulli(uniform(generator), p); + } + + private: + T p; +}; + +/** + * Samples a geometric distribution given a probability input + */ +template +struct geometric_distribution { + + C10_HOST_DEVICE inline 
geometric_distribution(T p_in) : p(p_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(p_in > 0 && p_in < 1); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::geometric(uniform(generator), p); + } + + private: + T p; +}; + +/** + * Samples an exponential distribution given a lambda input + */ +template +struct exponential_distribution { + + C10_HOST_DEVICE inline exponential_distribution(T lambda_in) : lambda(lambda_in) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::exponential(uniform(generator), lambda); + } + + private: + T lambda; +}; + +/** + * Samples a cauchy distribution given median and sigma as inputs + */ +template +struct cauchy_distribution { + + C10_HOST_DEVICE inline cauchy_distribution(T median_in, T sigma_in) : median(median_in), sigma(sigma_in) {} + + template + C10_HOST_DEVICE inline T operator()(RNG generator) { + uniform_real_distribution uniform(0.0, 1.0); + return transformation::cauchy(uniform(generator), median, sigma); + } + + private: + T median; + T sigma; +}; + +/** + * Samples a lognormal distribution + * Takes mean and standard deviation as inputs + * Outputs two samples at a time + */ +template +struct lognormal_distribution { + + C10_HOST_DEVICE inline lognormal_distribution(T mean_in, T stdv_in) : mean(mean_in), stdv(stdv_in) { + TORCH_CHECK_IF_NOT_ON_CUDA(stdv_in > 0); + } + + template + C10_HOST_DEVICE inline T operator()(RNG generator){ + normal_distribution normal(mean, stdv); + return transformation::log_normal(normal(generator)); + } + + private: + T mean; + T stdv; +}; +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h new file mode 100644 index 0000000000000000000000000000000000000000..8c9a935edb89ed5f126b769889f43c7ab4e81c12 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Formatting.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace c10 { +TORCH_API std::ostream& operator<<(std::ostream& out, Backend b); +TORCH_API std::ostream& operator<<(std::ostream & out, const Scalar& s); +TORCH_API std::string toString(const Scalar& s); +} +namespace at { + +TORCH_API std::ostream& operator<<(std::ostream& out, const DeprecatedTypeProperties& t); +TORCH_API std::ostream& print( + std::ostream& stream, + const Tensor& tensor, + int64_t linesize); +inline std::ostream& operator<<(std::ostream & out, const Tensor & t) { + return print(out,t,80); +} +TORCH_API void print(const Tensor & t, int64_t linesize=80); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h new file mode 100644 index 0000000000000000000000000000000000000000..0f312f4e2139f6ee258407fe54d609e101f60f85 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Generator.h @@ -0,0 +1,194 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +// For the record I don't think this is a correct pimpl idiom. +// Including Impl header in interface header defeats the purpose +// because you can't change Impl private members without forcing +// everything that included the interface to rebuild. +// Impl should be forward-declared in the interface header instead. +#include + +/** + * Note [Generator] + * ~~~~~~~~~~~~~~~~ + * A Pseudo Random Number Generator (PRNG) is an engine that uses an algorithm to + * generate a seemingly random sequence of numbers, that may be later be used in creating + * a random distribution. Such an engine almost always maintains a state and requires a + * seed to start off the creation of random numbers. Often times, users have + * found it beneficial to be able to explicitly create, retain, and destroy + * PRNG states and also be able to have control over the seed value. + * + * A Generator in ATen gives users the ability to read, write and modify a PRNG engine. + * For instance, it does so by letting users seed a PRNG engine, fork the state of the + * engine, etc. + * + * By default, there is one generator per device, and a device's generator is + * lazily created. A user can use the torch.Generator() api to create their own generator. 
+ */ + +/** + * Note [Acquire lock when using random generators] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Generator and its derived classes are NOT thread-safe. Please note that most of the + * places where we have inserted locking for generators are historically based, and we + * haven't actually checked that everything is truly thread safe (and it probably isn't). + * Please use the public mutex_ when using any methods from these classes, except for the + * read-only methods. You can learn about the usage by looking into the unittests + * (aten/src/ATen/cpu_generator_test.cpp) and other places where we have used lock_guard. + * + * TODO: Look into changing the threading semantics of Generators in ATen (e.g., making + * them non-thread safe and instead making the generator state splittable, to accommodate + * forks into other threads). + */ + +namespace at { + +class Tensor; + +struct TORCH_API Generator { + Generator() = default; + + explicit Generator(c10::intrusive_ptr gen_impl) + : impl_(std::move(gen_impl)) { + TORCH_CHECK(impl_.get(), "GeneratorImpl with nullptr is not supported"); + } + + bool operator==(const Generator& rhs) const { + return this->impl_ == rhs.impl_; + } + + bool operator!=(const Generator& rhs) const { + return !((*this) == rhs); + } + + bool defined() const { + return static_cast(impl_); + } + + c10::GeneratorImpl* unsafeGetGeneratorImpl() const { + return impl_.get(); + } + + c10::GeneratorImpl* unsafeReleaseGeneratorImpl() { + return impl_.release(); + } + + const c10::intrusive_ptr& getIntrusivePtr() const { + return impl_; + } + + void set_current_seed(uint64_t seed) { impl_->set_current_seed(seed); } + // Sets the offset of Generator state to the desired offset. This is currently + // supported for only Philox based Generators, i.e., CUDA and MPS. + void set_offset(uint64_t offset) { impl_->set_offset(offset); } + + // Returns the offset of Generator state. 
This is currently supported for only + // Philox based Generators, i.e., CUDA and MPS. + uint64_t get_offset() const { return impl_->get_offset(); } + + uint64_t current_seed() const { return impl_->current_seed(); } + + uint64_t seed() { return impl_->seed(); } + + // Implementation not inlined to prevent cycle reference between + // `ATen/core/Generator.h` and `ATen/core/Tensor.h` + void set_state(const at::Tensor& new_state); + + at::Tensor get_state() const; + + void graphsafe_set_state(const Generator& new_state); + + Generator graphsafe_get_state() const; + + std::mutex& mutex() { + return impl_->mutex_; + } + + DispatchKeySet key_set() const { + return impl_->key_set(); + } + + Device device() const { return impl_->device(); } + + inline void set_pyobj(PyObject* pyobj) const noexcept { + impl_->set_pyobj(pyobj); + } + + inline PyObject* pyobj() const noexcept { + return impl_->pyobj(); + } + + template + T* get() const { return static_cast(impl_.get()); } + + Generator clone() const { + return Generator(impl_->clone()); + } + + private: + c10::intrusive_ptr impl_; +}; + +template +Generator make_generator(Args&&... args) { + return Generator(c10::make_intrusive(std::forward(args)...)); +} + +/** + * Utility function to static cast input Generator* to + * the backend generator type (CPU/CUDAGeneratorImpl etc.) + */ +template +inline T * check_generator(std::optional gen) { + TORCH_CHECK(gen.has_value(), "Expected Generator but received nullopt"); + TORCH_CHECK(gen->defined(), "Generator with undefined implementation is not allowed"); + TORCH_CHECK(T::device_type() == gen->device().type(), "Expected a '", T::device_type(), "' device type for generator but found '", gen->device().type(), "'"); + return gen->get(); +} + +/** + * Utility function used in tensor implementations, which + * supplies the default generator to tensors, if an input generator + * is not supplied. 
The input Generator* is also static casted to + * the backend generator type (CPU/CUDAGeneratorImpl etc.) + */ +template +inline T* get_generator_or_default(const std::optional& gen, const Generator& default_gen) { + return gen.has_value() && gen->defined() ? check_generator(gen) : check_generator(default_gen); +} + +namespace detail { + +/** + * Helper function for checking the validity of new random generator + * state. Right now following conditions are checked: + * + * - The new state tensor must be a torch.ByteTensor + * - Data of the new state tensor must be contiguous + */ +inline void check_rng_state(const c10::TensorImpl& new_state) { + TORCH_CHECK_TYPE( + new_state.layout() == kStrided && new_state.device().type() == kCPU && new_state.dtype() == kByte, + "RNG state must be a torch.ByteTensor" + ); + + TORCH_CHECK(new_state.is_contiguous(), "RNG state must be contiguous"); +} + +} // namespace detail + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h new file mode 100644 index 0000000000000000000000000000000000000000..a5c82221aadfc7c310ef7eac00cfb4a17708fe21 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/GeneratorForPrivateuseone.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + +using GeneratorFuncType = std::function; + +TORCH_API std::optional& GetGeneratorPrivate(); + +class TORCH_API _GeneratorRegister { + public: + explicit _GeneratorRegister(const GeneratorFuncType& func); +}; + +TORCH_API at::Generator GetGeneratorForPrivateuse1( + c10::DeviceIndex device_index); + +/** + * This is used to register Generator to PyTorch for `privateuse1` key. + * + * Usage: REGISTER_GENERATOR_PRIVATEUSE1(MakeGeneratorForPrivateuse1) + * + * class CustomGeneratorImpl : public c10::GeneratorImpl { + * CustomGeneratorImpl(DeviceIndex device_index = -1); + * explicit ~CustomGeneratorImpl() override = default; + * ... + * }; + * + * at::Generator MakeGeneratorForPrivateuse1(c10::DeviceIndex id) { + * return at::make_generator(id); + * } + */ + +#define REGISTER_GENERATOR_PRIVATEUSE1(GeneratorPrivate) \ + static auto temp##GeneratorPrivate = at::_GeneratorRegister(GeneratorPrivate); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h new file mode 100644 index 0000000000000000000000000000000000000000..694907d44ff219fe903c5afb42baedee9d518001 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef.h @@ -0,0 +1,638 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +/* + * [Note: IListRef] + * Wrapper around different API containers (e.g. boxed and unboxed). + * + * What is it? + * =========== + * It is a tagged union of both boxed and unboxed API containers. + * Working implementations: + * + * - `IListRef` + * - `IListRef` + * + * Note that `IListRef` is a view type. Meaning that it won't own the + * tensors it holds. It's intended to be used only as argument parameters. + * Specifically, where these 2 worlds overlap. + * + * What is this for? + * ================= + * Historically, PyTorch has maintained 2 different APIs: the unboxed + * (called from C++ API and Python eager mode) and boxed APIs (called + * from the TorchScript JIT, mobile interpreter, and boxed fallbacks). + * + * Calling unboxed kernels from the boxed "world" and vice-versa may + * result in non-negligible overhead. Lists are one of those types: + * + * - Boxed world: `c10::List` + * - Unboxed world: `c10::ArrayRef` + * + * In this context, `c10::IListRef` solves this problem by wrapping those + * 2 container types, so that we don't need to convert from one to + * the other. + * + * (see https://github.com/pytorch/pytorch/issues/66328) + * + * What does it do? 
+ * ================ + * This container wraps around the different tagged containers + * (currently, only boxed and unboxed), without incurring in extra + * overhead for converting from one to another. It does so while + * exposing usual container methods, which dispatch to corresponding + * implementations. + * + * While it works with different container types, it introduces + * overhead for repeatedly calling member functions (since those will + * get dispatched, again). Therefore, you should only use it to iterate + * through the list up to one time. If you need to do more complex things, + * call `materialize()` first. + * + * Adding support for a new Tag + * ============================ + * Suppose we want to add a new tag: `Chest`. Here are the steps + * we would have to go through: + * + * 1. Add a line for it in the macro `TORCH_ILISTREF_FORALL_TAGS`. + * + * #define TORCH_ILISTREF_FORALL_TAGS(_, ...) \ + * ... + * _(Chest, ##__VA_ARGS__) + * + * 2. Add type aliases, union members, and constructors. + * + * template + * class IListRef { + * ... + * using chest_type = + * typename detail::IListRefTagImpl::list_type; + * ... + * IListRef(...) : tag_(IListRefTag::Chest) { + * ... + * } + * ... + * union Payload { + * ... + * chest_type chest; + * ... + * }; + * ... + * }; + * + * 3. Add a default implementation for it (in 'IListRef_inl.h'). It's + * preferable to make the default implementation work for `T = Tensor` + * (both `Unboxed` and `Boxed` do it). + * + * template + * class IListRefTagImplBase { + * public: + * using elem_type = ListElemT; + * using list_type = ChestContainer; + * + * static const list_type& unwrap(const IListRef& ilist) { ... } + * + * static typename list_type::const_iterator& unwrap( + * IListRefIterator& it) { ... } + * + * static const typename list_type::const_iterator& unwrap( + * const IListRefIterator& it) { ... } + * + * static IListRefConstRef iterator_get( + * const typename list_type::const_iterator& it) { ... 
} + * } + * + * 4. Add an specialization for each of the already supported types. + * Finally, for consistency, add them to the tracking list. + * (see [Note: IListRefTagImpl Specializations]) + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * Adding support for a new Type + * ============================= + * Suppose we want to add support for a new type: `Matrix`. + * Here are the steps we would have to go through: + * + * 1. Add an specialization for each of the existing tags. + * For consistency, add them to the tracking list. + * (see [Note: IListRefTagImpl Specializations]) + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * template <> + * class IListRefTagImpl + * : public IListRefTagImplBase {}; + * + * Common Problems + * =============== + * 1. One of `IListRef(Iterator)` methods are failing to compile. + * + * That may be happening because the container type you added + * is not compatible with the code written for that method. If + * that's true, then you might have to transform that code into + * a static method call (see `List::operator[]` method). + * + * 2. Can't make `IListRefIterator::operator*` return a const-reference. + * + * First, keep in mind that we assume that boxed containers will + * have to deal with `IValue` (e.g. `c10::List`). In this context, + * what may be happening is that `IValue` doesn't store internally + * your type `T`. Instead, it constructs a type new `T` every time + * you try to get `T` for it (see `IListRef`). + */ + +namespace c10 { +template +class IListRef; + +/* + * Applies arbitrary macros to each `IListRefTag`. + */ +#define TORCH_ILISTREF_FORALL_TAGS(_, ...) \ + _(Unboxed, ##__VA_ARGS__) \ + _(Boxed, ##__VA_ARGS__) \ + _(Materialized, ##__VA_ARGS__) + +/* + * Defines a "switch-case" for `TAG`. 
Inside, it executes `BODY`, + * while bringing to scope: + * + * - `ImplT`: the implementation class for `TAG` + * - `this_`: the result of unwrapping `this` + */ +#define TORCH_ILISTREF_UNWRAP_CASE(TAG, BODY) \ + case c10::IListRefTag::TAG: { \ + using ImplT = c10::detail::IListRefTagImpl; \ + auto& this_ = ImplT::unwrap(*this); \ + BODY \ + } break; + +/* + * Dispatches the unwrap call, depending on `TAG`, followed by + * the execution of `BODY`. It aborts if `TAG` is not a `IListRefTag`. + * + * This macro is useful because it allows us to handle different + * types (that correspond to different tags) to be implemented + * only once. We can do it even when the implementation of the + * different tags aren't syntactically the same, by dispatching + * it to a function (e.g. `ImplT::(this_)`). + */ +#define TORCH_ILISTREF_UNWRAP(TAG, BODY) \ + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") \ + switch (TAG) { \ + TORCH_ILISTREF_FORALL_TAGS(TORCH_ILISTREF_UNWRAP_CASE, BODY) \ + break; \ + default: \ + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); \ + } \ + C10_DIAGNOSTIC_POP() + +enum class IListRefTag { +#define DEFINE_TAG(tag, ...) tag, + TORCH_ILISTREF_FORALL_TAGS(DEFINE_TAG) +#undef DEFINE_TAG + None +}; + +namespace detail { +/* + * Type alias that specifies whether we return a reference or a copy of `T`. + * + * What is this for? + * ================= + * Since values in the boxed world are represented by an `IValue`, we also + * depend on whether it can be converted to a const-reference (`Tensor`) or + * has to create a new copy of `T` (`OptionalTensorRef`). + */ +template +using IListRefConstRef = typename ivalue_to_const_ref_overload_return::type; + +/* + * Interface that implements key functions for each `IListRefTag` type. + * + * What is this for? + * ================= + * Given an `IListRef(Iterator)`, some methods have to be implemented + * differently for each `TAG`. 
Therefore, the methods inside this class + * are used as dispatch targets for the different `IListRefTag` values. + * + * You should create an specialization of this class for each possible + * combination of `IListRefTag` type (except `None`) and element types + * (e.g. `Tensor`). + * + * What does it do? + * ================ + * 1. defines static methods to be used as dispatch targets by both + * `IListRef` and `IListRefIterator` (see the implementation of + * `IListRefTagImplBase`). + * + * 2. defines the `elem_type` and `list_type` aliases that will be + * used in the definition of `IListRef`. In general, we should do + * so by inheriting from `IListRefTagImplBase`. + * + * [Note: IListRefTagImpl Specialization] + * ====================================== + * For `IListRef(Iterator)`: + * - + * - + * - + * + * For `IListRef(Iterator)`: + * - + * - + * - + */ +template +class IListRefTagImpl {}; + +/* + * Base implementation of `IListRefTagImpl` methods. + * + * What is this for? + * ================= + * This should make adding specializations for new types easier. For + * example, one should be able to add a new type just by making its + * `IListRefTagImpl` specialization inherit from `IListRefTagImplBase`. + * + * You should create a partial specialization for this class only if + * you introduce a new `IListRefTag`. The idea being that there is one + * default implementation for each possible value of `IListRefTag`. + * + * What does it do? + * ================ + * 1. defines `elem_type` as an alias to `ListElemT`. + * + * 1. defines `list_type` as an alias to the default container type + * that will hold a collection of `elem_type`. The idea being that + * all types tagged as `TAG` will have `list_type` as its container, + * with different `elem_type`. + * + * 3. defines the default implementation for each of the methods that + * are supposed to be defined on `IListRefTagImpl` specializations. + * + * 4. 
inheriting from `IListRefTagImplBase` also means + * that the payload of the type `IListRef` will be of type `list_type` + * when it is tagged as `TAG`. + */ +template +class IListRefTagImplBase {}; + +/* + * Materialized container for `IListRef`. + * + * What is this for? + * ================= + * Container that groups `T` references together. This exchanges the + * overhead of every method call from `IListRef` for a dynamic allocation. + * + * You should use this container instead of `IListRef` if: + * + * - You are going to iterate the list more than once + * - You need to repeatedly access arbitrary elements (using `operator[]`) + * What does it do? + + * ================ + * Removes the reference (&) from the type, and wraps it into a + * `std::reference_wrapper`. If `IListRefConstRef` is not a + * reference type, then it's left unchanged. + */ +template +using _MaterializedIListRefElem = std::conditional_t< + std::is_reference_v, + typename std::reference_wrapper>, + T>; + +template +using MaterializedIListRefElem = _MaterializedIListRefElem>; + +template +using MaterializedIListRef = std::vector>; + +} // namespace detail + +/* + * Iterator for `IListRef`. + * + * What is it? + * =========== + * Currently, a `std::bidirectional_iterator` that wraps the iterator + * types defined for each of the `IListRefTag`. + * + * One should be able to use it, as if it were the unwrapped + * iterators themselves. + + * What does it do? + * ================ + * Similarly to `IListRef`, this is a wrapper class. Specifically, it + * wraps each container's `const_iterator` type alias. So, for example, + * given that the container for `IListRefTag::Boxed` is `c10::List`, this + * iterator will wrap a `c10::List::const_iterator`. 
+ * + * [Note: MSVC Iterator Debug] + * =========================== + * MSVC `vector::iterator` implementation (used in the boxed variant) + * makes it so this union's destructor, copy-constructor (assignment), and + * move-constructor (assignment) are implicitly deleted. + * + * Therefore, we need to explicitly define them as needed. Follows a list + * of places where these are needed and their reason: + * + * - `Payload` destructor: + * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is set to 2. + * + * - `IListRefIterator` destructor: + * same as above. However, we need to explicitly call the variant + * destructor explicitly. + * + * - `IListRefIterator` copy-constructor: + * it is deleted only if the macro `_ITERATOR_DEBUG_LEVEL` is different + * than 0. + */ +template +class IListRefIterator { + private: +#define DEFINE_FRIEND_CLASS(TAG, ...) \ + friend class detail::IListRefTagImpl; \ + friend class detail::IListRefTagImplBase< \ + IListRefTag::TAG, \ + T, \ + typename detail::IListRefTagImpl::elem_type>; + TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS) +#undef DEFINE_FRIEND_CLASS + + public: + // C++17 friendly std::iterator implementation + using iterator_category = std::bidirectional_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = T&; + + using unboxed_iterator_type = typename detail:: + IListRefTagImpl::list_type::const_iterator; + using boxed_iterator_type = typename detail:: + IListRefTagImpl::list_type::const_iterator; + using materialized_iterator_type = + typename detail::MaterializedIListRef::const_iterator; + + IListRefIterator() : tag_(IListRefTag::None) {} + +#if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL != 0 + // See [Note: MSVC Iterator Debug] + IListRefIterator(const IListRefIterator& iterator) + : tag_(iterator.tag_) { + switch (tag_) { + case IListRefTag::Boxed: + payload_.boxed_iterator = iterator.payload_.boxed_iterator; + break; + case 
IListRefTag::Unboxed: + payload_.unboxed_iterator = iterator.payload_.unboxed_iterator; + break; + case IListRefTag::Materialized: + payload_.materialized_iterator = iterator.payload_.materialized_iterator; + break; + default: + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); + } + } +#endif + +#if defined(_MSC_VER) && _ITERATOR_DEBUG_LEVEL == 2 + // See [Note: MSVC Iterator Debug] + ~IListRefIterator() noexcept(false) { + switch (tag_) { + case IListRefTag::Boxed: + payload_.boxed_iterator.~boxed_iterator_type(); + break; + case IListRefTag::Unboxed: + payload_.unboxed_iterator.~unboxed_iterator_type(); + break; + case IListRefTag::Materialized: + payload_.materialized_iterator.~materialized_iterator_type(); + break; + default: + TORCH_INTERNAL_ASSERT(false, "invalid IListRef tag."); + } + } +#endif + + IListRefIterator(boxed_iterator_type boxed) : tag_(IListRefTag::Boxed) { + payload_.boxed_iterator = boxed; + } + + IListRefIterator(unboxed_iterator_type unboxed) : tag_(IListRefTag::Unboxed) { + payload_.unboxed_iterator = unboxed; + } + + IListRefIterator(materialized_iterator_type materialized) : tag_(IListRefTag::Materialized) { + payload_.materialized_iterator = materialized; + } + + detail::IListRefConstRef operator*() const { + TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::iterator_get(this_); }); + } + + IListRefIterator& operator++() { + TORCH_ILISTREF_UNWRAP(tag_, { ++this_; }); + return *this; + } + + IListRefIterator operator++(int) { + auto old = *this; + TORCH_ILISTREF_UNWRAP(tag_, { ++this_; }); + return old; + } + + IListRefIterator& operator--() { + TORCH_ILISTREF_UNWRAP(tag_, { --this_; }); + return *this; + } + + IListRefIterator operator--(int) { + auto old = *this; + TORCH_ILISTREF_UNWRAP(tag_, { --this_; }); + return old; + } + + bool operator==(const IListRefIterator& rhs) const { + if (tag_ != rhs.tag_) { + return false; + } + TORCH_ILISTREF_UNWRAP(tag_, { + auto& rhs_it = ImplT::unwrap(rhs); + return this_ == rhs_it; + }); + } + + 
bool operator!=(const IListRefIterator& rhs) const { + return !(*this == rhs); + } + + private: + union Payload { + boxed_iterator_type boxed_iterator; + unboxed_iterator_type unboxed_iterator; + materialized_iterator_type materialized_iterator; + void* _init_ptr; + Payload() : _init_ptr(nullptr) {} +#if defined(_MSC_VER) + // See [Note: MSVC Iterator Debug] + ~Payload() {} +#endif + }; + + Payload payload_; + IListRefTag tag_; +}; + +/* + * See [Note: IListRef] + */ +template +class IListRef { + private: +#define DEFINE_FRIEND_CLASS(TAG, ...) \ + friend class detail::IListRefTagImpl; \ + friend class detail::IListRefTagImplBase< \ + IListRefTag::TAG, \ + T, \ + typename detail::IListRefTagImpl::elem_type>; + TORCH_ILISTREF_FORALL_TAGS(DEFINE_FRIEND_CLASS) +#undef DEFINE_FRIEND_CLASS + + public: + using unboxed_type = + typename detail::IListRefTagImpl::list_type; + using boxed_type = + typename detail::IListRefTagImpl::list_type; + using materialized_type = + typename detail::MaterializedIListRef; + + using iterator = IListRefIterator; + using const_iterator = IListRefIterator; + using reverse_iterator = std::reverse_iterator; + using value_type = typename iterator::value_type; + + IListRef() : tag_(IListRefTag::None) {} + + IListRef(const boxed_type& boxed) : tag_(IListRefTag::Boxed) { + payload_.boxed = &boxed; + } + + IListRef(const unboxed_type& unboxed) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = unboxed; + } + + IListRef(const std::initializer_list& list) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = at::ArrayRef(list); + } + + template < + typename... UnboxedConstructorArgs, + typename = std::enable_if_t< + std::is_constructible_v>> + IListRef(UnboxedConstructorArgs&&... 
args) : tag_(IListRefTag::Unboxed) { + payload_.unboxed = unboxed_type(std::forward(args)...); + } + + IListRef(const materialized_type& materialized) : tag_(IListRefTag::Materialized) { + payload_.materialized = &materialized; + } + + size_t size() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.size(); }); + } + + bool empty() const { + return size() == 0; + } + + iterator begin() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.begin(); }); + } + + iterator end() const { + TORCH_ILISTREF_UNWRAP(tag_, { return this_.end(); }); + } + + detail::IListRefConstRef front() const { + TORCH_ILISTREF_UNWRAP(tag_, { return ImplT::front(this_); }); + } + + /* + * Materializes the `IListRef` into a `std::vector`. + * + * This should be used when one wishes to either: + * + * - iterate over the list more than once: each `IListRefIterator` + * member function call has to go through a switch, introducing + * non-negligible overhead + * + * - randomly access an arbitrary element using `operator[]`: + * same reason as above + */ + detail::MaterializedIListRef materialize() const { + if (isMaterialized()) { + return toMaterialized(); + } + + detail::MaterializedIListRef materialized; + materialized.reserve(size()); + for (const auto& t : *this) { + materialized.emplace_back(t); + } + return materialized; + } + +#define DEFINE_CHECK(TAG, ...) \ + bool is##TAG() const { \ + return tag_ == IListRefTag::TAG; \ + } + TORCH_ILISTREF_FORALL_TAGS(DEFINE_CHECK) +#undef DEFINE_CHECK + + bool isNone() const { + return tag_ == IListRefTag::None; + } + +#define DEFINE_CASTING(TAG, ...) 
\ + const typename detail::IListRefTagImpl::list_type& \ + to##TAG() const { \ + TORCH_INTERNAL_ASSERT(is##TAG()); \ + return detail::IListRefTagImpl::unwrap(*this); \ + } + TORCH_ILISTREF_FORALL_TAGS(DEFINE_CASTING) +#undef DEFINE_CASTING + + private: + union Payload { + const boxed_type* boxed; + unboxed_type unboxed; + const materialized_type* materialized; + Payload() : boxed(nullptr) {} + }; + + Payload payload_; + IListRefTag tag_; +}; + +} // namespace c10 + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..98bf272763e98ee82db4c88cfeae2588f1085c2e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/IListRef_inl.h @@ -0,0 +1,208 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { +class Tensor; +class OptionalTensorRef; +} + + +namespace c10::detail { + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Unboxed`. + */ +template +class IListRefTagImplBase { + public: + using elem_type = ListElemT; + using list_type = ArrayRef; + + /* + * These `unwrap` static methods unwraps the inner containers out + * of `IListRef` (and `IListRefIterator`). They are required when + * the macro `TORCH_ILISTREF_UNWRAP` is called. 
+ */ + static const list_type& unwrap(const IListRef& ilist) { + return ilist.payload_.unboxed; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.unboxed_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.unboxed_iterator; + } + + /* + * We have these function (besides the `unwrap`s above) because the + * implementation for both `IListRef::operator[]` and `IListRefIterator::operator*` + * weren't syntactically equal for the existing tags at the time + * (`Unboxed` and `Boxed`). + */ + static IListRefConstRef front(const list_type& lst) { + return lst.front(); + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return *it; + } +}; + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Boxed`. + */ +template +class IListRefTagImplBase { + public: + using elem_type = ListElemT; + using list_type = List; + + static const list_type& unwrap(const IListRef& ilist) { + return *ilist.payload_.boxed; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.boxed_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.boxed_iterator; + } + + static IListRefConstRef front(const list_type& lst) { + return lst[0]; + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return (*it).get().toTensor(); + } +}; + +/* + * Specializations of `IListRefTagImplBase` that implement the default + * implementation for `IListRefTag::Materialized`. 
+ */ +template +class IListRefTagImplBase> { + public: + using elem_type = MaterializedIListRefElem; + using list_type = MaterializedIListRef; + + static const list_type& unwrap(const IListRef& ilist) { + return *ilist.payload_.materialized; + } + + static typename list_type::const_iterator& unwrap(IListRefIterator& it) { + return it.payload_.materialized_iterator; + } + + static const typename list_type::const_iterator& unwrap( + const IListRefIterator& it) { + return it.payload_.materialized_iterator; + } + + static IListRefConstRef front(const list_type& lst) { + return lst[0]; + } + + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + return *it; + } +}; + +/* + * [Note: ITensorListRef] + * Specializations necessary for `IListRef` type. + * + * Since the default implementations are usually done with supporting + * `Tensor` in mind, we only have to inherit from the base implementations. + */ +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase< + IListRefTag::Materialized, + at::Tensor, + MaterializedIListRefElem> {}; + +/* + * [Note: IOptTensorListRef] + * Specializations necessary for `IListRef` type. + * + * We can't get an `at::OptionalTensorRef` directly from an instance of + * `List>` (the type that corresponds to the boxed world). + * + * So, the default implementation won't help us. Thus, we have to implement + * this method ourselves. + */ +template <> +class IListRefTagImpl + : public IListRefTagImplBase {}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase> { + + public: + /* + * Given an instance of the types corresponding to the `Boxed` tag, we override + * the default implementation, so that we can return a `at::OptionalTensorRef`. 
+ */ + static IListRefConstRef iterator_get( + const typename list_type::const_iterator& it) { + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdangling-reference") + const auto& ivalue = (*it).get(); + C10_DIAGNOSTIC_POP() + if (!ivalue.isNone()) { + const auto& tensor = ivalue.toTensor(); + return (tensor.defined()) ? tensor : at::OptionalTensorRef{}; + } + return {}; + } +}; + +template <> +class IListRefTagImpl + : public IListRefTagImplBase< + IListRefTag::Materialized, + at::OptionalTensorRef, + MaterializedIListRefElem> {}; + +} // namespace c10::detail + + +namespace at { + +// [Note: ITensorListRef] +using ITensorListRef = c10::IListRef; +using ITensorListRefIterator = c10::IListRefIterator; +using MaterializedITensorListRef = c10::detail::MaterializedIListRef; +// [Note: IOptTensorListRef] +using IOptTensorListRef = c10::IListRef; +using IOptTensorListRefIterator = c10::IListRefIterator; +using MaterializedIOptTensorListRef = c10::detail::MaterializedIListRef; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..26802a959251e74cdeb7fd0601ceb6fc098702c4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/LegacyTypeDispatch.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// The legacy mechanism for dispatching operators in ATen is a Type +// object, which is essentially a giant virtual dispatch table +// for every operation we support dynamically dispatching over. 
+// +// This has been deprecated in favor of ATenDispatch, and in the future, +// c10 dispatcher. +// TODO: Clean up what remains here + +#include + +namespace at { + +// A RAII, thread local (!) guard that will disable dispatch to variable +// handler. +// +// NOTE [ Treating Variables as non-Variables in type dispatch ] +// +// What exactly does AutoDispatchBelowAutograd do? The short answer is, it causes +// dispatches on ATen functions to go to the non-variable implementation, +// bypassing autograd handling (and also profiling and tracing). +// +// To understand why this guard exists, it's helpful to understand the history +// behind how Variable was implemented. Previously, Variables were implemented +// as a wrapper on Tensors; so the act of processing a Variable involved +// unwrapping the underlying Tensor, and then calling the underlying base +// operation on /that/ operation +// +// However, after the Variable/Tensor merge, there is no concept of unwrapping +// a tensor anymore. If you just call the operation on the same variable +// again inside your VariableType handler, you'll dispatch back to +// VariableType, which is not what we want. +// +// The solution to the above problem is to add `at::AutoDispatchBelowAutograd`, which +// when enabled will cause `legacyTensorType()` and `getType()` to always return +// non-Variable type, even if the tensor being called on is a variable. + +/* Note [AutoDispatchBelowAutograd] + * AutoDispatchBelowAutograd is **INTERNAL ONLY** that it should be used + * for kernel implementations and customized C++ kernels. + * If you are looking for a guard to run workload in inference mode, please use + * c10::InferenceMode RAII which is user facing API. + * In the past AutoDispatchBelowAutograd(or its old version AutoNonVariableTypeMode) + * was used in the user code for inference-only workload, this was under risk of + * producing wrong results silently in some edge cases. 
For example: + * ``` + * torch::Tensor s = torch::ones({1, 2, 3}).set_requires_grad(true); + * torch::Tensor out = s * s; + * { + * at::AutoDispatchBelowAutograd guard; + * s.add_(1); // Skips version bump on `s`. + * } + * // WRONG GRADIENT! s.grad() are now computed using `s` value after the + * // inplace update. + * out.backward(torch::ones_like(out)); + * ``` + * Users should use `c10::InferenceMode` here so that it'll properly throw an + * error saying "one of the variables needed for gradient computation has be modified." + */ +struct TORCH_API AutoDispatchBelowAutograd { + AutoDispatchBelowAutograd() : + autograd_guard_(c10::autograd_dispatch_keyset) { + } + + // disable all autograd dispatch keys + c10::impl::ExcludeDispatchKeyGuard autograd_guard_; +}; + +// TODO: AutoNonVariableTypeMode should be removed in release 1.10. +struct TORCH_API AutoNonVariableTypeMode { + AutoNonVariableTypeMode(bool enabled = true) : + autograd_guard_(c10::autograd_dispatch_keyset) { + TORCH_WARN_ONCE("AutoNonVariableTypeMode is deprecated and will be removed in 1.10 release. " + "For kernel implementations please use AutoDispatchBelowADInplaceOrView instead, " + "If you are looking for a user facing API to enable running your inference-only " + "workload, please use c10::InferenceMode. Using AutoDispatchBelowADInplaceOrView in user code " + "is under risk of producing silent wrong result in some edge cases. 
" + "See Note [AutoDispatchBelowAutograd] for more details."); + TORCH_INTERNAL_ASSERT(enabled); + } + + // disable all autograd dispatch keys + c10::impl::ExcludeDispatchKeyGuard autograd_guard_; +}; + +struct TORCH_API AutoDispatchSkipFunctionalize { + AutoDispatchSkipFunctionalize() : + dispatch_key_guard_(c10::DispatchKeySet(c10::DispatchKey::Functionalize)) { + } + c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_; +}; + +/* Note [AutoDispatchBelowADInplaceOrView] + * AutoDispatchBelowADInplaceOrView is equivalent to AutoNonVariableTypeMode + * before we split inplace & view ops out of VariableType kernel. + * Note this guard is used in VariableType kernels for functional ops + * as well as ADInplaceOrView kernels for inplace/view ops to enforce the + * Invariant: + * Once you are in VariableType/ADInplaceOrView kernel for an op, + * you never go back to a kernel on same dispatch key until + * you finish the current op. + */ +struct TORCH_API AutoDispatchBelowADInplaceOrView { + AutoDispatchBelowADInplaceOrView() : + dispatch_key_guard_(c10::autograd_dispatch_keyset_with_ADInplaceOrView) { + } + // disable Autograd & ADInplaceOrView dispatch keys + c10::impl::ExcludeDispatchKeyGuard dispatch_key_guard_; +}; +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List.h new file mode 100644 index 0000000000000000000000000000000000000000..f109430c5427471d494c4b8081ef519ee8329972 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List.h @@ -0,0 +1,496 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +class Tensor; +} +namespace c10 { +struct IValue; +template class List; +struct Type; + +namespace detail { + +struct ListImpl final : public c10::intrusive_ptr_target { + using list_type = std::vector; + + explicit TORCH_API ListImpl(list_type list_, TypePtr elementType_); + + list_type list; + + TypePtr elementType; + + intrusive_ptr copy() const { + return make_intrusive(list, elementType); + } + friend TORCH_API bool operator==(const ListImpl& lhs, const ListImpl& rhs); +}; +} + +namespace impl { + +template class ListIterator; + +template class ListElementReference; + +template +void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept; + +template +bool operator==(const ListElementReference& lhs, const T& rhs); + +template +bool operator==(const T& lhs, const ListElementReference& rhs); + +template +struct ListElementConstReferenceTraits { + // In the general case, we use IValue::to(). + using const_reference = typename c10::detail::ivalue_to_const_ref_overload_return::type; +}; + +// There is no to() overload for std::optional. 
+template<> +struct ListElementConstReferenceTraits> { + using const_reference = std::optional>; +}; + +template +class ListElementReference final { +public: + operator std::conditional_t< + std::is_reference_v::type>, + const T&, + T>() const; + + ListElementReference& operator=(T&& new_value) &&; + + ListElementReference& operator=(const T& new_value) &&; + + // assigning another ref to this assigns the underlying value + ListElementReference& operator=(ListElementReference&& rhs) && noexcept; + + const IValue& get() const& { + return *iterator_; + } + + friend void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept; + + ListElementReference(const ListElementReference&) = delete; + ListElementReference& operator=(const ListElementReference&) = delete; + ~ListElementReference() = default; + +private: + ListElementReference(Iterator iter) + : iterator_(iter) {} + + // allow moving, but only our friends (i.e. the List class) can move us + ListElementReference(ListElementReference&&) noexcept = default; + ListElementReference& operator=(ListElementReference&& rhs) & noexcept { + iterator_ = std::move(rhs.iterator_); + return *this; + } + + friend class List; + friend class ListIterator; + + Iterator iterator_; +}; + +// this wraps vector::iterator to make sure user code can't rely +// on it being the type of the underlying vector. 
+template +class ListIterator final { + public: + // C++17 friendly std::iterator implementation + using iterator_category = std::random_access_iterator_tag; + using value_type = T; + using difference_type = std::ptrdiff_t; + using pointer = T*; + using reference = ListElementReference; + + explicit ListIterator() = default; + ~ListIterator() = default; + + ListIterator(const ListIterator&) = default; + ListIterator(ListIterator&&) noexcept = default; + ListIterator& operator=(const ListIterator&) = default; + ListIterator& operator=(ListIterator&&) noexcept = default; + + ListIterator& operator++() { + ++iterator_; + return *this; + } + + ListIterator operator++(int) { + ListIterator copy(*this); + ++*this; + return copy; + } + + ListIterator& operator--() { + --iterator_; + return *this; + } + + ListIterator operator--(int) { + ListIterator copy(*this); + --*this; + return copy; + } + + ListIterator& operator+=(typename List::size_type offset) { + iterator_ += offset; + return *this; + } + + ListIterator& operator-=(typename List::size_type offset) { + iterator_ -= offset; + return *this; + } + + ListIterator operator+(typename List::size_type offset) const { + return ListIterator{iterator_ + offset}; + } + + ListIterator operator-(typename List::size_type offset) const { + return ListIterator{iterator_ - offset}; + } + + friend difference_type operator-(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ - rhs.iterator_; + } + + ListElementReference operator*() const { + return {iterator_}; + } + + ListElementReference operator[](typename List::size_type offset) const { + return {iterator_ + offset}; + } + +private: + explicit ListIterator(Iterator iterator): iterator_(std::move(iterator)) {} + + Iterator iterator_; + + friend bool operator==(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ == rhs.iterator_; + } + + friend bool operator!=(const ListIterator& lhs, const ListIterator& rhs) { + return !(lhs == rhs); 
+ } + + friend bool operator<(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ < rhs.iterator_; + } + + friend bool operator<=(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ <= rhs.iterator_; + } + + friend bool operator>(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ > rhs.iterator_; + } + + friend bool operator>=(const ListIterator& lhs, const ListIterator& rhs) { + return lhs.iterator_ >= rhs.iterator_; + } + + friend class ListIterator; + friend class List; +}; + +template List toTypedList(List list); +template List toList(List&& list); +template List toList(const List& list); +const IValue* ptr_to_first_element(const List& list); +} + +/** + * An object of this class stores a list of values of type T. + * + * This is a pointer type. After a copy, both Lists + * will share the same storage: + * + * > List a; + * > List b = a; + * > b.push_back("three"); + * > ASSERT("three" == a.get(0)); + * + * We use this class in the PyTorch kernel API instead of + * std::vector, because that allows us to do optimizations + * and switch out the underlying list implementation without + * breaking backwards compatibility for the kernel API. + */ +template +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class List final { +private: + // This is an intrusive_ptr because List is a pointer type. + // Invariant: This will never be a nullptr, there will always be a valid + // ListImpl. + c10::intrusive_ptr impl_; + + using internal_reference_type = impl::ListElementReference; + using internal_const_reference_type = typename impl::ListElementConstReferenceTraits::const_reference; + +public: + using value_type = T; + using size_type = typename c10::detail::ListImpl::list_type::size_type; + using iterator = impl::ListIterator; + using const_iterator = impl::ListIterator; + using reverse_iterator = impl::ListIterator; + + /** + * Constructs an empty list. 
+ */ + explicit List(); + + /** + * Constructs a list with some initial values. + * Example: + * List a({2, 3, 4}); + */ + List(std::initializer_list initial_values); + explicit List(ArrayRef initial_values); + + /** + * Create a generic list with runtime type information. + * This only works for c10::impl::GenericList and is not part of the public API + * but only supposed to be used internally by PyTorch. + */ + explicit List(TypePtr elementType); + + List(const List&) = default; + List& operator=(const List&) = default; + ~List() = default; + + /** + * Create a new List pointing to a deep copy of the same data. + * The List returned is a new list with separate storage. + * Changes in it are not reflected in the original list or vice versa. + */ + List copy() const; + + /** + * Returns the element at specified location pos, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + */ + internal_const_reference_type get(size_type pos) const; + + /** + * Moves out the element at the specified location pos and returns it, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + * The list contains an invalid element at position pos afterwards. Any operations + * on it before re-setting it are invalid. + */ + value_type extract(size_type pos) const; + + /** + * Returns a reference to the element at specified location pos, with bounds checking. + * If pos is not within the range of the container, an exception of type std::out_of_range is thrown. + * + * You cannot store the reference, but you can read it and assign new values to it: + * + * List list = ...; + * list[2] = 5; + * int64_t v = list[1]; + */ + internal_const_reference_type operator[](size_type pos) const; + + internal_reference_type operator[](size_type pos); + + /** + * Assigns a new value to the element at location pos. 
+ */ + void set(size_type pos, const value_type& value) const; + + /** + * Assigns a new value to the element at location pos. + */ + void set(size_type pos, value_type&& value) const; + + /** + * Returns an iterator to the first element of the container. + * If the container is empty, the returned iterator will be equal to end(). + */ + iterator begin() const; + + /** + * Returns an iterator to the element following the last element of the container. + * This element acts as a placeholder; attempting to access it results in undefined behavior. + */ + iterator end() const; + + /** + * Checks if the container has no elements. + */ + bool empty() const; + + /** + * Returns the number of elements in the container + */ + size_type size() const; + + /** + * Increase the capacity of the vector to a value that's greater or equal to new_cap. + */ + void reserve(size_type new_cap) const; + + /** + * Erases all elements from the container. After this call, size() returns zero. + * Invalidates any references, pointers, or iterators referring to contained elements. Any past-the-end iterators are also invalidated. + */ + void clear() const; + + /** + * Inserts value before pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator insert(iterator pos, const T& value) const; + + /** + * Inserts value before pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator insert(iterator pos, T&& value) const; + + /** + * Inserts a new element into the container directly before pos. + * The new element is constructed with the given arguments. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + template + iterator emplace(iterator pos, Args&&... 
value) const; + + /** + * Appends the given element value to the end of the container. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void push_back(const T& value) const; + + /** + * Appends the given element value to the end of the container. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void push_back(T&& value) const; + + /** + * Appends the given list to the end of the container. Uses at most one memory allocation. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void append(List lst) const; + + /** + * Appends the given element value to the end of the container. + * The new element is constructed with the given arguments. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + template + void emplace_back(Args&&... args) const; + + /** + * Removes the element at pos. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator erase(iterator pos) const; + + /** + * Removes the elements in the range [first, last). + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + iterator erase(iterator first, iterator last) const; + + /** + * Removes the last element of the container. + * Calling pop_back on an empty container is undefined. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void pop_back() const; + + /** + * Resizes the container to contain count elements. 
+ * If the current size is less than count, additional default-inserted elements are appended. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void resize(size_type count) const; + + /** + * Resizes the container to contain count elements. + * If the current size is less than count, additional copies of value are appended. + * May invalidate any references, pointers, or iterators referring to contained elements. Any past-the-end iterators may also be invalidated. + */ + void resize(size_type count, const T& value) const; + + /** + * Value equality comparison. This function implements Python-like semantics for + * equality: two lists with the same identity (e.g. same pointer) trivially + * compare equal, otherwise each element is compared for equality. + */ + template + friend bool operator==(const List& lhs, const List& rhs); + + template + friend bool operator!=(const List& lhs, const List& rhs); + + /** + * Identity comparison. Returns true if and only if `rhs` represents the same + * List object as `this`. + */ + bool is(const List& rhs) const; + + std::vector vec() const; + + /** + * Returns the number of Lists currently pointing to this same list. + * If this is the only instance pointing to this list, returns 1. + */ + // TODO Test use_count + size_t use_count() const; + + TypePtr elementType() const; + + // See [unsafe set type] for why this exists. + void unsafeSetElementType(TypePtr t); + +private: + explicit List(c10::intrusive_ptr&& elements); + explicit List(const c10::intrusive_ptr& elements); + friend struct IValue; + template friend List impl::toTypedList(List); + template friend List impl::toList(List&&); + template friend List impl::toList(const List&); + friend const IValue* impl::ptr_to_first_element(const List& list); +}; + +namespace impl { +// GenericList is how IValue stores lists. It is, however, not part of the +// public API. 
Kernels should use Lists with concrete types instead
+// (maybe except for some internal prim ops).
+using GenericList = List;
+
+}
+}
+
+namespace torch {
+  template using List = c10::List;
+}
+
+#include // IWYU pragma: keep
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h
new file mode 100644
index 0000000000000000000000000000000000000000..55c1e24c25707009608a16be209661ad9b0c827f
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/List_inl.h
@@ -0,0 +1,358 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+
+namespace c10 {
+
+// NOTE(review): in this copy of the header the template argument lists
+// (`<...>`) and the #include targets appear to have been stripped, so the
+// definitions below do not compile as shown. The comments in this file only
+// describe what the remaining text demonstrates.
+
+template decltype(auto) getTypePtr();
+std::string toString(const Type& type);
+
+// Wraps an existing list implementation; the caller's intrusive_ptr is moved
+// into impl_.
+template
+List::List(c10::intrusive_ptr&& elements)
+: impl_(std::move(elements)) {}
+
+// Wraps an existing list implementation by copying the intrusive_ptr into
+// impl_.
+template
+List::List(const c10::intrusive_ptr& elements)
+: impl_(elements) {}
+
+// Creates an empty list whose element type comes from getTypePtr().
+// The static_assert directs callers of the rejected instantiation to
+// c10::impl::GenericList(elementType) instead.
+template
+List::List()
+: List(make_intrusive(
+  typename c10::detail::ListImpl::list_type(),
+  getTypePtr())) {
+  static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType) instead.");
+}
+
+// Creates a list holding a copy of every element of `values`. Storage is
+// reserved up front so the push_back loop does not have to regrow the list.
+template
+List::List(ArrayRef values)
+: List(make_intrusive(
+    typename c10::detail::ListImpl::list_type(),
+    getTypePtr())) {
+  static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType).");
+  impl_->list.reserve(values.size());
+  for (const T& element : values) {
+    impl_->list.push_back(element);
+  }
+}
+
+// Delegates to the ArrayRef constructor above.
+template
+List::List(std::initializer_list initial_values)
+: List(ArrayRef(initial_values)) {
+  static_assert(!std::is_same_v, "This constructor is not valid for List. Please use c10::impl::GenericList(elementType).");
+}
+
+// Creates an empty list with an explicitly supplied element type. The
+// static_assert restricts this constructor to the instantiations named in its
+// message.
+template
+List::List(TypePtr elementType)
+: List(make_intrusive(
+    typename c10::detail::ListImpl::list_type(),
+    std::move(elementType))) {
+  static_assert(std::is_same_v || std::is_same_v>,
+                "This constructor is only valid for c10::impl::GenericList or List.");
+}
+
+namespace impl {
+// Converts a GenericList into a typed List without copying the elements:
+// the implementation pointer is moved into the returned list. Fails the
+// TORCH_CHECK below when the element types are incompatible.
+template
+List toTypedList(impl::GenericList list) {
+  // If there are other instances of the list (i.e. list.use_count() > 1), then we have to be invariant
+  // because upcasting would allow people to add types into the new list that would break the old list.
+  // However, if there aren't any other instances of this list (i.e. list.use_count() == 1), then we can
+  // allow upcasting. This can be a perf improvement since we can cast List<T> to List<optional<T>>
+  // without having to copy it. This is also used to provide backwards compatibility with some old models
+  // that serialized the index arguments to aten::index, aten::index_put, aten::index_put_ and aten::index_put_impl_
+  // as List<Tensor> before we changed that argument to be List<optional<Tensor>>. When deserializing, we
+  // have list.use_count() == 1 and can deserialize the List<Tensor> directly as List<optional<Tensor>>.
+  TORCH_CHECK(*list.impl_->elementType == *getTypePtr()
+    || (list.use_count() == 1 && list.impl_->elementType->isSubtypeOf(*getTypePtr()))
+    , "Tried to cast a List<", toString(*list.impl_->elementType), "> to a List<", toString(*getTypePtr()), ">. Types mismatch.");
+  return List(std::move(list.impl_));
+}
+
+// Converts a typed list into a GenericList. The rvalue overload moves the
+// implementation pointer out of `list`; the const& overload copies the
+// pointer, so both lists refer to the same implementation.
+template
+impl::GenericList toList(List&& list) {
+  return GenericList(std::move(list.impl_));
+}
+template
+impl::GenericList toList(const List& list) {
+  return GenericList(list.impl_);
+}
+}
+
+// Returns a new List wrapping whatever impl_->copy() produces.
+template
+List List::copy() const {
+  return List(impl_->copy());
+}
+
+namespace detail {
+  // list_element_to: converts a stored element to T. The first overload
+  // returns its argument unchanged; the IValue overloads call IValue::to(),
+  // the rvalue one on a moved-from-able element.
+  template
+  T list_element_to(T element) {
+    return element;
+  }
+  template
+  T list_element_to(const IValue& element) {
+    return element.template to();
+  }
+  template
+  T list_element_to(IValue&& element) {
+    return std::move(element).template to();
+  }
+  // ListElementFrom: converts a T into the IValue that is stored in the list
+  // (by copy or by move).
+  template
+  struct ListElementFrom {
+    static IValue from(const T& element) {
+      return element;
+    }
+    static IValue from(T&& element) {
+      return std::move(element);
+    }
+  };
+  // Full specialization that passes an IValue straight through by reference
+  // instead of constructing a new one.
+  template<>
+  struct ListElementFrom {
+    static const IValue& from(const IValue& element) {
+      return element;
+    }
+    static IValue&& from(IValue&& element) {
+      return std::move(element);
+    }
+  };
+}
+
+namespace impl {
+
+// Conversion operator of the element proxy: reads the referenced element and
+// converts it with IValue::to().
+template
+ListElementReference::operator std::conditional_t<
+    std::is_reference_v::type>,
+    const T&,
+    T>() const {
+  return iterator_->template to();
+}
+
+// Assigning through the proxy overwrites the referenced list element.
+// These operators are &&-qualified, i.e. only callable on rvalue proxies.
+template
+ListElementReference& ListElementReference::operator=(T&& new_value) && {
+  *iterator_ = c10::detail::ListElementFrom::from(std::move(new_value));
+  return *this;
+}
+
+template
+ListElementReference& ListElementReference::operator=(const T& new_value) && {
+  *iterator_ = c10::detail::ListElementFrom::from(new_value);
+  return *this;
+}
+
+// NOTE(review): although `rhs` is an rvalue proxy, the referenced element is
+// copy-assigned (`*rhs.iterator_` is not moved from).
+template
+ListElementReference& ListElementReference::operator=(ListElementReference&& rhs) && noexcept {
+  *iterator_ = *rhs.iterator_;
+  return *this;
+}
+
+// Swaps the two referenced elements, not the proxies themselves.
+template
+void swap(ListElementReference&& lhs, ListElementReference&& rhs) noexcept {
+  std::swap(*lhs.iterator_, *rhs.iterator_);
+}
+
+// Compares the referenced element (converted to T) with a plain value.
+template
+bool operator==(const ListElementReference& lhs, const T& rhs) {
+  const T& lhs_tmp = lhs;
+  return lhs_tmp == rhs;
+}
+
+template
+inline bool operator==(const T& lhs, const ListElementReference& rhs) {
+  return rhs == lhs;
+}
+
+// Produces the const-reference view of a stored element; the generic version
+// goes through IValue::to().
+template
+inline typename ListElementConstReferenceTraits::const_reference
+list_element_to_const_ref(const IValue& element) {
+  return element.template to();
+}
+
+// Specialization that uses IValue::toOptionalStringRef() instead of to().
+template<>
+inline typename ListElementConstReferenceTraits>::const_reference
+list_element_to_const_ref>(const IValue& element) {
+  return element.toOptionalStringRef();
+}
+
+} // namespace impl
+
+// set(): bounds-checked (list.at) replacement of the element at `pos`.
+template
+void List::set(size_type pos, const value_type& value) const {
+  impl_->list.at(pos) = c10::detail::ListElementFrom::from(value);
+}
+
+template
+void List::set(size_type pos, value_type&& value) const {
+  impl_->list.at(pos) = c10::detail::ListElementFrom::from(std::move(value));
+}
+
+// get(): same as the const operator[] below.
+template
+typename List::internal_const_reference_type List::get(size_type pos) const {
+  return operator[](pos);
+}
+
+// Bounds-checked read access (list.at).
+template
+typename List::internal_const_reference_type List::operator[](size_type pos) const {
+  return c10::impl::list_element_to_const_ref(impl_->list.at(pos));
+}
+
+// Bounds-checked write access: at() is called only for its range check, then
+// a proxy built from the iterator at `pos` is returned.
+template
+typename List::internal_reference_type List::operator[](size_type pos) {
+  static_cast(impl_->list.at(pos)); // Throw the exception if it is out of range.
+  return {impl_->list.begin() + static_castlist)::difference_type>(pos)};
+}
+
+// Moves the element at `pos` out of the list and returns it; the slot is
+// refilled with a value-initialized T.
+template
+typename List::value_type List::extract(size_type pos) const {
+  auto& elem = impl_->list.at(pos);
+  auto result = c10::detail::list_element_to(std::move(elem));
+  // Reset the list element to a T() instead of None to keep it correctly typed
+  elem = c10::detail::ListElementFrom::from(T{});
+  return result;
+}
+
+// The members below forward to the same-named operation on impl_->list.
+// They are const-qualified even when they mutate, because only the shared
+// implementation object behind impl_ is modified.
+template
+typename List::iterator List::begin() const {
+  return iterator(impl_->list.begin());
+}
+
+template
+typename List::iterator List::end() const {
+  return iterator(impl_->list.end());
+}
+
+template
+bool List::empty() const {
+  return impl_->list.empty();
+}
+
+template
+typename List::size_type List::size() const {
+  return impl_->list.size();
+}
+
+template
+void List::reserve(size_type new_cap) const {
+  impl_->list.reserve(new_cap);
+}
+
+template
+void List::clear() const {
+  impl_->list.clear();
+}
+
+template
+typename List::iterator List::insert(iterator pos, const T& value) const {
+  return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom::from(value)) };
+}
+
+template
+typename List::iterator List::insert(iterator pos, T&& value) const {
+  return iterator { impl_->list.insert(pos.iterator_, c10::detail::ListElementFrom::from(std::move(value))) };
+}
+
+// Forwards the arguments directly to the underlying list's emplace (no
+// ListElementFrom conversion, see the TODO).
+template
+template
+typename List::iterator List::emplace(iterator pos, Args&&... value) const {
+  // TODO Use list_element_from?
+  return iterator { impl_->list.emplace(pos.iterator_, std::forward(value)...) };
+}
+
+template
+void List::push_back(const T& value) const {
+  impl_->list.push_back(c10::detail::ListElementFrom::from(value));
+}
+
+template
+void List::push_back(T&& value) const {
+  impl_->list.push_back(c10::detail::ListElementFrom::from(std::move(value)));
+}
+
+// Appends all elements of `b`. When `b` is the only handle to its
+// implementation (use_count() == 1) the elements are moved, otherwise copied.
+template
+void List::append(List b) const {
+  if (b.use_count() == 1) {
+    impl_->list.insert(impl_->list.end(), make_move_iterator(b.impl_->list.begin()), make_move_iterator(b.impl_->list.end()));
+  } else {
+    impl_->list.insert(impl_->list.end(), b.impl_->list.begin(), b.impl_->list.end());
+  }
+}
+
+// Constructs a T from the arguments and push_backs it.
+template
+template
+void List::emplace_back(Args&&... args) const {
+  // TODO Use list_element_from?
+  impl_->list.push_back(T(std::forward(args)...));
+}
+
+template
+typename List::iterator List::erase(iterator pos) const {
+  return iterator { impl_->list.erase(pos.iterator_) };
+}
+
+template
+typename List::iterator List::erase(iterator first, iterator last) const {
+  return iterator { impl_->list.erase(first.iterator_, last.iterator_) };
+}
+
+template
+void List::pop_back() const {
+  impl_->list.pop_back();
+}
+
+// New slots are filled with a value-initialized T.
+template
+void List::resize(size_type count) const {
+  impl_->list.resize(count, T{});
+}
+
+// New slots are filled with copies of `value`.
+template
+void List::resize(size_type count, const T& value) const {
+  impl_->list.resize(count, value);
+}
+
+// Value equality with an identity shortcut.
+template
+bool operator==(const List& lhs, const List& rhs) {
+  // Lists with the same identity trivially compare equal.
+  if (lhs.impl_ == rhs.impl_) {
+    return true;
+  }
+
+  // Otherwise, just compare values directly.
+  return *lhs.impl_ == *rhs.impl_;
+}
+
+template
+bool operator!=(const List& lhs, const List& rhs) {
+  return !(lhs == rhs);
+}
+
+// Identity comparison: true when both handles hold the same impl_ pointer.
+template
+bool List::is(const List& rhs) const {
+  return this->impl_ == rhs.impl_;
+}
+
+// Copies the elements into a std::vector built from [begin(), end()).
+template
+std::vector List::vec() const {
+  std::vector result(begin(), end());
+  return result;
+}
+
+// Reference count reported by the intrusive_ptr in impl_.
+template
+size_t List::use_count() const {
+  return impl_.use_count();
+}
+
+template
+TypePtr List::elementType() const {
+  return impl_->elementType;
+}
+
+// Overwrites the stored element type without touching the elements.
+template
+void List::unsafeSetElementType(TypePtr t) {
+  impl_->elementType = std::move(t);
+}
+
+}
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h
new file mode 100644
index 0000000000000000000000000000000000000000..7eb7a2c4bdc17d6d4df0d12a9c97c7dbbccd8485
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/MT19937RNGEngine.h
@@ -0,0 +1,199 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+
+// define constants like M_PI and C keywords for MSVC
+#ifdef _MSC_VER
+#ifndef _USE_MATH_DEFINES
+#define _USE_MATH_DEFINES
+#endif
+#include
+#endif
+
+#include
+#include
+#include
+
+namespace at {
+
+// Parameters of the generator: state array length (N), the offset (M) used
+// when regenerating the state, the twist constant, and the masks selecting
+// the top bit (UMASK) and the low 31 bits (LMASK) of a state word.
+constexpr int MERSENNE_STATE_N = 624;
+constexpr int MERSENNE_STATE_M = 397;
+constexpr uint32_t MATRIX_A = 0x9908b0df;
+constexpr uint32_t UMASK = 0x80000000;
+constexpr uint32_t LMASK = 0x7fffffff;
+
+/**
+ * Note [Mt19937 Engine implementation]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Originally implemented in:
+ * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/MT2002/CODES/MTARCOK/mt19937ar-cok.c
+ * and modified with C++ constructs.
Moreover the state array of the engine + * has been modified to hold 32 bit uints instead of 64 bits. + * + * Note that we reimplemented mt19937 instead of using std::mt19937 because, + * at::mt19937 turns out to be faster in the pytorch codebase. PyTorch builds with -O2 + * by default and following are the benchmark numbers (benchmark code can be found at + * https://github.com/syed-ahmed/benchmark-rngs): + * + * with -O2 + * Time to get 100000000 philox randoms with at::uniform_real_distribution = 0.462759s + * Time to get 100000000 at::mt19937 randoms with at::uniform_real_distribution = 0.39628s + * Time to get 100000000 std::mt19937 randoms with std::uniform_real_distribution = 0.352087s + * Time to get 100000000 std::mt19937 randoms with at::uniform_real_distribution = 0.419454s + * + * std::mt19937 is faster when used in conjunction with std::uniform_real_distribution, + * however we can't use std::uniform_real_distribution because of this bug: + * http://open-std.org/JTC1/SC22/WG21/docs/lwg-active.html#2524. Plus, even if we used + * std::uniform_real_distribution and filtered out the 1's, it is a different algorithm + * than what's in pytorch currently and that messes up the tests in tests_distributions.py. + * The other option, using std::mt19937 with at::uniform_real_distribution is a tad bit slower + * than at::mt19937 with at::uniform_real_distribution and hence, we went with the latter. + * + * Copyright notice: + * A C-program for MT19937, with initialization improved 2002/2/10. + * Coded by Takuji Nishimura and Makoto Matsumoto. + * This is a faster version by taking Shawn Cokus's optimization, + * Matthe Bellew's simplification, Isaku Wada's real version. + * + * Before using, initialize the state by using init_genrand(seed) + * or init_by_array(init_key, key_length). + * + * Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * + * Any feedback is very welcome. + * http://www.math.sci.hiroshima-u.ac.jp/~m-mat/MT/emt.html + * email: m-mat @ math.sci.hiroshima-u.ac.jp (remove space) + */ + +/** + * mt19937_data_pod is used to get POD data in and out + * of mt19937_engine. Used in torch.get_rng_state and + * torch.set_rng_state functions. 
+ */
+struct mt19937_data_pod {
+  uint64_t seed_;    // seed passed at construction (kept in full, see init_with_uint32)
+  int left_;         // outputs remaining before the state must be regenerated
+  bool seeded_;      // set to true by init_with_uint32
+  uint32_t next_;    // index of the next state word to hand out
+  std::array state_; // generator state words
+};
+
+// Mersenne Twister engine; every call to operator() yields one 32-bit value.
+class mt19937_engine {
+public:
+
+  // Seeds the engine; 5489 is the default seed.
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
+  inline explicit mt19937_engine(uint64_t seed = 5489) {
+    init_with_uint32(seed);
+  }
+
+  // Returns a copy of the complete engine state.
+  inline mt19937_data_pod data() const {
+    return data_;
+  }
+
+  // Replaces the complete engine state. No validation happens here; callers
+  // can check the result with is_valid().
+  inline void set_data(const mt19937_data_pod& data) {
+    data_ = data;
+  }
+
+  inline uint64_t seed() const {
+    return data_.seed_;
+  }
+
+  // Sanity check of the bookkeeping fields: the engine must be seeded,
+  // left_ must be in (0, N] and next_ must be in [0, N].
+  // NOTE(review): does not modify the engine but is not declared const.
+  inline bool is_valid() {
+    if ((data_.seeded_ == true)
+      && (data_.left_ > 0 && data_.left_ <= MERSENNE_STATE_N)
+      && (data_.next_ <= MERSENNE_STATE_N)) {
+      return true;
+    }
+    return false;
+  }
+
+  // Returns the next 32-bit output. The state array is regenerated when the
+  // remaining-output counter reaches zero; the selected state word is then
+  // tempered with the shift/mask sequence below.
+  inline uint32_t operator()() {
+    if (--(data_.left_) == 0) {
+        next_state();
+    }
+    uint32_t y = *(data_.state_.data() + data_.next_++);
+    y ^= (y >> 11);
+    y ^= (y << 7) & 0x9d2c5680;
+    y ^= (y << 15) & 0xefc60000;
+    y ^= (y >> 18);
+
+    return y;
+  }
+
+private:
+  mt19937_data_pod data_;
+
+  // Fills the state array from the seed. Only the low 32 bits of `seed` go
+  // into state_[0]; the full 64-bit value is kept in seed_.
+  // left_ is set to 1 so that the first operator() call regenerates the state.
+  inline void init_with_uint32(uint64_t seed) {
+    data_.seed_ = seed;
+    data_.seeded_ = true;
+    data_.state_[0] = seed & 0xffffffff;
+    for (const auto j : c10::irange(1, MERSENNE_STATE_N)) {
+      data_.state_[j] = (1812433253 * (data_.state_[j-1] ^ (data_.state_[j-1] >> 30)) + j);
+    }
+    data_.left_ = 1;
+    data_.next_ = 0;
+  }
+
+  // Combines the top bit of u with the low 31 bits of v.
+  inline uint32_t mix_bits(uint32_t u, uint32_t v) {
+    return (u & UMASK) | (v & LMASK);
+  }
+
+  // Shifts the mixed word right by one and xors in MATRIX_A when v is odd.
+  inline uint32_t twist(uint32_t u, uint32_t v) {
+    return (mix_bits(u,v) >> 1) ^ (v & 1 ? MATRIX_A : 0);
+  }
+
+  // Regenerates all N state words in place and resets the counters.
+  inline void next_state() {
+    uint32_t* p = data_.state_.data();
+    data_.left_ = MERSENNE_STATE_N;
+    data_.next_ = 0;
+
+    // First N - M words: the partner word lies M positions ahead.
+    // (`--j` is the loop condition, so this runs N - M times.)
+    for(int j = MERSENNE_STATE_N - MERSENNE_STATE_M + 1; --j; p++) {
+      *p = p[MERSENNE_STATE_M] ^ twist(p[0], p[1]);
+    }
+
+    // Next M - 1 words: the partner index wraps around, hence the negative
+    // offset M - N.
+    for(int j = MERSENNE_STATE_M; --j; p++) {
+      *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], p[1]);
+    }
+
+    // Last word: its successor wraps around to state_[0].
+    *p = p[MERSENNE_STATE_M - MERSENNE_STATE_N] ^ twist(p[0], data_.state_[0]);
+  }
+
+};
+
+typedef mt19937_engine mt19937;
+
+} // namespace at
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h
new file mode 100644
index 0000000000000000000000000000000000000000..890373220b0a34e893e9fd950a93229f22eea137
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NamedTensor.h
@@ -0,0 +1,148 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+
+namespace at {
+
+class TensorBase;
+
+// XXX: This file exists because TensorImpl is in c10, but Dimname is in ATen.
+// Due to the c10/ATen library split, TensorImpl cannot depend on Dimname,
+// so we have a couple of workarounds.
+//
+// In the long term, we'll move Dimname to c10 and everything in this file
+// can be refactored out. The main blocker for that is that "c10::Symbol"
+// actually exists outside of c10 and needs to be moved in.
+
+// TensorImpl has a unique_ptr<NamedTensorMetaInterface> field.
+// XXX: Ideally we would just put std::optional<vector<Dimname>> into TensorImpl.
+//
+// This class has an important invariant: there must be at least ONE
+// non-wildcard
+struct TORCH_API NamedTensorMeta final : public c10::NamedTensorMetaInterface {
+  // This enum is to remind people that the invariant on constructors is that
+  // the list of dimnames must have at least one non-wildcard
+  enum HAS_NON_WILDCARD {
+    HasNonWildcard
+  };
+
+  // Copies the given names into names_ (names.vec()) and checks the invariant.
+  explicit NamedTensorMeta(HAS_NON_WILDCARD /*unused*/, DimnameList names)
+    : names_(names.vec()) {
+    check_invariants();
+  }
+  // Takes ownership of the given vector (moved into names_) and checks the
+  // invariant.
+  explicit NamedTensorMeta(HAS_NON_WILDCARD /*unused*/, std::vector&& names)
+    : names_(std::move(names)) {
+    check_invariants();
+  }
+
+  // Returns a new NamedTensorMeta constructed from the current names_.
+  std::unique_ptr clone() const override {
+    return std::make_unique(HasNonWildcard, names_);
+  }
+
+  // View of the stored names (no copy is made here).
+  DimnameList names() const { return names_; }
+
+  // Used for an assertion in TensorImpl.h
+  // Returns the number of stored names.
+  int64_t slow_dim() const override {
+    return static_cast(names_.size());
+  }
+
+  // Asserts that at least one stored name is not a wildcard. The macro name
+  // suggests the check is only active in debug builds.
+  void check_invariants() const {
+    TORCH_INTERNAL_ASSERT_DEBUG_ONLY(
+      std::any_of(names_.begin(), names_.end(), [](const Dimname& n) { return !n.isWildcard(); }));
+  }
+
+  // Overwrites the names in place; the number of names must not change.
+  void set_names(HAS_NON_WILDCARD /*unused*/, DimnameList new_names) {
+    TORCH_INTERNAL_ASSERT(new_names.size() == names_.size());
+    std::copy(new_names.begin(), new_names.end(), names_.begin());
+    check_invariants();
+  }
+
+  // Replaces the names by move-assignment; the number of names must not
+  // change.
+  void set_names(HAS_NON_WILDCARD /*unused*/, std::vector&& new_names) {
+    TORCH_INTERNAL_ASSERT(new_names.size() == names_.size());
+    names_ = std::move(new_names);
+    check_invariants();
+  }
+
+  // INVARIANT: at least one Dimname is non-WILDCARD
+  // NOTE(review): this is a public data member (struct, no access specifier),
+  // so the invariant can be bypassed by writing to it directly.
+  std::vector names_;
+};
+
+// When NamesMode is disabled, then all operations ignore tensors' names fields.
+// Concretely speaking, all tensors are treated as having nullopt names.
+struct TORCH_API NamesMode {
+  static bool is_enabled();
+  static void set_enabled(bool enabled);
+};
+
+
+// A RAII, thread local (!) guard that enables or disables names upon
+// construction, and sets it back to the original value upon destruction.
+struct TORCH_API NoNamesGuard {
+  // Remembers the current NamesMode setting, then disables names.
+  NoNamesGuard() : prev_mode(NamesMode::is_enabled()) {
+    NamesMode::set_enabled(false);
+  }
+  // Neither copyable nor movable.
+  NoNamesGuard(const NoNamesGuard&) = delete;
+  NoNamesGuard(NoNamesGuard&&) = delete;
+  NoNamesGuard& operator=(const NoNamesGuard&) = delete;
+  NoNamesGuard& operator=(NoNamesGuard&&) = delete;
+  // Restores the remembered setting.
+  ~NoNamesGuard() {
+    if (initialized) {
+      reset();
+    }
+  }
+  // Restores the setting that was active when the guard was constructed.
+  // NOTE(review): nothing in this struct ever sets `initialized` to false, so
+  // the destructor restores prev_mode again even after an explicit reset().
+  void reset() {
+    TORCH_INTERNAL_ASSERT(initialized);
+    NamesMode::set_enabled(prev_mode);
+  }
+ private:
+  bool prev_mode;         // NamesMode::is_enabled() at construction time
+  bool initialized{true}; // always true as far as this struct shows
+};
+
+void check_names_valid_for(const TensorBase& tensor, DimnameList names);
+void check_names_valid_for(size_t tensor_dim, DimnameList names);
+
+// Sets the names of `tensor` to be `names`.
+TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::optional names);
+TORCH_API const TensorBase& internal_set_names_inplace(const TensorBase& tensor, std::vector&& names, bool validate_names);
+
+constexpr size_t kMaxNamedTensorDim = 64;
+
+DimnameList default_names(size_t len);
+
+namespace impl {
+
+// Some helper functions on TensorImpl. Useful for working with names in TH.
+// XXX: Ideally these would exist as methods on TensorImpl
+TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::optional names, bool validate_names);
+TORCH_API void internal_set_names_inplace(TensorImpl* impl, std::vector&& names, bool validate_names);
+
+void check_names_valid_for(TensorImpl* impl, DimnameList names);
+
+// Returns true if the tensor's names exist and are not all 'None'.
+// Returns false if the tensor's names don't exist (were not allocated),
+// or if all names are 'None'.
+// We treat not-allocated-names the same as allocated names that are all 'None'.
+TORCH_API bool has_names(const TensorImpl* impl);
+
+// Returns the names of the tensor's dimensions.
+// Unnamed tensors are treated as having 'None' in all dimension; this method
+// would return a DimnameList of all 'None's for an unnamed tensor.
+TORCH_API DimnameList get_names(const TensorImpl* impl);
+
+// This is more of an implementation detail; one should use impl::get_names /
+// Tensor::names() whenever possible because it provides a cleaner API.
+// Returns the names of the tensor if they have been allocated; returns nullopt
+// instead if they haven't been. The names of a tensor are not allocated if a
+// tensor is constructed with names=None.
+TORCH_API std::optional get_opt_names(const TensorImpl* impl);
+
+} // namespace impl
+
+} // namespace at
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h
new file mode 100644
index 0000000000000000000000000000000000000000..5104fa756ff6d9dea41b70d4140b1122ddf1bc14
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/NestedIntSymNodeImpl.h
@@ -0,0 +1,192 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace c10 {
+
+// The motivating usecase for this is to represent the ragged size structure
+// of a jagged tensor [B, [s_0, s_1, s_2], D] as a single integer j0. This
+// allows us to simply return [B, j0, D] if someone queries for the size of our
+// tensor.
+//
+// Morally we define comparison between two nested ints to return true if
+// that comparison holds for all corresponding elements of the arrays they
+// represent. Comparison between a nested int and a plain int is defined
+// similarly.
+//
+// To simulate this desired behavior but also avoid the O(N) cost of checking,
+// we associate each raggedness pattern with an integer "id" that can be used as
+// a proxy to evaluate equality. We also constrain the range of values for this
+// so as to enable inequality checks.
+//
+// We also support a positive integer scalar "coeff" that is used for computing
+// strides. For example, given a [B, j0, D] tensor, it can be strided in two
+// different ways: [D * j0, D, 1] and [j0, 1, sum(j0)]. The coeff is used to
+// differentiate the two cases.
+//
+// During tracing the strides of the outputs need to be a function of the size
+// and strides of the inputs so it is important that NestedIntSymNode itself is
+// able to express this.
+class TORCH_API NestedIntSymNodeImpl : public SymNodeImpl {
+ public:
+  // CAUTION: you should probably not be constructing these directly; please
+  // use the higher-level API in python instead (TODO: actually introduce that).
+  explicit NestedIntSymNodeImpl(int64_t val, int64_t coeff)
+      : val_(val), coeff_(coeff) {}
+
+  // Type queries: a nested int reports itself as an int (and as a nested
+  // int), never as a float or bool.
+  bool bool_() override {
+    return false;
+  }
+
+  bool is_int() override {
+    return true;
+  }
+
+  bool is_float() override {
+    return false;
+  }
+
+  bool is_bool() override {
+    return false;
+  }
+
+  bool is_nested_int() const override {
+    return true;
+  }
+
+  bool has_hint() override {
+    return true;
+  }
+
+  // Wraps a plain integer in a SymNode.
+  c10::SymNode wrap_int(int64_t num) override {
+    return SymNode(c10::make_intrusive>(num));
+  }
+
+  // The guard_* functions and int_() unconditionally fail their TORCH_CHECK.
+  // NOTE(review): guard_int and int_ pass no message, unlike guard_float and
+  // guard_bool.
+  int64_t guard_int(const char* file, int64_t line) override {
+    TORCH_CHECK(false);
+  }
+
+  double guard_float(const char* file, int64_t line) override {
+    TORCH_CHECK(false, "not a float");
+  }
+
+  bool guard_bool(const char* file, int64_t line) override {
+    TORCH_CHECK(false, "not a bool");
+  }
+
+  int64_t int_() override {
+    TORCH_CHECK(false);
+  }
+
+  // Renders as "j<val>" when coeff_ is 1, otherwise as "<coeff>*j<val>".
+  std::string str() override {
+    if (coeff_ == 1) {
+      return "j" + std::to_string(val_);
+    }
+    return std::to_string(coeff_) + "*j" + std::to_string(val_);
+  }
+
+  // NOTE [ Inequalities with nested int ]
+  //
+  // The semantics of nested int when it comes to relations is that it is
+  // treated as integer known to be within a certain range,
+  //
+  //     j0 \in [2, int64_t::max]
+  //
+  // allowing us to answer queries like j0 >= 1 (True), and j0 == 0 (False).
+  // This is a useful default range for the raggedness pattern of a jagged
+  // tensor (1) since sizes are non-negative, and (2) we need to get past 0/1
+  // specialization checks.
+  //
+  // [ Indeterminate inequalities error out ]
+  //
+  // Given the semantic defined above, certain relations like j0 < 3 are thus
+  // indeterminable. In our impl today, evaluating such relations raises an error.
+  //
+  // It may seem convenient to just define indeterminate relations to return
+  // False, but the implementation we maintain in parallel using sympy does not
+  // allow this.
+  //
+  // Sympy only allows overriding of Ge. The other relations (Lt, Gt, Le) are,
+  // by consequence, all derived from Ge e.g., Lt(a, b) := !Ge(a, b). This
+  // would mean that if we define the indeterminate j0 >= 3 to be
+  // False, the also indeterminate j0 < 3 will be evaluated to be True!
+  //
+  // [ Coefficient are assumed positive ]
+  //
+  // For the purpose of computing inequalities, we consider the coefficient of
+  // the nested int to be a positive integer.
+  //
+  // Thus, no modifications are needed to the logic since
+  // j0 >= k implies coeff * j0 >= k
+  //
+  // Declared here, defined elsewhere (not visible in this header).
+  c10::SymNode eq(const c10::SymNode& other) override;
+  c10::SymNode ne(const c10::SymNode& other) override;
+  c10::SymNode ge(const c10::SymNode& other) override;
+  c10::SymNode gt(const c10::SymNode& other) override;
+  c10::SymNode lt(const c10::SymNode& other) override;
+  c10::SymNode le(const c10::SymNode& other) override;
+  c10::SymNode mul(const c10::SymNode& other) override;
+
+  // Returns the value this node was constructed with.
+  std::optional nested_int() override {
+    return val_;
+  }
+
+  // Returns the coefficient this node was constructed with.
+  std::optional nested_int_coeff() override {
+    return coeff_;
+  }
+
+  bool is_symbolic() override {
+    return false;
+  }
+
+  c10::SymNode clone() override;
+
+// Generates an override for a binary operation that always fails with
+// "<name> not supported by NestedIntSymNode".
+#define DEFINE_BINARY_NOT_SUPPORTED(name)                           \
+  c10::SymNode name(const c10::SymNode& other) override {           \
+    TORCH_CHECK(false, #name " not supported by NestedIntSymNode"); \
+  }
+
+  DEFINE_BINARY_NOT_SUPPORTED(add)
+  DEFINE_BINARY_NOT_SUPPORTED(sub)
+  DEFINE_BINARY_NOT_SUPPORTED(truediv)
+  DEFINE_BINARY_NOT_SUPPORTED(pow)
+  DEFINE_BINARY_NOT_SUPPORTED(floordiv)
+  DEFINE_BINARY_NOT_SUPPORTED(mod)
+  DEFINE_BINARY_NOT_SUPPORTED(sym_min)
+  DEFINE_BINARY_NOT_SUPPORTED(sym_max)
+  DEFINE_BINARY_NOT_SUPPORTED(sym_and)
+  DEFINE_BINARY_NOT_SUPPORTED(sym_or)
+
+#undef DEFINE_BINARY_NOT_SUPPORTED
+
+// Same as above for operations without an argument; the message reads
+// "<name> is not supported by NestedIntSymNode".
+#define DEFINE_NOT_SUPPORTED(name)                                     \
+  c10::SymNode name() override {                                       \
+    TORCH_CHECK(false, #name " is not supported by NestedIntSymNode"); \
+  }
+
+  DEFINE_NOT_SUPPORTED(sym_not)
+  DEFINE_NOT_SUPPORTED(ceil)
+  DEFINE_NOT_SUPPORTED(floor)
+  DEFINE_NOT_SUPPORTED(neg)
+  DEFINE_NOT_SUPPORTED(sym_float)
+
+#undef DEFINE_NOT_SUPPORTED
+
+ private:
+  int64_t val_;   // the "id" described in the class comment; printed as j<val_>
+  int64_t coeff_; // multiplier described in the class comment
+};
+
+} // namespace c10
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h new file mode 100644 index 0000000000000000000000000000000000000000..0146c2801c1b93b6b8d7e8c5043ed22ffad1f449 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PhiloxRNGEngine.h @@ -0,0 +1,245 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// define constants like M_PI and C keywords for MSVC +#ifdef _MSC_VER +#define _USE_MATH_DEFINES +#include +#endif + + +#ifdef __CUDACC__ +#include +#endif + +#include +#include +#include +#include + +namespace at { + +// typedefs for holding vector data +namespace detail { + +typedef std::array UINT4; +typedef std::array UINT2; +typedef std::array DOUBLE2; +typedef std::array FLOAT2; + +} // namespace detail + +/** + * Note [Philox Engine implementation] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Originally implemented in PyTorch's fusion compiler + * Refer to: http://www.thesalmons.org/john/random123/papers/random123sc11.pdf + * for details regarding the engine. + * + * Note that currently this implementation of the philox engine is not used + * anywhere except for tests in cpu_generator_test.cpp. However, this engine + * will replace curandStatePhilox4_32_10_t in the future. + * + * The philox engine takes a seed value, a subsequeunce + * for starting the generation and an offset for the subsequence. + * Think of this engine as an algorithm producing a huge array. We are + * parallelizing this array by partitioning the huge array and assigning + * a thread index to each partition. In other words, each seed value + * (there are 2^64 possible seed values) gives a sub array of size + * 2^128 (each element in that array is a 128 bit number). 
Reasoning
+ * behind the array being of size 2^128 is, there are 2^64 possible
+ * thread index values and there is an array of size 2^64 for each of
+ * those thread indices. Hence 2^64 * 2^64 = 2^128 for each seed value.
+ *
+ * In short, this generator can produce 2^64 (seed values) * 2^128 (number
+ * of elements in an array given by a seed value) = 2^192 values.
+ *
+ * Arguments:
+ * seed:        Seed values could be any number from 0 to 2^64-1.
+ * subsequence: Subsequence is just the cuda thread indexing with:
+ *              - blockIdx.x * blockDim.x + threadIdx.x
+ * offset:      The offset variable in PhiloxEngine decides how many 128-bit
+ *              random numbers to skip (i.e. how many groups of 4, 32-bit numbers to skip)
+ *              and hence really decides the total number of randoms that can be achieved
+ *              for the given subsequence.
+ */
+
+class philox_engine {
+public:
+
+  // Initializes key and counter from seed/subsequence, then skips `offset`
+  // 128-bit outputs.
+  // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init)
+  C10_HOST_DEVICE inline explicit philox_engine(uint64_t seed = 67280421310721,
+                                 uint64_t subsequence = 0,
+                                 uint64_t offset = 0) {
+
+    reset_state(seed, subsequence);
+    incr_n(offset);
+  }
+
+  // Splits the seed into the two key words and the subsequence into counter
+  // words 2 and 3; counter words 0 and 1 (the offset, see set_offset) are
+  // zeroed and the output position STATE is reset.
+  C10_HOST_DEVICE inline void reset_state(uint64_t seed = 67280421310721,
+                                 uint64_t subsequence = 0) {
+    key_[0] = static_cast(seed);
+    key_[1] = static_cast(seed >> 32);
+    counter_ = detail::UINT4{};
+    counter_[2] = static_cast(subsequence);
+    counter_[3] = static_cast(subsequence >> 32);
+    STATE = 0;
+  }
+
+  /**
+   * Set the offset field of Philox Generator to the desired offset.
+   * The low half goes to counter_[0], the high half to counter_[1].
+   */
+  C10_HOST_DEVICE inline void set_offset(uint64_t offset) {
+    counter_[0] = static_cast(offset);
+    counter_[1] = static_cast(offset >> 32);
+  }
+
+  /**
+   * Gets the current offset of the Philox Generator.
+   */
+  C10_HOST_DEVICE uint64_t get_offset() const {
+    uint64_t lo = static_cast(counter_[0]);
+    uint64_t hi = static_cast(counter_[1]) << 32;
+    return lo | hi;
+  }
+
+  /**
+   * Produces a unique 32-bit pseudo random number on every invocation. Bookkeeps state to avoid waste.
+   * A new group of four outputs is generated whenever STATE is 0; STATE then
+   * walks through the four words of output_ and wraps around.
+   */
+  C10_HOST_DEVICE inline uint32_t operator()(int32_t n_rounds = 10) { // 10 here to preserve back-compat behavior
+    if(STATE == 0) {
+      detail::UINT4 counter = counter_;
+      detail::UINT2 key = key_;
+      output_ = rand(counter, key, n_rounds);
+      incr();
+    }
+    uint32_t ret = output_[static_cast(STATE)];
+    STATE = (STATE + 1) & 3;
+    return ret;
+  }
+
+  // Returns one normally distributed float computed with the Box-Muller
+  // formula from output_[0] and output_[1].
+  // NOTE(review): this function never advances STATE. With STATE == 0 every
+  // call generates a fresh output_ group; with STATE != 0 no new group is
+  // generated, so repeated calls reuse the same two words.
+  inline float randn(uint32_t n_rounds) {
+    #ifdef __CUDA_ARCH__
+    AT_ASSERT(false, "Unsupported invocation of randn on CUDA");
+    #endif
+    if(STATE == 0) {
+      detail::UINT4 counter = counter_;
+      detail::UINT2 key = key_;
+      output_ = rand(counter, key, n_rounds);
+      incr();
+    }
+    // TODO(min-jean-cho) change to Polar method, a more efficient version of Box-Muller method
+    // TODO(voz) We use std:: below, and thus need a separate impl for CUDA.
+    float u1 = 1 - uint32_to_uniform_float(output_[0]); // uint32_to_uniform_float returns [0,1), we need (0,1] to avoid passing 0 to log.
+    float u2 = 1 - uint32_to_uniform_float(output_[1]);
+    return static_cast(std::sqrt(-2.0 * std::log(u1)) * std::cos(2.0 * M_PI * u2));
+  }
+
+  /**
+   * Function that Skips N 128 bit numbers in a subsequence
+   * (x, y and z in the comments below appear to refer to counter_[0],
+   * counter_[1] and counter_[2]).
+   */
+  C10_HOST_DEVICE inline void incr_n(uint64_t n) {
+    uint32_t nlo = static_cast(n);
+    uint32_t nhi = static_cast(n >> 32);
+    counter_[0] += nlo;
+    // if overflow in x has occurred, carry over to nhi
+    if (counter_[0] < nlo) {
+      nhi++;
+      // if overflow in nhi has occurred during carry over,
+      // propagate that overflow to y and exit to increment z
+      // otherwise return
+      counter_[1] += nhi;
+      if(nhi != 0) {
+        if (nhi <= counter_[1]) {
+          return;
+        }
+      }
+    } else {
+      // if overflow in y has occurred during addition,
+      // exit to increment z
+      // otherwise return
+      counter_[1] += nhi;
+      if (nhi <= counter_[1]) {
+        return;
+      }
+    }
+    // Reached only when counter_[1] overflowed: carry into the upper words.
+    if (++counter_[2])
+      return;
+    ++counter_[3];
+  }
+
+  /**
+   * Function that Skips one 128 bit number in a subsequence
+   * (increments the 128-bit counter by one, carrying while a word wraps to 0).
+   */
+  C10_HOST_DEVICE inline void incr() {
+    if (++counter_[0])
+      return;
+    if (++counter_[1])
+      return;
+    if (++counter_[2]) {
+      return;
+    }
+    ++counter_[3];
+  }
+
+private:
+  detail::UINT4 counter_; // words 0/1: offset, words 2/3: subsequence
+  detail::UINT4 output_;  // most recently generated group of four outputs
+  detail::UINT2 key_;     // low and high half of the seed
+  uint32_t STATE;         // index (0..3) of the next word of output_ to return
+
+  // Full 32x32 -> 64 bit multiplication: returns the low half and stores the
+  // high half in *result_high.
+  C10_HOST_DEVICE inline uint32_t mulhilo32(uint32_t a, uint32_t b,
+                                    uint32_t *result_high) {
+    #ifdef __CUDA_ARCH__
+      *result_high = __umulhi(a, b);
+      return a*b;
+    #else
+      const uint64_t product = static_cast(a) * b;
+      *result_high = static_cast(product >> 32);
+      return static_cast(product);
+    #endif
+  }
+
+  // One Philox round: two multiplications by the round constants, whose
+  // halves are mixed with the remaining counter words and the key.
+  C10_HOST_DEVICE inline detail::UINT4 single_round(detail::UINT4 ctr, detail::UINT2 in_key) {
+    uint32_t hi0 = 0;
+    uint32_t hi1 = 0;
+    uint32_t lo0 = mulhilo32(kPhiloxSA, ctr[0], &hi0);
+    uint32_t lo1 = mulhilo32(kPhiloxSB, ctr[2], &hi1);
+    detail::UINT4 ret;
+    ret[0] = hi1 ^ ctr[1] ^ in_key[0];
+    ret[1] = lo1;
+    ret[2] = hi0 ^ ctr[3] ^ in_key[1];
+    ret[3] = lo0;
+    return ret;
+  }
+
+  // Maps the low 31 bits of `value` to a float by scaling.
+  C10_HOST_DEVICE constexpr float uint32_to_uniform_float(uint32_t value) {
+    // maximum value such that `MAX_INT * scale < 1.0` (with float rounding)
+    constexpr float scale = 4.6566127342e-10;
+    return static_cast(value & 0x7FFFFFFF) * scale;
+  }
+
+
+
+  // Runs n_rounds Philox rounds: n_rounds - 1 rounds each followed by a key
+  // bump, then a final round. `counter` and `key` are modified in place.
+  // NOTE(review): n_rounds is unsigned, so n_rounds == 0 makes `n_rounds - 1`
+  // wrap around to the largest uint32_t value; operator() also converts its
+  // int32_t argument to this unsigned parameter.
+  C10_HOST_DEVICE inline detail::UINT4 rand(detail::UINT4& counter, detail::UINT2& key, uint32_t n_rounds) {
+    for (uint32_t round = 0; round < (n_rounds - 1); round++) {
+        counter = single_round(counter, key);
+        key[0] += (kPhilox10A); key[1] += (kPhilox10B);
+      }
+    return single_round(counter, key);
+  }
+
+
+  // kPhilox10A/B are added to the key after each round (see rand());
+  // kPhiloxSA/SB are the multipliers used in single_round().
+  static constexpr uint32_t kPhilox10A = 0x9E3779B9;
+  static constexpr uint32_t kPhilox10B = 0xBB67AE85;
+  static constexpr uint32_t kPhiloxSA = 0xD2511F53;
+  static constexpr uint32_t kPhiloxSB = 0xCD9E8D57;
+};
+
+typedef philox_engine Philox4_32;
+
+} // namespace at
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h new file mode 100644 index 0000000000000000000000000000000000000000..c79644c56704f057444caa2e60d92a76d26f94bf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonFallbackKernel.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + + +namespace at::impl { + +struct TORCH_API RestorePythonTLSSnapshot { + RestorePythonTLSSnapshot(); + RestorePythonTLSSnapshot(RestorePythonTLSSnapshot&& other) = delete; + RestorePythonTLSSnapshot(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(const RestorePythonTLSSnapshot&) = delete; + RestorePythonTLSSnapshot& operator=(RestorePythonTLSSnapshot&&) = delete; + ~RestorePythonTLSSnapshot(); + +private: + c10::impl::LocalDispatchKeySet saved_; + c10::impl::ForceDispatchKeyGuard guard_; +}; + + +// RAII guard to make working with the above TLS safer. +struct TORCH_API MaybeSetTLSOnEntryGuard { +public: + MaybeSetTLSOnEntryGuard(); + MaybeSetTLSOnEntryGuard(MaybeSetTLSOnEntryGuard&& other) = delete; + MaybeSetTLSOnEntryGuard(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(const MaybeSetTLSOnEntryGuard&) = delete; + MaybeSetTLSOnEntryGuard& operator=(MaybeSetTLSOnEntryGuard&&) = delete; + ~MaybeSetTLSOnEntryGuard(); + +private: + bool value_set_; +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h new file mode 100644 index 0000000000000000000000000000000000000000..bbbc9b5308c5c12b6d71684d1a1c9d0047df990a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/PythonOpRegistrationTrampoline.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// TODO: this can probably live in c10 + + +namespace at::impl { + +class TORCH_API PythonOpRegistrationTrampoline final { + static std::atomic interpreter_; + +public: + // Returns true if you successfully registered yourself (that means + // you are in the hot seat for doing the operator registrations!) + static bool registerInterpreter(c10::impl::PyInterpreter* /*interp*/); + + // Returns nullptr if no interpreter has been registered yet. + static c10::impl::PyInterpreter* getInterpreter(); +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h new file mode 100644 index 0000000000000000000000000000000000000000..fbb5f92f0e2214090ce7570877b7cfb6ea27168d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/QuantizerBase.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace at { + +class Tensor; +struct QTensorImpl; +struct Quantizer; +using ConstQuantizerPtr = const c10::intrusive_ptr&; +using QuantizerPtr = c10::intrusive_ptr; + +/** + * Quantizer is the class for storing all the information + * that's necessary to perform quantize and dequantize + * operation. + * + * We might have different types of quantization schemes and this is + * the base class for all quantizers. + * + * QTensorImpl will hold a pointer to Quantizer so that we can support + * different quantization schemes on Tensor. + * + * For example, the most common quantization scheme, Affine Quantization, + * requires scale and zero_point as parameters, we'll store scale and zero_point + * inside the instance and we can use it to quantize a float Tensor or + * dequantize a quantized Tensor. + * + * When you add new types of leaf Quantizer class, please also + * make sure to add a corresponding QScheme enum since + * they should have one to one mapping. + * + * Note about intrusive_ptr: + * Quantized Tensor holds an intrusive_ptr to Quantizer, and multiple Tensor can + * share the same Quantizer. Quantizer should be immutable. 
+ */ +struct TORCH_API Quantizer : public c10::intrusive_ptr_target { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ScalarType scalar_type_; + explicit Quantizer(ScalarType scalar_type) : scalar_type_(scalar_type) {} + ~Quantizer() override = default; + + // Copied from torch/csrc/jit/ir/scope.h + QuantizerPtr intrusive_from_this() { + c10::raw::intrusive_ptr::incref(this); // we are creating a new pointer + // from a raw `this` pointer + // so we need to bump the refcount + // to account for this ownership + return c10::intrusive_ptr::reclaim(this); + } + + /** + * Each concrete Quantizer type should have a unique QScheme type. + */ + virtual QScheme qscheme() const = 0; + + ScalarType scalar_type() const { + return scalar_type_; + } + + /** + * quantize a float Tensor into a quantized Tensor. + */ + virtual Tensor quantize(const Tensor& t) = 0; + + /** + * dequantize a quantized Tensor into a float Tensor. + */ + virtual Tensor dequantize(const Tensor& t) = 0; + + /** + * dequantize a quantized Tensor into a float Tensor, out= variant + */ + virtual Tensor& dequantize_out(Tensor& out, const Tensor& t) = 0; + + /** + * Compare against `other` for equality. + */ + virtual bool equalTo(QuantizerPtr other) const = 0; +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
namespace at {

// Integer interval described by its two endpoints; size() is end - begin.
struct Range {
  Range(int64_t begin, int64_t end)
    : begin(begin)
    , end(end) {}

  // Number of integers covered (negative when end < begin).
  int64_t size() const { return end - begin; }

  // Divides both endpoints by `divisor` using integer division (truncation
  // toward zero). Marked const so it can be applied to const ranges and
  // temporaries bound to const references; the operands are never modified.
  Range operator/(int64_t divisor) const {
    return Range(begin / divisor, end / divisor);
  }

  int64_t begin;
  int64_t end;
};

// Stream insertion; defined out of line.
std::ostream& operator<<(std::ostream& out, const Range& range);

} // namespace at
namespace at::Reduction {

// NB: Keep this in sync with Reduction class in torch/nn/_reduction.py
// These constants control the reduction behavior of loss functions.
// Ideally, this would be a scoped enum, but jit doesn't support that
enum Reduction {
  None = 0, // Do not reduce
  Mean = 1, // (Possibly weighted) mean of losses
  Sum = 2,  // Sum losses
  END = 3   // One past the last valid value
};

} // namespace at::Reduction
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..cc8558a2bbe45f6d373f24d6a0e02e8795537240 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Tensor.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class TORCH_API OptionalTensorRef { + public: + OptionalTensorRef() = default; + + ~OptionalTensorRef() { + ref_.unsafeReleaseTensorImpl(); + } + + OptionalTensorRef(const TensorBase& src) + : ref_(Tensor::unsafe_borrow_t{}, src) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(src.defined()); + } + + OptionalTensorRef(const OptionalTensorRef& rhs) + : ref_(Tensor::unsafe_borrow_t{}, rhs.ref_) {} + + OptionalTensorRef(OptionalTensorRef&& rhs) = default; + OptionalTensorRef& operator=(OptionalTensorRef rhs) { + std::swap(ref_, rhs.ref_); + return *this; + } + + bool has_value() const { + return ref_.defined(); + } + + const Tensor& getTensorRef() const & { + return ref_; + } + + const Tensor& operator*() const & { + return ref_; + } + + const Tensor* operator->() const & { + return &ref_; + } + + operator bool() const { + return ref_.defined(); + } + + private: + Tensor ref_; +}; + +// Use to convert a TensorBase (that may be undefined) to an at::Tensor +// without bumping refcount. 
+class TORCH_API TensorRef { + public: + ~TensorRef() { + ref_.unsafeReleaseTensorImpl(); + } + + TensorRef(const TensorBase& src) + : ref_(Tensor::unsafe_borrow_t{}, src) {} + TensorRef(TensorRef&& other) = default; + TensorRef(const TensorRef&) = default; + TensorRef& operator=(const TensorRef&) = default; + TensorRef& operator=(TensorRef&&) = default; + + const Tensor& operator*() const & { + return ref_; + } + private: + Tensor ref_; +}; + +template +auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_void_t { + // Return the grad argument in case of a hook with void return type to have an + // std::function with Tensor return type + static_assert(std::is_same_v, + "Expected hook to return void"); + return _register_hook([fn=std::forward(hook)](const TensorBase& grad_base) { + TensorRef grad(grad_base); + fn(*grad); + return Tensor(); + }); +} + +template +auto Tensor::register_hook(T&& hook) const -> Tensor::hook_return_var_t { + return _register_hook([fn=std::forward(hook)](const TensorBase& grad_base) { + TensorRef grad(grad_base); + Tensor ret = fn(*grad); + return TensorBase(std::move(ret)); + }); +} + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h new file mode 100644 index 0000000000000000000000000000000000000000..4fc9c1e8a55cbf9edb187d808bf25f964db38d54 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorAccessor.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +using torch::headeronly::DefaultPtrTraits; +#if defined(__CUDACC__) || defined(__HIPCC__) + using torch::headeronly::RestrictPtrTraits; +#endif + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using TensorAccessorBase = torch::headeronly::detail::TensorAccessorBase; + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using TensorAccessor = torch::headeronly::detail::TensorAccessor; + +namespace detail { + +template +struct IndexBoundsCheck { + IndexBoundsCheck(index_t i) { + TORCH_CHECK_INDEX( + 0 <= i && i < index_t{N}, + "Index ", + i, + " is not within bounds of a tensor of dimension ", + N); + } +}; +} // namespace detail + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using GenericPackedTensorAccessorBase = torch::headeronly::detail::GenericPackedTensorAccessorBase, T, N, PtrTraits, index_t>; + +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +using GenericPackedTensorAccessor = torch::headeronly::detail::GenericPackedTensorAccessor, detail::IndexBoundsCheck, T, N, PtrTraits, index_t>; + +// Can't put this directly into the macro function args because of commas +#define AT_X GenericPackedTensorAccessor + +// Old name for 
`GenericPackedTensorAccessor` +template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> +C10_DEFINE_DEPRECATED_USING(PackedTensorAccessor, AT_X) + +#undef AT_X + +template class PtrTraits = DefaultPtrTraits> +using PackedTensorAccessor32 = GenericPackedTensorAccessor; + +template class PtrTraits = DefaultPtrTraits> +using PackedTensorAccessor64 = GenericPackedTensorAccessor; +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h new file mode 100644 index 0000000000000000000000000000000000000000..3244b247a214a9aa086a17adb4b20db6c0afa295 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBase.h @@ -0,0 +1,1098 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// See https://github.com/pytorch/pytorch/issues/161660 +// This compile flag is intended to be passed in to CppExtensions that rely on +// the stable ABI via the `extra_compile_args` argument. This is a stopgap +// solution to ensure that non-stable libtorch APIs are not used in the extension. +// The long term solution is to have a torch_stable target that excludes headers +// that are not in torch/stable or torch/headeronly. +// See test/cpp_extensions/torch_stable_test_extension/setup.py for an example +// of how this is used. 
+#ifdef TORCH_STABLE_ONLY +#error \ + "TensorBase.h should not be included when TORCH_STABLE_ONLY compile flag is passed" +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10 { +class Scalar; +} + +namespace torch::autograd { + +struct Node; + +} // namespace torch::autograd + +namespace at { + +class Tensor; +class TensorBase; + +// Convert Tensor to TensorBase without any need to include Tensor.h +TORCH_API const TensorBase& get_tensor_base(const Tensor& t); + +namespace impl { +inline bool variable_excluded_from_dispatch() { +#ifdef C10_MOBILE + // Please read the comment in `VariableFallbackKernel.cpp` about the background of this change. + return true; +#else + return c10::impl::tls_local_dispatch_key_set().excluded_.isSupersetOf(c10::autograd_dispatch_keyset); +#endif +} + +} + +// NOTE: [Tensor vs. TensorBase] +// +// Tensor, being the central data structure in PyTorch, gets used and +// its header included almost everywhere. Unfortunately this means +// every time an operator signature is updated or changed in +// native_functions.yaml, you (and every other PyTorch developer) need +// to recompile all of ATen and its dependencies. +// +// TensorBase aims to break up these header dependencies, and improve +// incremental build times for all PyTorch developers. TensorBase +// represents a reference counted handle to TensorImpl, exactly the +// same as Tensor. However, TensorBase doesn't have code generated +// methods in its API and thus no dependence on native_functions.yaml. +// +// Usage tips +// ---------- +// - You can `#define TORCH_ASSERT_NO_OPERATORS` at the top of a .cpp +// or .cu file to ensure it has no header dependencies on +// native_functions.yaml (direct or indirect). 
+// - Tensor inherits from TensorBase, so functions taking +// `const TensorBase &` are callable with Tensor as well. +// - TensorBase can be converted to Tensor with `Tensor(tensor_base)`, +// but this requires a reference-count bump. OptionalTensorRef, on +// the other hand, can materialize a `const Tensor &` without +// touching the reference-count. +class TORCH_API TensorBase { + public: + struct unsafe_borrow_t { explicit unsafe_borrow_t() = default; }; + + protected: + // Create a Tensor with a +0 reference count. Special care must be + // taken to avoid decrementing this reference count at destruction + // time. Intended to support MaybeOwnedTraits. + explicit TensorBase(unsafe_borrow_t /*unused*/, const TensorBase& rhs) + : impl_(c10::intrusive_ptr(rhs.impl_.get(), c10::raw::DontIncreaseRefcount{})) {} + friend MaybeOwnedTraits; + + public: + TensorBase() = default; + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit TensorBase( + c10::intrusive_ptr tensor_impl) + : impl_(std::move(tensor_impl)) { + TORCH_CHECK(impl_.get(), "TensorImpl with nullptr is not supported"); + } + TensorBase(const TensorBase&) = default; + TensorBase(TensorBase&&) noexcept = default; + ~TensorBase() noexcept = default; + + public: + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. 
Checks necessary invariants + static TensorBase wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + TensorBase r(std::move(tensor_impl)); + r.enforce_invariants(); + return r; + } + + int64_t dim() const { + return impl_->dim(); + } + int64_t storage_offset() const { + return impl_->storage_offset(); + } + + TensorBase contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const { + if (is_contiguous_or_false(memory_format)) { + return *this; + } else { + return __dispatch_contiguous(memory_format); + } + } + + /// Should be used if *this can reasonably be expected to be contiguous and + /// performance is important. + /// Compared to contiguous, it saves a reference count + /// increment/decrement if *this is already contiguous, at the cost + /// in all cases of an extra pointer of stack usage, an extra branch + /// to access, and an extra branch at destruction time. + c10::MaybeOwned expect_contiguous( + MemoryFormat memory_format=MemoryFormat::Contiguous) const &; + + // Use .contiguous() instead. Trying to borrow from a prvalue + // will only lead to trouble and dangling references. 
+ c10::MaybeOwned expect_contiguous( + MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete; + + const TensorBase& fill_(const c10::Scalar& scalar) const; + const TensorBase& zero_() const; + + TensorBase to(at::TensorOptions options={}, bool non_blocking=false, bool copy=false, std::optional memory_format=std::nullopt) const; + + bool is_complex() const { + return at::isComplexType(this->scalar_type()); + } + + bool is_floating_point() const { + return at::isFloatingType(this->scalar_type()); + } + + bool is_signed() const { + return at::isSignedType(this->scalar_type()); + } + + c10::SymInt sym_size(int64_t dim) const { + return impl_->sym_size(dim); + } + + c10::SymInt sym_stride(int64_t dim) const { + const auto sizes = this->sym_strides(); + const auto ndim = static_cast(sizes.size()); + // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping) + return sizes[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)]; + + } + + int64_t size(int64_t dim) const { + return impl_->size(dim); + } + + int64_t stride(int64_t dim) const { + const auto strides = this->strides(); + const auto ndim = static_cast(strides.size()); + // false is passed to maybe_wrap_dim so behavior is identical to array access (but with wrapping) + return strides[c10::maybe_wrap_dim(dim, ndim, /*wrap_scalar=*/false)]; + } + + TensorImpl * unsafeGetTensorImpl() const { + return impl_.get(); + } + TensorImpl * unsafeReleaseTensorImpl() { + return impl_.release(); + } + const c10::intrusive_ptr& getIntrusivePtr() const { + return impl_; + } + + c10::intrusive_ptr unsafeReleaseIntrusivePtr() { + return std::move(impl_); + } + + bool defined() const { + return impl_; + } + + void reset() { + impl_.reset(); + } + +#if defined (_MSC_VER) + TensorBase& operator=(const TensorBase& x) & { + impl_ = x.impl_; + return *this; + }; + TensorBase& operator=(TensorBase&& x) & noexcept { + impl_ = std::move(x.impl_); + return *this; + } +#else + TensorBase& 
operator=(const TensorBase& x) & = default; + TensorBase& operator=(TensorBase&& x) & noexcept = default; +#endif + + // Ban assignment to rvalues, since at::Tensor (weirdly) performs a deep copy here + TensorBase& operator=(const TensorBase&) && = delete; + TensorBase& operator=(TensorBase&&) && noexcept = delete; + + bool is_same(const TensorBase& other) const noexcept { + return impl_ == other.impl_; + } + size_t use_count() const noexcept { + return impl_.use_count(); + } + size_t weak_use_count() const noexcept { + return impl_.weak_use_count(); + } + bool is_uniquely_owned() const noexcept { + return impl_.is_uniquely_owned(); + } + + std::string toString() const; + + IntArrayRef sizes() const { + return impl_->sizes(); + } + c10::SymIntArrayRef sym_sizes() const { + return impl_->sym_sizes(); + } + c10::SymIntArrayRef sym_strides() const { + return impl_->sym_strides(); + } + IntArrayRef strides() const { + return impl_->strides(); + } + // See impl::get_opt_names in ATen/NamedTensor.h for docs. + std::optional opt_names() const { + return impl::get_opt_names(unsafeGetTensorImpl()); + } + // See impl::get_names in ATen/NamedTensor.h for docs. + DimnameList names() const { + return impl::get_names(unsafeGetTensorImpl()); + } + int64_t ndimension() const { + return dim(); + } + + bool is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + return impl_->is_contiguous(memory_format); + } + + // Like is_contiguous, but more dynamic shape-friendly. May return a symbolic representation of + // contiguity instead of SymTrue SymFalse, when results are data-dependent. + c10::SymBool sym_is_contiguous(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + if (impl_->has_symbolic_sizes_strides()) { + return impl_->sym_is_contiguous(memory_format); + } + return impl_->is_contiguous(memory_format); + } + + // Like is_contiguous, but more dynamic shape-friendly. 
Can returns + // false instead of throwing data-dependent errors for tensors with unbacked + // sizes or strides. + bool is_contiguous_or_false(at::MemoryFormat memory_format=at::MemoryFormat::Contiguous) const { + if (impl_->has_symbolic_sizes_strides()) { + return impl_->sym_is_contiguous(memory_format).guard_or_false(__FILE__, __LINE__); + } + return impl_->is_contiguous(memory_format); + } + + bool is_non_overlapping_and_dense() const { + return impl_->is_non_overlapping_and_dense(); + } + + at::MemoryFormat suggest_memory_format( + bool channels_last_strides_exact_match = false) const { + // Setting channels_last_strides_exact_match to true forces function to + // check 0,1 - sized dimension strides. + if (layout() == at::kStrided) { + if (impl_->is_strides_like_channels_last()) { + if (!channels_last_strides_exact_match || + get_channels_last_strides_2d(sizes()) == strides()) { + return at::MemoryFormat::ChannelsLast; + } + } + else if (impl_->is_strides_like_channels_last_3d()) { + if (!channels_last_strides_exact_match || + get_channels_last_strides_3d(sizes()) == strides()) { + return at::MemoryFormat::ChannelsLast3d; + } + } + } + return at::MemoryFormat::Contiguous; + } + + // Total bytes consumed by the "view" of elements of the array. Does not + // include size of metadata. The number reported here does not necessarily + // correspond to the true physical memory consumed by a tensor; instead, + // it reports the memory the tensor would take *if* it were contiguous. + // Defined to be numel() * itemsize() + size_t nbytes() const { + TORCH_CHECK(layout () != at::kSparse, + "nbytes is not defined for sparse tensors. If you want the size of the constituent " \ + "tensors, add the nbytes of the indices and values. 
If you want the size of the " \ + "equivalent dense tensor, multiply numel() by element_size()"); + return impl_->numel() * impl_->itemsize(); + } + + c10::SymInt sym_nbytes() const { + TORCH_CHECK(layout () != at::kSparse, + "nbytes is not defined for sparse tensors. If you want the size of the constituent " \ + "tensors, add the nbytes of the indices and values. If you want the size of the " \ + "equivalent dense tensor, multiply numel() by element_size()"); + return impl_->sym_numel() * impl_->itemsize(); + } + + int64_t numel() const { + return impl_->numel(); + } + + c10::SymInt sym_numel() const { + return impl_->sym_numel(); + } + + c10::SymInt sym_storage_offset() const { + return impl_->sym_storage_offset(); + } + + // Length of one array element in bytes. This is the traditional + // Numpy naming. + size_t itemsize() const { + return impl_->itemsize(); + } + + // Same as itemsize(). This is the PyTorch naming. + int64_t element_size() const { + return static_cast(impl_->itemsize()); + } + + DispatchKeySet key_set() const { + return impl_->key_set(); + } + ScalarType scalar_type() const { + return typeMetaToScalarType(impl_->dtype()); + } + bool has_storage() const { + return defined() && impl_->has_storage(); + } + const Storage& storage() const { + return impl_->storage(); + } + bool is_alias_of(const at::TensorBase& other) const{ + return impl_->storage().is_alias_of(other.storage()); + } + + // Move the storage backend to shm based + // to enable memory sharing across processes. + // + // NB1: the ideal behavior of this API still requires further discussion + // but for now we are inclined to keep it consistent with existing THP behavior + // https://github.com/pytorch/pytorch/blob/4dca9bde0552afc67b5b74f4a0696fe6055709c4/torch/storage.py#L196-L212 + // so we don't assert on anything here and rely on caller knowing + // what it's doing. 
+ // + // NB2: this currently provides Linux fd based shm support only + // to simplify the storage lifetime management logic in ATen + // and similarly for now we are not adding support for file system based + // shm support like in THP due to additional GC manager support needed + // to prevent leaks. + // As such, calling this from non supported systems (e.g. Windows) would fail. + void share_memory_() { + at::share_memory_(*this); + } + + inline bool _is_zerotensor() const { + return impl_->_is_zerotensor(); + } + + inline void _set_zero(bool zero) const { + impl_->_set_zero(zero); + } + + inline bool is_conj() const { + return impl_->is_conj(); + } + + // sets the conjugate bit of a tensor. + // NOTE: Conjugate bit is supposed to be a read-only field. Only change this, if you are sure + // that's what you want. Changing this might lead to incorrect behavior since conjugation is + // a lazy operation and we rely on this bit to determine if a conjugation needs to be materialized. + inline void _set_conj(bool conjugate) const { + impl_->_set_conj(conjugate); + } + + inline bool is_neg() const { + return impl_->is_neg(); + } + + // sets the negative bit of a tensor. + // NOTE: Negative bit is supposed to be a read-only field. Only change this, if you are sure + // that's what you want. Changing this might lead to incorrect behavior since we rely on this + // bit to determine if a negation needs to be materialized. + inline void _set_neg(bool negative) const { + impl_->_set_neg(negative); + } + + /// Returns a `Tensor`'s layout. + Layout layout() const { + return impl_->layout(); + } + + /// Returns a `Tensor`'s dtype (`TypeMeta`). + caffe2::TypeMeta dtype() const { + return impl_->dtype(); + } + + /// Returns a `Tensor`'s device. + inline Device device() const { + return impl_->device(); + } + + /// Returns a `Tensor`'s device index. + DeviceIndex get_device() const { + // NB: this is not a native function to avoid dispatching overhead. 
+ return impl_->get_device(); + } + + /// Returns if a `Tensor` has CPU backend. + bool is_cpu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_cpu(); + } + + /// Returns if a `Tensor` has CUDA backend. + bool is_cuda() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_cuda(); + } + + /// Returns if a `Tensor` has IPU backend. + bool is_ipu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_ipu(); + } + + /// Returns if a `Tensor` has XPU backend. + bool is_xpu() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_xpu(); + } + + /// Returns if a `Tensor` has XLA backend. + bool is_xla() const { + return impl_->is_xla(); + } + + /// Returns if a `Tensor` has MTIA backend. + bool is_mtia() const { + return impl_->is_mtia(); + } + + /// Returns if a `Tensor` has HPU backend. + bool is_hpu() const { + return impl_->is_hpu(); + } + + /// Returns if a `Tensor` has Lazy backend. + bool is_lazy() const { + return impl_->is_lazy(); + } + + /// Returns if a `Tensor` has HIP backend. + bool is_hip() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_hip(); + } + + /// Returns if a `Tensor` has VE backend. + bool is_ve() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_ve(); + } + + /// Returns if a `Tensor` has PrivateUse1 backend. + bool is_privateuseone() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_privateuseone(); + } + + /// Returns if a `Tensor` has sparse backend. + bool is_sparse() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_sparse(); + } + + /// Returns is a `Tensor` has a sparse CSR backend. 
+ bool is_sparse_csr() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_sparse_csr(); + } + + /// Returns if a `Tensor` is mkldnn tensor. + bool is_mkldnn() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_mkldnn(); + } + + /// Returns if a `Tensor` is mps tensor. + bool is_mps() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_mps(); + } + + /// Returns if a `Tensor` is maia tensor. + bool is_maia() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_maia(); + } + + /// Returns if a `Tensor` is vulkan tensor. + bool is_vulkan() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_vulkan(); + } + + /// Returns if a `Tensor` is metal tensor. + bool is_metal() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_metal(); + } + + /// Returns if a `Tensor` has quantized backend. + bool is_quantized() const { + // NB: this is not a native function to avoid dispatching overhead. + return impl_->is_quantized(); + } + + /// Returns if a `Tensor` is a meta tensor. Meta tensors can + /// also have other designations. + bool is_meta() const { + return impl_->is_meta(); + } + + /// Returns if a `Tensor` is an inference tensor. + bool is_inference() const { + return impl_->is_inference(); + } + + // Returns if a `Tensor` is a NestedTensor. + bool is_nested() const { + return impl_->is_nested(); + } + + /// If a tensor is a quantized tensor, returns its quantizer + /// TODO: it's not in native_functions.yaml yet as it's not exposed to python + QuantizerPtr quantizer() const; + + /// Returns if a `Tensor` has any dimension names + bool has_names() const { + // If a user is using unnamed tensors, then we can short-circuit right here. + // Otherwise, impl::has_names attempts to retrieve names. 
+ if (!impl_->has_named_tensor_meta()) { + return false; + } + return impl::has_names(unsafeGetTensorImpl()); + } + + /// Returns a `Tensor`'s dimension names data structure + const NamedTensorMeta* get_named_tensor_meta() const { + return static_cast(impl_->named_tensor_meta()); + } + + NamedTensorMeta* get_named_tensor_meta() { + return static_cast(impl_->named_tensor_meta()); + } + + /// Returns the `TensorOptions` corresponding to this `Tensor`. Defined in + /// TensorOptions.h. + TensorOptions options() const { + return TensorOptions().dtype(dtype()) + .device(device()) + .layout(layout()); + } + + const void* const_data_ptr() const { + return this->unsafeGetTensorImpl()->data(); + } + + void* mutable_data_ptr() const { + return this->unsafeGetTensorImpl()->mutable_data(); + } + + // TODO(#97856) Make this return a const pointer. This currently + // returns a non-const pointer because of the large + // number of clients that we still want to audit before + // migrating to mutable_data_ptr(). + void* data_ptr() const { + return mutable_data_ptr(); + } + + template , int> = 0> + const T* const_data_ptr() const; + + template , int> = 0> + const std::remove_const_t* const_data_ptr() const; + + template + T* mutable_data_ptr() const; + + // Legacy interface during the migration to indicate that a callsite + // has not been audited for mutability. + // + // Do not add new uses of this, use const_data_ptr() if possible, + // mutable_data_ptr() otherwise. + // + // TODO(#97856) Make this return a const pointer. This is currently + // const because of the vast number of clients that + // rely on this. + template + T* data_ptr() const; + + // Purposely not defined here to avoid inlining + void print() const; + + // Return a `TensorAccessor` for CPU `Tensor`s. You have to specify scalar type and + // dimension. 
+ template + TensorAccessor accessor() const& { + static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr()"); + TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim()); + T* ptr = nullptr; + if constexpr (std::is_const_v) { + ptr = const_data_ptr(); + } else { + ptr = mutable_data_ptr(); + } + return TensorAccessor(ptr,sizes().data(),strides().data()); + } + template + TensorAccessor accessor() && = delete; + + // Return a `GenericPackedTensorAccessor` for CUDA `Tensor`s. You have to specify scalar type and + // dimension. You can optionally specify RestrictPtrTraits as a template parameter to + // cast the data pointer to a __restrict__ pointer. + // In order to use this, your CUDA kernel has to take a corresponding GenericPackedTensorAccessor + // as an argument. + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + GenericPackedTensorAccessor generic_packed_accessor() const& { + static_assert(N > 0, "accessor is used for indexing tensor, for scalars use *data_ptr()"); + TORCH_CHECK(dim() == N, "TensorAccessor expected ", N, " dims but tensor has ", dim()); + T* ptr = nullptr; + if constexpr (std::is_const_v) { + ptr = const_data_ptr(); + } else { + ptr = mutable_data_ptr(); + } + return GenericPackedTensorAccessor(static_cast::PtrType>(ptr),sizes().data(),strides().data()); + } + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + GenericPackedTensorAccessor generic_packed_accessor() && = delete; + + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor32 packed_accessor32() const& { + TORCH_CHECK( + impl_->numel() <= + static_cast(std::numeric_limits::max()), + "numel needs to be smaller than int32_t max; otherwise, please use packed_accessor64"); + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor32 packed_accessor32() && = delete; + + template class PtrTraits = DefaultPtrTraits> + 
PackedTensorAccessor64 packed_accessor64() const& { + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits> + PackedTensorAccessor64 packed_accessor64() && = delete; + + // ~~~~~ Autograd API ~~~~~ + + /// \fn bool is_leaf() const; + /// + /// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention. + /// + /// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were + /// created by the user. This means that they are not the result of an operation and so + /// `grad_fn()` is `nullptr`. + /// + /// Only leaf Tensors will have their `grad()` populated during a call to `backward()`. + /// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`. + /// + /// Example: + /// @code + /// auto a = torch::rand(10, torch::requires_grad()); + /// std::cout << a.is_leaf() << std::endl; // prints `true` + /// + /// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA); + /// std::cout << b.is_leaf() << std::endl; // prints `false` + /// // b was created by the operation that cast a cpu Tensor into a cuda Tensor + /// + /// auto c = torch::rand(10, torch::requires_grad()) + 2; + /// std::cout << c.is_leaf() << std::endl; // prints `false` + /// // c was created by the addition operation + /// + /// auto d = torch::rand(10).cuda(); + /// std::cout << d.is_leaf() << std::endl; // prints `true` + /// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine) + /// + /// auto e = torch::rand(10).cuda().requires_grad_(); + /// std::cout << e.is_leaf() << std::endl; // prints `true` + /// // e requires gradients and has no operations creating it + /// + /// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true)); + /// std::cout << f.is_leaf() << std::endl; // prints `true` + /// // f requires grad, has no operation creating it + /// @endcode + + /// \fn void backward(const 
Tensor & gradient={}, std::optional retain_graph=std::nullopt, bool create_graph=false, std::optional inputs=std::nullopt) const; + /// + /// Computes the gradient of current tensor with respect to graph leaves. + /// + /// The graph is differentiated using the chain rule. If the tensor is + /// non-scalar (i.e. its data has more than one element) and requires + /// gradient, the function additionally requires specifying ``gradient``. + /// It should be a tensor of matching type and location, that contains + /// the gradient of the differentiated function w.r.t. this Tensor. + /// + /// This function accumulates gradients in the leaves - you might need to + /// zero them before calling it. + /// + /// \param gradient Gradient w.r.t. the + /// tensor. If it is a tensor, it will be automatically converted + /// to a Tensor that does not require grad unless ``create_graph`` is True. + /// None values can be specified for scalar Tensors or ones that + /// don't require grad. If a None value would be acceptable then + /// this argument is optional. + /// \param retain_graph If ``false``, the graph used to compute + /// the grads will be freed. Note that in nearly all cases setting + /// this option to True is not needed and often can be worked around + /// in a much more efficient way. Defaults to the value of + /// ``create_graph``. + /// \param create_graph If ``true``, graph of the derivative will + /// be constructed, allowing to compute higher order derivative + /// products. Defaults to ``false``. + /// \param inputs Inputs w.r.t. which the gradient will be accumulated into + /// ``at::Tensor::grad``. All other Tensors will be ignored. If not + /// provided, the gradient is accumulated into all the leaf Tensors + /// that were used to compute the current tensor. + /// When inputs are provided and a given input is not a leaf, + /// the current implementation will call its grad_fn (even though it is not strictly needed to get this gradients). 
+ /// It is an implementation detail on which the user should not rely. + /// See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details. + + /// \fn Tensor detach() const; + /// + /// Returns a new Tensor, detached from the current graph. + /// The result will never require gradient. + + /// \fn Tensor & detach_() const; + /// + /// Detaches the Tensor from the graph that created it, making it a leaf. + /// Views cannot be detached in-place. + + /// \fn void retain_grad() const; + /// + /// Enables this Tensor to have their :attr:`grad` populated during + /// :func:`backward`. This is a no-op for leaf tensors. + + /// \fn bool retains_grad() const; + /// + /// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be + /// populated during :func:`backward`, ``false`` otherwise. + + const TensorBase& set_requires_grad(bool requires_grad) const { + impl_->set_requires_grad(requires_grad); + return *this; + } + bool requires_grad() const { + return impl_->requires_grad(); + } + + // The Forward AD API functions below are low level and are not to be used by end + // users who should use the API provided in torch/csrc/autograd.h + + /// This function returns the forward gradient for this Tensor at the given level. + const Tensor& _fw_grad(uint64_t level) const { + return impl_->_fw_grad(level, *this); + } + + /// This function can be used to set the value of the forward grad. + /// Note that the given new_grad might not be used directly if it has different + /// metadata (size/stride/storage offset) compared to this Tensor. 
In that case, + /// new_grad content will be copied into a new Tensor + void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const { + impl_->_set_fw_grad(new_grad, *this, level, is_inplace_op); + } + + /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended + /// to be used from functions that need to access the `Variable`'s equivalent `Tensor` + /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`). + /// + /// One notable difference with the legacy `.data()` function is that changes to the + /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset) + /// will not update the original `Variable`, due to the fact that this function + /// shallow-copies the `Variable`'s underlying TensorImpl. + at::TensorBase tensor_data() const; + + /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data` + /// in Python, which create a new `Variable` that shares the same storage and + /// tensor metadata with the original `Variable`, but with a completely new + /// autograd history. + /// + /// NOTE: If we change the tensor metadata (e.g. sizes / strides / + /// storage / storage_offset) of a variable created from `var.variable_data()`, those + /// changes will not update the original variable `var`. In `.variable_data()`, we set + /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal, + /// in order to prevent users from changing metadata of `var.variable_data()` + /// and expecting the original variable `var` to also be updated. + at::TensorBase variable_data() const; + + // Gradient Node and Edges + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /// Gets the gradient function of the `Variable`. If this is a leaf variable, + /// the pointer returned will be null. + /// + /// For View Variables: + /// Gets the up-to-date grad_fn. 
If the shared data or base was modified, we + /// re-create the grad_fn to express the up-to-date view relationship between + /// this and the base Variable. + const std::shared_ptr& grad_fn() const; + + // Hooks + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + template + using hook_return_void_t = std::enable_if_t>, unsigned>; + template + using hook_return_var_t = std::enable_if_t, TensorBase>, unsigned>; + + /// Registers a backward hook. + /// + /// The hook will be called every time a gradient with respect to the Tensor is computed. + /// The hook should have one of the following signature: + /// ``` + /// hook(TensorBase grad) -> TensorBase + /// ``` + /// ``` + /// hook(TensorBase grad) -> void + /// ``` + /// The hook should not modify its argument, but it can optionally return a new gradient + /// which will be used in place of `grad`. + /// + /// This function returns the index of the hook in the list which can be used to remove hook. + /// + /// Example: + /// @code + /// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad()); + /// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient + /// v.backward(torch::tensor({1., 2., 3.})); + /// // This prints: + /// // ``` + /// // 2 + /// // 4 + /// // 6 + /// // [ CPUFloatType{3} ] + /// // ``` + /// std::cout << v.grad() << std::endl; + /// v.remove_hook(h); // removes the hook + /// @endcode + template + hook_return_void_t register_hook(T&& hook) const; + template + hook_return_var_t register_hook(T&& hook) const; + +protected: + unsigned _register_hook(std::function hook) const; + +public: + + /// Remove hook at given position + void remove_hook(unsigned pos) const; + + // Variable methods + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + bool is_leaf() const; + + int64_t output_nr() const; + + void set_data(const TensorBase & new_data) const; + + TensorBase data() const; + + int64_t 
_version() const; + + void retain_grad() const; + + bool retains_grad() const; + + const TensorBase& requires_grad_(bool _requires_grad=true) const; + + std::optional grad_dtype() const; + + void set_grad_dtype(const std::optional& grad_dtype) const; + + // View Variables + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + /// Returns true if this `Variable` is a view of another `Variable`. + bool is_view() const; + + /// Returns the `Variable` that this `Variable` is a view of. If this + /// `Variable` is not a view, throw a `std::runtime_error`. + const TensorBase& _base() const; + + // Miscellaneous + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + const std::string& name() const; + +protected: + void enforce_invariants(); + c10::intrusive_ptr impl_; + +private: + TensorBase __dispatch_contiguous(c10::MemoryFormat /*memory_format*/) const; +}; + +inline DeviceIndex get_device(const TensorBase& self) { + return self.get_device(); +} + +template +auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_void_t { + // Return the grad argument in case of a hook with void return type to have an + // std::function with Tensor return type + static_assert(std::is_same_v, + "Expected hook to return void"); + return _register_hook([fn=std::forward(hook)](const TensorBase& grad) { + fn(grad); + return TensorBase(); + }); +} + +template +auto TensorBase::register_hook(T&& hook) const -> TensorBase::hook_return_var_t { + return _register_hook(std::forward(hook)); +} + +namespace detail { +// Helper creator for Tensor class which doesn't requires the users to pass +// in an intrusive_ptr instead it just converts the argument passed to +// requested intrusive_ptr type. +template +TensorBase make_tensor_base(Args&&... 
args) { + return TensorBase(c10::make_intrusive(std::forward(args)...)); +} + +} // namespace detail + +inline DispatchKey legacyExtractDispatchKey(const TensorBase& t) { + return legacyExtractDispatchKey(t.key_set()); +} + +} // namespace at + +namespace c10 { +template <> +struct MaybeOwnedTraits { + using owned_type = at::TensorBase; + using borrow_type = at::TensorBase; + + static borrow_type createBorrow(const owned_type& from) { + // NOTE: this can be implemented without the special + // unsafe_borrow_t Tensor constructor as + // + // return borrow_type(c10::intrusive_ptr::reclaim(from.unsafeGetTensorImpl())); + // + // but that hurts inlining due to the nullptr check in the + // Tensor(c10::intrusive_ptr<...>) constructor. We already know + // that from.impl_ isn't null because from is a valid Tensor, so + // we needn't do the check again. (using __builtin_assume can + // avoid this, but wouldn't be portable to MSVC.) + return borrow_type(borrow_type::unsafe_borrow_t{}, from); + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.unsafeReleaseTensorImpl(); + // See above note: this can be implemented with public API + // similarly to createBorrow(), but that would hurt inlining. + lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs); + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0. + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + return true; + } +}; + +template <> +struct ExclusivelyOwnedTraits : public c10::ExclusivelyOwnedTensorTraits {}; +} // namespace c10 + +namespace at { + +inline c10::MaybeOwned borrow_from_optional_tensor( + const std::optional& opt) { + return opt.has_value() + ? 
c10::MaybeOwned::borrowed(*opt) + : c10::MaybeOwned::owned(std::in_place); +} + +inline c10::MaybeOwned TensorBase::expect_contiguous(MemoryFormat memory_format) const & { + if (is_contiguous(memory_format)) { + return c10::MaybeOwned::borrowed(*this); + } else { + return c10::MaybeOwned::owned(__dispatch_contiguous(memory_format)); + } +} + +namespace symint { + +template +using enable_if_symint = std::enable_if_t>; +template +using enable_if_int = std::enable_if_t>; + +template > +c10::SymIntArrayRef sizes(const TensorBase& t) { return t.sym_sizes(); } +template > +IntArrayRef sizes(const TensorBase& t) { return t.sizes(); } + +template > +c10::SymInt size(const TensorBase& t, int64_t dim) { return t.sym_size(dim); } +template > +int64_t size(const TensorBase& t, int64_t dim) { return t.size(dim); } + +template > +c10::SymIntArrayRef strides(const TensorBase& t) { return t.sym_strides(); } +template > +IntArrayRef strides(const TensorBase& t) { return t.strides(); } + +template > +c10::SymInt numel(const TensorBase& t) { return t.sym_numel(); } +template > +int64_t numel(const TensorBase& t) { return t.numel(); } + +} // namespace symint + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h new file mode 100644 index 0000000000000000000000000000000000000000..f67ce6fbfbcfce68be22d95d2758ad28799a2c72 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TensorBody.h @@ -0,0 +1,5799 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef TORCH_ASSERT_NO_OPERATORS +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if your change would be better placed in \ + another file, or if a more specific header might achieve the same goal. \ + See NOTE: [Tensor vs. TensorBase] +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#include + +namespace c10{ +template class List; +template class IListRef; +} +namespace at { +struct Generator; +struct Type; +class DeprecatedTypeProperties; +class Tensor; +} // namespace at +namespace at { +namespace indexing { +struct TensorIndex; +} // namespace indexing +} // namespace at + +namespace torch { namespace autograd { + +struct Node; + +}} // namespace torch::autograd + +namespace at { + +class OptionalTensorRef; +class TensorRef; +class Tensor; +using TensorList = ArrayRef; +using ITensorList = c10::IListRef; + +using Stream = c10::Stream; + +// Tensor is a "generic" object holding a pointer to the underlying TensorImpl object, which +// has an embedded reference count. 
In this way, Tensor is similar to boost::intrusive_ptr. +// +// For example: +// +// void func(Tensor a) { +// Tensor b = a; +// ... +// } +// +// In this example, when we say Tensor b = a, we are creating a new object that points to the +// same underlying TensorImpl, and bumps its reference count. When b goes out of scope, the +// destructor decrements the reference count by calling release() on the TensorImpl it points to. +// The existing constructors, operator overloads, etc. take care to implement the correct semantics. +// +// Note that Tensor can also be NULL, i.e. it is not associated with any underlying TensorImpl, and +// special care must be taken to handle this. +class TORCH_API Tensor: public TensorBase { + protected: + // Create a Tensor with a +0 reference count. Special care must be + // taken to avoid decrementing this reference count at destruction + // time. Intended to support MaybeOwnedTraits. + explicit Tensor(unsafe_borrow_t, const TensorBase& rhs): TensorBase(unsafe_borrow_t{}, rhs) {} + friend MaybeOwnedTraits; + friend OptionalTensorRef; + friend TensorRef; + + public: + Tensor() = default; + // This constructor should not be used by end users and is an implementation + // detail invoked by autogenerated code. + explicit Tensor( + c10::intrusive_ptr tensor_impl) + : TensorBase(std::move(tensor_impl)) {} + Tensor(const Tensor &tensor) = default; + Tensor(Tensor &&tensor) = default; + + // Implicitly move-constructible from TensorBase, but must be explicit to increase refcount + explicit Tensor(const TensorBase &base): TensorBase(base) {} + /*implicit*/ Tensor(TensorBase &&base): TensorBase(std::move(base)) {} + + // Creates a new wrapper from TensorImpl. Intentionally a free method because + // it should be used with care. 
Checks necessary invariants + static Tensor wrap_tensor_impl( + c10::intrusive_ptr tensor_impl) { + return TensorBase::wrap_tensor_impl(std::move(tensor_impl)); + } + + Tensor contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const { + return TensorBase::contiguous(memory_format); + } + + Tensor conj() const { + if (!this->is_complex()) { + return *this; + } + + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (this->layout()) { + case at::kSparse: + case at::kSparseCsr: + case at::kSparseCsc: + case at::kSparseBsr: + case at::kSparseBsc: + return this->conj_physical(); + default: + return this->_conj(); + } + C10_DIAGNOSTIC_POP() + } + + // Aliased by Dimname overloads, so need explicit using + using TensorBase::size; + using TensorBase::sym_size; + using TensorBase::stride; + + /// Should be used if *this can reasonably be expected to be contiguous and + /// performance is important. + /// Compared to contiguous, it saves a reference count + /// increment/decrement if *this is already contiguous, at the cost + /// in all cases of an extra pointer of stack usage, an extra branch + /// to access, and an extra branch at destruction time. + c10::MaybeOwned expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) const &; + + // Use .contiguous() instead. Trying to borrow from a prvalue Tensor + // will only lead to trouble and dangling references. + c10::MaybeOwned expect_contiguous(MemoryFormat memory_format=MemoryFormat::Contiguous) && = delete; + + // The following overloads are very intriguing. Consider the following + // program: + // + // x[1] = 3; + // + // We would expect that the first entry of x is written to 3. But how can we + // actually achieve this? x[1] evaluates to a tensor... + // + // The answer is, using a ref-qualifier. x[1] is an rvalue, which cannot be + // (profitably) assigned to in the traditional sense, so we overload + // assignment to mean, "Actually, copy 3 into the tensor data." 
This is done + // with an rvalue-reference ref-qualified overload (the methods with && at the + // end of their type.) + // + // There's one more fly in the ointment: We also want + // + // Tensor x = y; + // + // to work, and we want it NOT to copy. So we need a traditional operator= + // overload. But we MUST specify a mutable lvalue ref-qualifier, to + // disambiguate the traditional overload from the rvalue-reference + // ref-qualified overload. Otherwise, it will be ambiguous, because + // a non ref-qualified method is eligible for all situations. + + // Unfortunately, we have to write these constructors out manually + // to work around an MSVC bug: + // error C2580: 'at::Tensor &at::Tensor::operator =(const at::Tensor &) &': + // multiple versions of a defaulted special member functions are not allowed + // Tensor& operator=(const Tensor&) & = default; + // Tensor& operator=(Tensor&&) & = default; + + // Also MSVC will wrongly issue the following warning with the aforementioned fix + // warning C4522: 'at::Tensor': multiple assignment operators specified + // Let's just skip the warning. + // + // TODO: temporarily disabled + + Tensor& operator=(const TensorBase& x) & noexcept { + impl_ = x.getIntrusivePtr(); + return *this; + } + Tensor& operator=(TensorBase&& x) & noexcept { + impl_ = x.unsafeReleaseIntrusivePtr(); + return *this; + } + + Tensor& operator=(const Tensor &x) & noexcept { + return operator=(static_cast(x)); + } + Tensor& operator=(Tensor &&x) & noexcept { + return operator=(static_cast(x)); + } + + Tensor& operator=(const Scalar &v) && { + return fill_(v); + } + Tensor& operator=(const Tensor &rhs) && { + return copy_(rhs); + } + Tensor& operator=(Tensor&& rhs) && { + return copy_(rhs); + } + + C10_DEPRECATED_MESSAGE("Tensor.type() is deprecated. Instead use Tensor.options(), which in many cases (e.g. in a constructor) is a drop-in replacement. 
If you were using data from type(), that is now available from Tensor itself, so instead of tensor.type().scalar_type(), use tensor.scalar_type() instead and instead of tensor.type().backend() use tensor.device().") + DeprecatedTypeProperties & type() const { + return globalDeprecatedTypePropertiesRegistry().getDeprecatedTypeProperties( + dispatchKeyToBackend(legacyExtractDispatchKey(key_set())), + scalar_type()); + } + + Tensor toType(ScalarType t) const { + return to(options().dtype(t), /*non_blocking*/ false, /*copy*/ false); + } + + // TODO: Deprecate me + Tensor toBackend(Backend b) const { + return to(options().device(backendToDeviceType(b)).layout(layout_from_backend(b)), /*non_blocking*/ false, /*copy*/ false); + } + + C10_DEPRECATED_MESSAGE("Tensor.is_variable() is deprecated; everything is a variable now. (If you want to assert that variable has been appropriately handled already, use at::impl::variable_excluded_from_dispatch())") + bool is_variable() const noexcept { + return !at::impl::variable_excluded_from_dispatch(); + } + + template + C10_DEPRECATED_MESSAGE("Tensor.data() is deprecated. 
Please use Tensor.data_ptr() instead.") + T * data() const { + return data_ptr(); + } + + template + T item() const; + + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead") + GenericPackedTensorAccessor packed_accessor() const & { + return generic_packed_accessor(); + } + template class PtrTraits = DefaultPtrTraits, typename index_t = int64_t> + C10_DEPRECATED_MESSAGE("packed_accessor is deprecated, use packed_accessor32 or packed_accessor64 instead") + GenericPackedTensorAccessor packed_accessor() && = delete; + + Tensor operator~() const { + return bitwise_not(); + } + Tensor operator-() const { + return neg(); + } + Tensor& operator+=(const Tensor & other) { + return add_(other); + } + Tensor& operator+=(const Scalar & other) { + return add_(other); + } + Tensor& operator-=(const Tensor & other) { + return sub_(other); + } + Tensor& operator-=(const Scalar & other) { + return sub_(other); + } + Tensor& operator*=(const Tensor & other) { + return mul_(other); + } + Tensor& operator*=(const Scalar & other) { + return mul_(other); + } + Tensor& operator/=(const Tensor & other) { + return div_(other); + } + Tensor& operator/=(const Scalar & other) { + return div_(other); + } + Tensor& operator&=(const Tensor & other) { + return bitwise_and_(other); + } + Tensor& operator|=(const Tensor & other) { + return bitwise_or_(other); + } + Tensor& operator^=(const Tensor & other) { + return bitwise_xor_(other); + } + Tensor operator[](const Scalar & index) const { + if (!index.isIntegral(false)) { + TORCH_CHECK_INDEX(false, "Can only index tensors with integral scalars"); + } + return this->operator[](index.toLong()); + } + Tensor operator[](const Tensor & index) const { + // These properties are checked in the Scalar constructor, but we already + // check them here to provide more useful diagnostics for the user. 
+ if (!index.defined()) { + TORCH_CHECK_INDEX(false, "Can only index with tensors that are defined"); + } + if (index.dim() != 0) { + TORCH_CHECK_INDEX(false, + "Can only index with tensors that are scalars (zero-dim)"); + } + // The Scalar(Tensor) constructor is explicit, so we need to call it. + return this->operator[](index.item()); + } + Tensor operator[](int64_t index) const { + return select(0, index); + } + + Tensor index(ArrayRef indices) const; + Tensor index(std::initializer_list indices) const; + + Tensor & index_put_(ArrayRef indices, Tensor const & rhs); + Tensor & index_put_(ArrayRef indices, const Scalar& v); + Tensor & index_put_(std::initializer_list indices, Tensor const & rhs); + Tensor & index_put_(std::initializer_list indices, const Scalar& v); + + Tensor cpu() const { + return to(options().device(c10::DeviceType::CPU), /*non_blocking*/ false, /*copy*/ false); + } + + // TODO: The Python version also accepts arguments + Tensor cuda() const { + return to(options().device(c10::DeviceType::CUDA), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor hip() const { + return to(options().device(c10::DeviceType::HIP), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor ve() const { + return to(options().device(c10::DeviceType::VE), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor vulkan() const { + return to(options().device(c10::DeviceType::Vulkan), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor metal() const { + return to(options().device(c10::DeviceType::Metal), /*non_blocking*/ false, /*copy*/ false); + } + + Tensor meta() const { + return to(options().device(c10::DeviceType::Meta), /*non_blocking*/ false, /*copy*/ false); + } + + // ~~~~~ Autograd API ~~~~~ + + /// \fn bool is_leaf() const; + /// + /// All Tensors that have `requires_grad()` which is ``false`` will be leaf Tensors by convention. 
+  ///
+  /// For Tensors that have `requires_grad()` which is ``true``, they will be leaf Tensors if they were
+  /// created by the user. This means that they are not the result of an operation and so
+  /// `grad_fn()` is `nullptr`.
+  ///
+  /// Only leaf Tensors will have their `grad()` populated during a call to `backward()`.
+  /// To get `grad()` populated for non-leaf Tensors, you can use `retain_grad()`.
+  ///
+  /// Example:
+  /// @code
+  /// auto a = torch::rand(10, torch::requires_grad());
+  /// std::cout << a.is_leaf() << std::endl; // prints `true`
+  ///
+  /// auto b = torch::rand(10, torch::requires_grad()).to(torch::kCUDA);
+  /// std::cout << b.is_leaf() << std::endl; // prints `false`
+  /// // b was created by the operation that cast a cpu Tensor into a cuda Tensor
+  ///
+  /// auto c = torch::rand(10, torch::requires_grad()) + 2;
+  /// std::cout << c.is_leaf() << std::endl; // prints `false`
+  /// // c was created by the addition operation
+  ///
+  /// auto d = torch::rand(10).cuda();
+  /// std::cout << d.is_leaf() << std::endl; // prints `true`
+  /// // d does not require gradients and so has no operation creating it (that is tracked by the autograd engine)
+  ///
+  /// auto e = torch::rand(10).cuda().requires_grad_();
+  /// std::cout << e.is_leaf() << std::endl; // prints `true`
+  /// // e requires gradients and has no operations creating it
+  ///
+  /// auto f = torch::rand(10, torch::device(torch::kCUDA).requires_grad(true));
+  /// std::cout << f.is_leaf() << std::endl; // prints `true`
+  /// // f requires grad, has no operation creating it
+  /// @endcode
+
+  /// \fn void backward(const Tensor & gradient={}, std::optional<bool> retain_graph=std::nullopt, bool create_graph=false, std::optional<TensorList> inputs=std::nullopt) const;
+  ///
+  /// Computes the gradient of the current tensor with respect to graph leaves.
+  ///
+  /// The graph is differentiated using the chain rule. If the tensor is
+  /// non-scalar (i.e. its data has more than one element) and requires
+  /// gradient, the function additionally requires specifying ``gradient``.
+  /// It should be a tensor of matching type and location, that contains
+  /// the gradient of the differentiated function w.r.t. this Tensor.
+  ///
+  /// This function accumulates gradients in the leaves - you might need to
+  /// zero them before calling it.
+  ///
+  /// \param gradient Gradient w.r.t. the
+  ///     tensor. If it is a tensor, it will be automatically converted
+  ///     to a Tensor that does not require grad unless ``create_graph`` is ``true``.
+  ///     None values can be specified for scalar Tensors or ones that
+  ///     don't require grad. If a None value would be acceptable then
+  ///     this argument is optional.
+  /// \param retain_graph If ``false``, the graph used to compute
+  ///     the grads will be freed. Note that in nearly all cases setting
+  ///     this option to ``true`` is not needed and often can be worked around
+  ///     in a much more efficient way. Defaults to the value of
+  ///     ``create_graph``.
+  /// \param create_graph If ``true``, the graph of the derivative will
+  ///     be constructed, allowing higher-order derivative products
+  ///     to be computed. Defaults to ``false``.
+  /// \param inputs Inputs w.r.t. which the gradient will be accumulated into
+  ///     ``at::Tensor::grad``. All other Tensors will be ignored. If not
+  ///     provided, the gradient is accumulated into all the leaf Tensors
+  ///     that were used to compute the current tensor.
+  ///     When inputs are provided and a given input is not a leaf,
+  ///     the current implementation will call its grad_fn (even though it is not strictly needed to get these gradients).
+  ///     It is an implementation detail on which the user should not rely.
+  ///     See https://github.com/pytorch/pytorch/pull/60521#issuecomment-867061780 for more details.
+ void backward(const Tensor & gradient={}, std::optional retain_graph=std::nullopt, bool create_graph=false, std::optional inputs=std::nullopt) const { + // NB: Adding this wrapper to _backward here because we'd like our + // 'backwards' api to accept the 'inputs' argument optionally. Since code gen + // currently does not support optional of TensorList our approach is to replace + // backward in native_functions.yaml with _backward and call it here instead. + if (inputs.has_value()) { + TORCH_CHECK(inputs.value().size() > 0, "'inputs' argument to backward cannot be empty") + this->_backward(inputs.value(), gradient, retain_graph, create_graph); + } else { + this->_backward({}, gradient, retain_graph, create_graph); + } + } + + /// \fn Tensor detach() const; + /// + /// Returns a new Tensor, detached from the current graph. + /// The result will never require gradient. + + /// \fn Tensor & detach_() const; + /// + /// Detaches the Tensor from the graph that created it, making it a leaf. + /// Views cannot be detached in-place. + + /// \fn void retain_grad() const; + /// + /// Enables this Tensor to have their :attr:`grad` populated during + /// :func:`backward`. This is a no-op for leaf tensors. + + /// \fn bool retains_grad() const; + /// + /// Is ``true`` if this Tensor is non-leaf and its :attr:`grad` is enabled to be + /// populated during :func:`backward`, ``false`` otherwise. + + const Tensor& set_requires_grad(bool requires_grad) const { + TensorBase::set_requires_grad(requires_grad); + return *this; + } + + /// Return a mutable reference to the gradient. This is conventionally + /// used as `t.grad() = x` to set a gradient to a completely new tensor. + /// Note that this function work with a non-const Tensor and is not + /// thread safe. 
+  Tensor& mutable_grad() const {
+    Tensor& grad_slot = impl_->mutable_grad();
+    return grad_slot;
+  }
+
+  /// Read-only access to the gradient stored for this Tensor.
+  /// By default the returned tensor is undefined; it becomes defined the first
+  /// time a call to `backward()` computes gradients for this Tensor, and later
+  /// calls to `backward()` accumulate (add) into it.
+  /// Emits a warning when the tensor is a non-leaf that does not retain its
+  /// grad and no gradient is stored.
+  const Tensor& grad() const {
+    const Tensor& stored_grad = impl_->grad();
+    // Nothing to warn about for leaves, for tensors that retain their grad,
+    // or once a gradient is actually present.
+    if (is_leaf() || retains_grad() || stored_grad.defined()) {
+      return stored_grad;
+    }
+    TORCH_WARN(
+      "The .grad attribute of a Tensor that is not a leaf Tensor is being accessed. Its .grad "
+      "attribute won't be populated during autograd.backward(). If you indeed want the .grad "
+      "field to be populated for a non-leaf Tensor, use .retain_grad() on the non-leaf Tensor. "
+      "If you access the non-leaf Tensor by mistake, make sure you access the leaf Tensor "
+      "instead. See github.com/pytorch/pytorch/pull/30531 for more information.");
+    return stored_grad;
+  }
+
+  // The Forward AD API functions below are low level and are not to be used by end
+  // users who should use the API provided in torch/csrc/autograd.h
+
+  /// Forward-mode gradient of this Tensor at `level`; delegates to
+  /// `impl_->_fw_grad`, passing this Tensor along.
+  const Tensor& _fw_grad(uint64_t level) const {
+    const Tensor& self = *this;
+    return impl_->_fw_grad(level, self);
+  }
+
+  /// Sets the forward-mode gradient of this Tensor at `level`.
+  /// `new_grad` may not be used directly when its metadata
+  /// (size/stride/storage offset) differs from this Tensor's; in that case
+  /// its content is copied into a new Tensor.
+  void _set_fw_grad(const TensorBase& new_grad, uint64_t level, bool is_inplace_op) const {
+    const Tensor& self = *this;
+    impl_->_set_fw_grad(new_grad, self, level, is_inplace_op);
+  }
+
+
+  // STOP.  Thinking of adding a method here, which only makes use
+  // of other ATen methods?  Define it in native_functions.yaml.
+ + //example + //Tensor * add(Tensor & b); + void __dispatch__backward(at::TensorList inputs, const ::std::optional & gradient={}, ::std::optional retain_graph=::std::nullopt, bool create_graph=false) const; + void __dispatch_set_data(const at::Tensor & new_data) const; + at::Tensor __dispatch_data() const; + bool __dispatch_is_leaf() const; + int64_t __dispatch_output_nr() const; + int64_t __dispatch__version() const; + at::Tensor & __dispatch_requires_grad_(bool requires_grad=true) const; + void __dispatch_retain_grad() const; + bool __dispatch_retains_grad() const; + at::Tensor _fw_primal(int64_t level) const; + at::Tensor & rename_(::std::optional names) const; + at::Tensor rename(::std::optional names) const; + at::Tensor align_to(at::DimnameList names) const; + at::Tensor align_to(at::DimnameList order, int64_t ellipsis_idx) const; + at::Tensor align_as(const at::Tensor & other) const; + at::Tensor refine_names(at::DimnameList names) const; + at::Tensor abs() const; + at::Tensor & abs_() const; + at::Tensor absolute() const; + at::Tensor & absolute_() const; + at::Tensor angle() const; + at::Tensor sgn() const; + at::Tensor & sgn_() const; + at::Tensor chalf(::std::optional memory_format=::std::nullopt) const; + at::Tensor _conj() const; + at::Tensor __dispatch_conj() const; + at::Tensor _conj_physical() const; + at::Tensor conj_physical() const; + at::Tensor & conj_physical_() const; + at::Tensor resolve_conj() const; + at::Tensor resolve_neg() const; + at::Tensor _neg_view() const; + at::Tensor acos() const; + at::Tensor & acos_() const; + at::Tensor arccos() const; + at::Tensor & arccos_() const; + at::Tensor add(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & add_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor add(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor & add_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor addmv(const at::Tensor & mat, 
const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addmv_(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor addr(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addr_(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor _is_all_true() const; + at::Tensor _is_any_true() const; + at::Tensor all(int64_t dim, bool keepdim=false) const; + at::Tensor all(at::OptionalIntArrayRef dim, bool keepdim=false) const; + at::Tensor all(at::Dimname dim, bool keepdim=false) const; + bool allclose(const at::Tensor & other, double rtol=1e-05, double atol=1e-08, bool equal_nan=false) const; + at::Tensor any(int64_t dim, bool keepdim=false) const; + at::Tensor any(at::OptionalIntArrayRef dim, bool keepdim=false) const; + at::Tensor any(at::Dimname dim, bool keepdim=false) const; + at::Tensor argmax(::std::optional dim=::std::nullopt, bool keepdim=false) const; + at::Tensor argmin(::std::optional dim=::std::nullopt, bool keepdim=false) const; + at::Tensor acosh() const; + at::Tensor & acosh_() const; + at::Tensor arccosh() const; + at::Tensor & arccosh_() const; + at::Tensor asinh() const; + at::Tensor & asinh_() const; + at::Tensor arcsinh() const; + at::Tensor & arcsinh_() const; + at::Tensor atanh() const; + at::Tensor & atanh_() const; + at::Tensor arctanh() const; + at::Tensor & arctanh_() const; + at::Tensor as_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor as_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + const at::Tensor & as_strided_(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + const at::Tensor & 
as_strided__symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor asin() const; + at::Tensor & asin_() const; + at::Tensor arcsin() const; + at::Tensor & arcsin_() const; + at::Tensor atan() const; + at::Tensor & atan_() const; + at::Tensor arctan() const; + at::Tensor & arctan_() const; + at::Tensor baddbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & baddbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor bernoulli(::std::optional generator=::std::nullopt) const; + at::Tensor & bernoulli_(const at::Tensor & p, ::std::optional generator=::std::nullopt) const; + at::Tensor & bernoulli_(double p=0.5, ::std::optional generator=::std::nullopt) const; + at::Tensor bernoulli(double p, ::std::optional generator=::std::nullopt) const; + at::Tensor bincount(const ::std::optional & weights={}, int64_t minlength=0) const; + at::Tensor bincount_symint(const ::std::optional & weights={}, c10::SymInt minlength=0) const; + at::Tensor bitwise_not() const; + at::Tensor & bitwise_not_() const; + at::Tensor copysign(const at::Tensor & other) const; + at::Tensor & copysign_(const at::Tensor & other) const; + at::Tensor copysign(const at::Scalar & other) const; + at::Tensor & copysign_(const at::Scalar & other) const; + at::Tensor _lazy_clone() const; + at::Tensor logical_not() const; + at::Tensor & logical_not_() const; + at::Tensor logical_xor(const at::Tensor & other) const; + at::Tensor & logical_xor_(const at::Tensor & other) const; + at::Tensor logical_and(const at::Tensor & other) const; + at::Tensor & logical_and_(const at::Tensor & other) const; + at::Tensor logical_or(const at::Tensor & other) const; + at::Tensor & logical_or_(const at::Tensor & other) const; + at::Tensor bmm(const at::Tensor & mat2) const; + at::Tensor 
broadcast_to(at::IntArrayRef size) const; + at::Tensor broadcast_to_symint(c10::SymIntArrayRef size) const; + at::Tensor ceil() const; + at::Tensor & ceil_() const; + ::std::vector unsafe_chunk(int64_t chunks, int64_t dim=0) const; + ::std::vector chunk(int64_t chunks, int64_t dim=0) const; + ::std::vector tensor_split(int64_t sections, int64_t dim=0) const; + ::std::vector tensor_split_symint(c10::SymInt sections, int64_t dim=0) const; + ::std::vector tensor_split(at::IntArrayRef indices, int64_t dim=0) const; + ::std::vector tensor_split_symint(c10::SymIntArrayRef indices, int64_t dim=0) const; + ::std::vector tensor_split(const at::Tensor & tensor_indices_or_sections, int64_t dim=0) const; + at::Tensor clamp(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor clamp(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor & clamp_(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor & clamp_(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor clamp_max(const at::Scalar & max) const; + at::Tensor clamp_max(const at::Tensor & max) const; + at::Tensor & clamp_max_(const at::Scalar & max) const; + at::Tensor & clamp_max_(const at::Tensor & max) const; + at::Tensor clamp_min(const at::Scalar & min) const; + at::Tensor clamp_min(const at::Tensor & min) const; + at::Tensor & clamp_min_(const at::Scalar & min) const; + at::Tensor & clamp_min_(const at::Tensor & min) const; + at::Tensor clip(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor clip(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor & clip_(const ::std::optional & min, const ::std::optional & max=::std::nullopt) const; + at::Tensor & clip_(const ::std::optional & min={}, const ::std::optional & max={}) const; + at::Tensor __dispatch_contiguous(at::MemoryFormat 
memory_format=c10::MemoryFormat::Contiguous) const; + at::Tensor & copy_(const at::Tensor & src, bool non_blocking=false) const; + at::Tensor cos() const; + at::Tensor & cos_() const; + at::Tensor cosh() const; + at::Tensor & cosh_() const; + at::Tensor count_nonzero(at::IntArrayRef dim) const; + at::Tensor count_nonzero(::std::optional dim=::std::nullopt) const; + at::Tensor cov(int64_t correction=1, const ::std::optional & fweights={}, const ::std::optional & aweights={}) const; + at::Tensor corrcoef() const; + ::std::tuple cummax(int64_t dim) const; + ::std::tuple cummax(at::Dimname dim) const; + ::std::tuple cummin(int64_t dim) const; + ::std::tuple cummin(at::Dimname dim) const; + at::Tensor cumprod(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumprod_(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumprod(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumprod_(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumsum(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumsum_(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor cumsum(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor & cumsum_(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor diag_embed(int64_t offset=0, int64_t dim1=-2, int64_t dim2=-1) const; + at::Tensor diagflat(int64_t offset=0) const; + at::Tensor diagonal(int64_t offset=0, int64_t dim1=0, int64_t dim2=1) const; + at::Tensor diagonal(at::Dimname outdim, at::Dimname dim1, at::Dimname dim2, int64_t offset=0) const; + at::Tensor & fill_diagonal_(const at::Scalar & fill_value, bool wrap=false) const; + at::Tensor diff(int64_t n=1, int64_t dim=-1, const ::std::optional & prepend={}, const ::std::optional & append={}) const; + at::Tensor div(const at::Tensor & other) const; + at::Tensor & div_(const at::Tensor & other) const; + at::Tensor div(const 
at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor & div_(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor div(const at::Scalar & other) const; + at::Tensor & div_(const at::Scalar & other) const; + at::Tensor div(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor & div_(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor divide(const at::Tensor & other) const; + at::Tensor & divide_(const at::Tensor & other) const; + at::Tensor divide(const at::Scalar & other) const; + at::Tensor & divide_(const at::Scalar & other) const; + at::Tensor divide(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor & divide_(const at::Tensor & other, ::std::optional rounding_mode) const; + at::Tensor divide(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor & divide_(const at::Scalar & other, ::std::optional rounding_mode) const; + at::Tensor true_divide(const at::Tensor & other) const; + at::Tensor & true_divide_(const at::Tensor & other) const; + at::Tensor true_divide(const at::Scalar & other) const; + at::Tensor & true_divide_(const at::Scalar & other) const; + at::Tensor dot(const at::Tensor & tensor) const; + at::Tensor vdot(const at::Tensor & other) const; + at::Tensor new_empty(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_empty(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) const; + at::Tensor new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, 
::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) const; + at::Tensor new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_full(at::IntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options={}) const; + at::Tensor new_full(at::IntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options={}) const; + at::Tensor new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_zeros(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_zeros(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_zeros_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_zeros_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_ones(at::IntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_ones(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const; + at::Tensor new_ones_symint(c10::SymIntArrayRef size, at::TensorOptions options={}) const; + at::Tensor new_ones_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional 
pin_memory) const; + const at::Tensor & resize_(at::IntArrayRef size, ::std::optional memory_format=::std::nullopt) const; + const at::Tensor & resize__symint(c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt) const; + at::Tensor erf() const; + at::Tensor & erf_() const; + at::Tensor erfc() const; + at::Tensor & erfc_() const; + at::Tensor exp() const; + at::Tensor & exp_() const; + at::Tensor exp2() const; + at::Tensor & exp2_() const; + at::Tensor expm1() const; + at::Tensor & expm1_() const; + at::Tensor expand(at::IntArrayRef size, bool implicit=false) const; + at::Tensor expand_symint(c10::SymIntArrayRef size, bool implicit=false) const; + at::Tensor expand_as(const at::Tensor & other) const; + at::Tensor flatten(int64_t start_dim=0, int64_t end_dim=-1) const; + at::Tensor flatten(int64_t start_dim, int64_t end_dim, at::Dimname out_dim) const; + at::Tensor flatten(at::Dimname start_dim, at::Dimname end_dim, at::Dimname out_dim) const; + at::Tensor flatten(at::DimnameList dims, at::Dimname out_dim) const; + at::Tensor unflatten(int64_t dim, at::IntArrayRef sizes) const; + at::Tensor unflatten_symint(int64_t dim, c10::SymIntArrayRef sizes) const; + at::Tensor unflatten(at::Dimname dim, at::IntArrayRef sizes, at::DimnameList names) const; + at::Tensor unflatten_symint(at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names) const; + at::Tensor & fill_(const at::Scalar & value) const; + at::Tensor & fill_(const at::Tensor & value) const; + at::Tensor floor() const; + at::Tensor & floor_() const; + at::Tensor floor_divide(const at::Tensor & other) const; + at::Tensor & floor_divide_(const at::Tensor & other) const; + at::Tensor floor_divide(const at::Scalar & other) const; + at::Tensor & floor_divide_(const at::Scalar & other) const; + at::Tensor frac() const; + at::Tensor & frac_() const; + at::Tensor gcd(const at::Tensor & other) const; + at::Tensor & gcd_(const at::Tensor & other) const; + at::Tensor lcm(const at::Tensor & other) 
const; + at::Tensor & lcm_(const at::Tensor & other) const; + at::Tensor index(const c10::List<::std::optional> & indices) const; + at::Tensor & index_copy_(int64_t dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor index_copy(int64_t dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor & index_copy_(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor index_copy(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const; + at::Tensor & index_put_(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false) const; + at::Tensor index_put(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false) const; + at::Tensor isclose(const at::Tensor & other, double rtol=1e-05, double atol=1e-08, bool equal_nan=false) const; + at::Tensor isnan() const; + bool is_distributed() const; + bool __dispatch_is_floating_point() const; + bool __dispatch_is_complex() const; + bool __dispatch_is_conj() const; + bool __dispatch__is_zerotensor() const; + bool __dispatch_is_neg() const; + at::Tensor isreal() const; + bool is_nonzero() const; + bool is_same_size(const at::Tensor & other) const; + bool __dispatch_is_signed() const; + bool __dispatch_is_inference() const; + at::Tensor kron(const at::Tensor & other) const; + ::std::tuple kthvalue(int64_t k, int64_t dim=-1, bool keepdim=false) const; + ::std::tuple kthvalue_symint(c10::SymInt k, int64_t dim=-1, bool keepdim=false) const; + ::std::tuple kthvalue(int64_t k, at::Dimname dim, bool keepdim=false) const; + ::std::tuple kthvalue_symint(c10::SymInt k, at::Dimname dim, bool keepdim=false) const; + at::Tensor nan_to_num(::std::optional nan=::std::nullopt, ::std::optional posinf=::std::nullopt, ::std::optional neginf=::std::nullopt) const; + at::Tensor & nan_to_num_(::std::optional nan=::std::nullopt, ::std::optional posinf=::std::nullopt, ::std::optional 
neginf=::std::nullopt) const; + at::Tensor ldexp(const at::Tensor & other) const; + at::Tensor & ldexp_(const at::Tensor & other) const; + at::Tensor log() const; + at::Tensor & log_() const; + at::Tensor log10() const; + at::Tensor & log10_() const; + at::Tensor log1p() const; + at::Tensor & log1p_() const; + at::Tensor log2() const; + at::Tensor & log2_() const; + at::Tensor logaddexp(const at::Tensor & other) const; + at::Tensor logaddexp2(const at::Tensor & other) const; + at::Tensor xlogy(const at::Tensor & other) const; + at::Tensor xlogy(const at::Scalar & other) const; + at::Tensor & xlogy_(const at::Tensor & other) const; + at::Tensor & xlogy_(const at::Scalar & other) const; + at::Tensor log_softmax(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor log_softmax(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor logcumsumexp(int64_t dim) const; + at::Tensor logcumsumexp(at::Dimname dim) const; + at::Tensor logsumexp(at::IntArrayRef dim, bool keepdim=false) const; + at::Tensor logsumexp(at::DimnameList dim, bool keepdim=false) const; + at::Tensor matmul(const at::Tensor & other) const; + at::Tensor matrix_power(int64_t n) const; + at::Tensor matrix_exp() const; + ::std::tuple aminmax(::std::optional dim=::std::nullopt, bool keepdim=false) const; + ::std::tuple max(int64_t dim, bool keepdim=false) const; + ::std::tuple max(at::Dimname dim, bool keepdim=false) const; + at::Tensor amax(at::IntArrayRef dim={}, bool keepdim=false) const; + at::Tensor mean(::std::optional dtype=::std::nullopt) const; + at::Tensor mean(at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor mean(at::DimnameList dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor nanmean(at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor median() const; + ::std::tuple median(int64_t dim, bool 
keepdim=false) const; + ::std::tuple median(at::Dimname dim, bool keepdim=false) const; + at::Tensor nanmedian() const; + ::std::tuple nanmedian(int64_t dim, bool keepdim=false) const; + ::std::tuple nanmedian(at::Dimname dim, bool keepdim=false) const; + ::std::tuple min(int64_t dim, bool keepdim=false) const; + ::std::tuple min(at::Dimname dim, bool keepdim=false) const; + at::Tensor amin(at::IntArrayRef dim={}, bool keepdim=false) const; + at::Tensor mm(const at::Tensor & mat2) const; + ::std::tuple mode(int64_t dim=-1, bool keepdim=false) const; + ::std::tuple mode(at::Dimname dim, bool keepdim=false) const; + at::Tensor mul(const at::Tensor & other) const; + at::Tensor & mul_(const at::Tensor & other) const; + at::Tensor mul(const at::Scalar & other) const; + at::Tensor & mul_(const at::Scalar & other) const; + at::Tensor multiply(const at::Tensor & other) const; + at::Tensor & multiply_(const at::Tensor & other) const; + at::Tensor multiply(const at::Scalar & other) const; + at::Tensor & multiply_(const at::Scalar & other) const; + at::Tensor mv(const at::Tensor & vec) const; + at::Tensor mvlgamma(int64_t p) const; + at::Tensor & mvlgamma_(int64_t p) const; + at::Tensor narrow_copy(int64_t dim, int64_t start, int64_t length) const; + at::Tensor narrow_copy_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const; + at::Tensor narrow(int64_t dim, int64_t start, int64_t length) const; + at::Tensor narrow_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const; + at::Tensor narrow(int64_t dim, const at::Tensor & start, int64_t length) const; + at::Tensor narrow_symint(int64_t dim, const at::Tensor & start, c10::SymInt length) const; + at::Tensor permute(at::IntArrayRef dims) const; + at::Tensor movedim(at::IntArrayRef source, at::IntArrayRef destination) const; + at::Tensor movedim(int64_t source, int64_t destination) const; + at::Tensor moveaxis(at::IntArrayRef source, at::IntArrayRef destination) const; + at::Tensor moveaxis(int64_t source, 
int64_t destination) const; + at::Tensor numpy_T() const; + at::Tensor matrix_H() const; + at::Tensor mT() const; + at::Tensor mH() const; + at::Tensor adjoint() const; + bool is_pinned(::std::optional device=::std::nullopt) const; + at::Tensor pin_memory(::std::optional device=::std::nullopt) const; + at::Tensor pinverse(double rcond=1e-15) const; + at::Tensor rad2deg() const; + at::Tensor & rad2deg_() const; + at::Tensor deg2rad() const; + at::Tensor & deg2rad_() const; + at::Tensor ravel() const; + at::Tensor reciprocal() const; + at::Tensor & reciprocal_() const; + at::Tensor neg() const; + at::Tensor & neg_() const; + at::Tensor negative() const; + at::Tensor & negative_() const; + at::Tensor repeat(at::IntArrayRef repeats) const; + at::Tensor repeat_symint(c10::SymIntArrayRef repeats) const; + at::Tensor repeat_interleave(const at::Tensor & repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave_symint(const at::Tensor & repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave(int64_t repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor repeat_interleave_symint(c10::SymInt repeats, ::std::optional dim=::std::nullopt, ::std::optional output_size=::std::nullopt) const; + at::Tensor reshape(at::IntArrayRef shape) const; + at::Tensor reshape_symint(c10::SymIntArrayRef shape) const; + at::Tensor _reshape_alias(at::IntArrayRef size, at::IntArrayRef stride) const; + at::Tensor _reshape_alias_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const; + at::Tensor reshape_as(const at::Tensor & other) const; + at::Tensor round() const; + at::Tensor & round_() const; + at::Tensor round(int64_t decimals) const; + at::Tensor & round_(int64_t decimals) const; + at::Tensor relu() const; + at::Tensor & relu_() const; + at::Tensor prelu(const at::Tensor & weight) const; + 
at::Tensor hardshrink(const at::Scalar & lambd=0.5) const; + at::Tensor hardshrink_backward(const at::Tensor & grad_out, const at::Scalar & lambd) const; + at::Tensor rsqrt() const; + at::Tensor & rsqrt_() const; + at::Tensor select(at::Dimname dim, int64_t index) const; + at::Tensor select(int64_t dim, int64_t index) const; + at::Tensor select_symint(int64_t dim, c10::SymInt index) const; + at::Tensor sigmoid() const; + at::Tensor & sigmoid_() const; + at::Tensor logit(::std::optional eps=::std::nullopt) const; + at::Tensor & logit_(::std::optional eps=::std::nullopt) const; + at::Tensor sin() const; + at::Tensor & sin_() const; + at::Tensor sinc() const; + at::Tensor & sinc_() const; + at::Tensor sinh() const; + at::Tensor & sinh_() const; + at::Tensor detach() const; + at::Tensor & detach_() const; + int64_t size(at::Dimname dim) const; + at::Tensor slice(int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_symint(int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor slice_inverse(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_inverse_symint(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor slice_scatter(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, int64_t step=1) const; + at::Tensor slice_scatter_symint(const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1) const; + at::Tensor select_scatter(const at::Tensor & src, int64_t dim, int64_t index) const; + at::Tensor select_scatter_symint(const at::Tensor & src, int64_t dim, c10::SymInt index) const; + at::Tensor 
diagonal_scatter(const at::Tensor & src, int64_t offset=0, int64_t dim1=0, int64_t dim2=1) const; + at::Tensor as_strided_scatter(const at::Tensor & src, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor as_strided_scatter_symint(const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset=::std::nullopt) const; + at::Tensor smm(const at::Tensor & mat2) const; + at::Tensor softmax(int64_t dim, ::std::optional dtype=::std::nullopt) const; + at::Tensor softmax(at::Dimname dim, ::std::optional dtype=::std::nullopt) const; + ::std::vector unsafe_split(int64_t split_size, int64_t dim=0) const; + ::std::vector unsafe_split_symint(c10::SymInt split_size, int64_t dim=0) const; + ::std::vector split(int64_t split_size, int64_t dim=0) const; + ::std::vector split_symint(c10::SymInt split_size, int64_t dim=0) const; + ::std::vector split(at::IntArrayRef split_size, int64_t dim=0) const; + ::std::vector split_symint(c10::SymIntArrayRef split_size, int64_t dim=0) const; + ::std::vector unsafe_split_with_sizes(at::IntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector unsafe_split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector split_with_sizes(at::IntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim=0) const; + ::std::vector hsplit(int64_t sections) const; + ::std::vector hsplit(at::IntArrayRef indices) const; + ::std::vector vsplit(int64_t sections) const; + ::std::vector vsplit(at::IntArrayRef indices) const; + ::std::vector dsplit(int64_t sections) const; + ::std::vector dsplit(at::IntArrayRef indices) const; + at::Tensor squeeze() const; + at::Tensor squeeze(int64_t dim) const; + at::Tensor squeeze(at::Dimname dim) const; + at::Tensor squeeze(at::IntArrayRef dim) const; + at::Tensor & squeeze_() const; + at::Tensor & squeeze_(int64_t dim) 
const; + at::Tensor & squeeze_(at::IntArrayRef dim) const; + at::Tensor & squeeze_(at::Dimname dim) const; + at::Tensor sspaddmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) const; + at::Tensor stft(int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) const; + at::Tensor istft(int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional length=::std::nullopt, bool return_complex=false) const; + int64_t stride(at::Dimname dim) const; + at::Tensor sum(::std::optional dtype=::std::nullopt) const; + at::Tensor sum(at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor sum(at::DimnameList dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor nansum(at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor hash_tensor(at::IntArrayRef dim={}, bool keepdim=false, int64_t mode=0) const; + at::Tensor sum_to_size(at::IntArrayRef size) const; + at::Tensor sum_to_size_symint(c10::SymIntArrayRef size) const; + at::Tensor sqrt() const; + at::Tensor & sqrt_() const; + at::Tensor square() const; + at::Tensor & square_() const; + at::Tensor 
std(bool unbiased) const; + at::Tensor std(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false) const; + at::Tensor std(at::OptionalIntArrayRef dim=::std::nullopt, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor std(at::DimnameList dim, bool unbiased, bool keepdim=false) const; + at::Tensor std(at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor prod(::std::optional dtype=::std::nullopt) const; + at::Tensor prod(int64_t dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor prod(at::Dimname dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) const; + at::Tensor t() const; + at::Tensor & t_() const; + at::Tensor tan() const; + at::Tensor & tan_() const; + at::Tensor tanh() const; + at::Tensor & tanh_() const; + at::Tensor tile(at::IntArrayRef dims) const; + at::Tensor tile_symint(c10::SymIntArrayRef dims) const; + at::Tensor transpose(int64_t dim0, int64_t dim1) const; + at::Tensor transpose(at::Dimname dim0, at::Dimname dim1) const; + at::Tensor & transpose_(int64_t dim0, int64_t dim1) const; + at::Tensor flip(at::IntArrayRef dims) const; + at::Tensor fliplr() const; + at::Tensor flipud() const; + at::Tensor roll(at::IntArrayRef shifts, at::IntArrayRef dims={}) const; + at::Tensor roll_symint(c10::SymIntArrayRef shifts, at::IntArrayRef dims={}) const; + at::Tensor rot90(int64_t k=1, at::IntArrayRef dims={0,1}) const; + at::Tensor _nested_tensor_size() const; + at::Tensor _nested_tensor_strides() const; + at::Tensor _nested_tensor_storage_offsets() const; + at::Tensor trunc() const; + at::Tensor & trunc_() const; + at::Tensor fix() const; + at::Tensor & fix_() const; + at::Tensor type_as(const at::Tensor & other) const; + at::Tensor unsqueeze(int64_t dim) const; + at::Tensor & unsqueeze_(int64_t dim) const; + at::Tensor var(bool unbiased) const; + at::Tensor var(at::OptionalIntArrayRef dim, bool unbiased, bool 
keepdim=false) const; + at::Tensor var(at::OptionalIntArrayRef dim=::std::nullopt, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor var(at::DimnameList dim, bool unbiased, bool keepdim=false) const; + at::Tensor var(at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false) const; + at::Tensor view_as(const at::Tensor & other) const; + at::Tensor where(const at::Tensor & condition, const at::Tensor & other) const; + at::Tensor where(const at::Tensor & condition, const at::Scalar & other) const; + at::Tensor norm(const ::std::optional & p, at::ScalarType dtype) const; + at::Tensor norm(const at::Scalar & p=2) const; + at::Tensor norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype) const; + at::Tensor norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim=false) const; + at::Tensor norm(const ::std::optional & p, at::DimnameList dim, bool keepdim, at::ScalarType dtype) const; + at::Tensor norm(const ::std::optional & p, at::DimnameList dim, bool keepdim=false) const; + ::std::tuple frexp() const; + at::Tensor clone(::std::optional memory_format=::std::nullopt) const; + at::Tensor positive() const; + const at::Tensor & resize_as_(const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt) const; + const at::Tensor & resize_as_sparse_(const at::Tensor & the_template) const; + at::Tensor & zero_() const; + at::Tensor sub(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & sub_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor sub(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor & sub_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor subtract(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor & subtract_(const at::Tensor & other, const at::Scalar & alpha=1) const; + at::Tensor subtract(const at::Scalar & other, 
const at::Scalar & alpha=1) const; + at::Tensor & subtract_(const at::Scalar & other, const at::Scalar & alpha=1) const; + at::Tensor heaviside(const at::Tensor & values) const; + at::Tensor & heaviside_(const at::Tensor & values) const; + at::Tensor addmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & addmm_(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor _addmm_activation(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta=1, const at::Scalar & alpha=1, bool use_gelu=false) const; + const at::Tensor & sparse_resize_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const; + const at::Tensor & sparse_resize_and_clear_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const; + at::Tensor sparse_mask(const at::Tensor & mask) const; + at::Tensor _sparse_mask_projection(const at::Tensor & mask, bool accumulate_matches=false) const; + at::Tensor to_dense(::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt) const; + at::Tensor _to_dense(::std::optional dtype=::std::nullopt, ::std::optional masked_grad=::std::nullopt) const; + int64_t sparse_dim() const; + int64_t _dimI() const; + int64_t dense_dim() const; + int64_t _dimV() const; + int64_t _nnz() const; + at::Tensor coalesce() const; + bool is_coalesced() const; + at::Tensor _indices() const; + at::Tensor _values() const; + at::Tensor & _coalesced_(bool coalesced) const; + at::Tensor indices() const; + at::Tensor values() const; + at::Tensor crow_indices() const; + at::Tensor col_indices() const; + at::Tensor ccol_indices() const; + at::Tensor row_indices() const; + ::std::vector unbind(int64_t dim=0) const; + ::std::vector unbind(at::Dimname dim) const; + at::Tensor to_sparse(int64_t sparse_dim) const; + at::Tensor _to_sparse(int64_t sparse_dim) const; + at::Tensor to_sparse(::std::optional 
layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse(::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_csr(::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_csr(::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_csc(::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_csc(::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor _to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt) const; + at::Tensor to_mkldnn(::std::optional dtype=::std::nullopt) const; + at::Tensor dequantize() const; + double q_scale() const; + int64_t q_zero_point() const; + at::Tensor q_per_channel_scales() const; + at::Tensor q_per_channel_zero_points() const; + int64_t q_per_channel_axis() const; + at::Tensor int_repr() const; + at::QScheme qscheme() const; + at::Tensor _autocast_to_reduced_precision(bool cuda_enabled, bool cpu_enabled, at::ScalarType cuda_dtype, at::ScalarType cpu_dtype) const; + at::Tensor _autocast_to_full_precision(bool cuda_enabled, bool cpu_enabled) const; + at::Tensor to(at::TensorOptions options={}, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, bool copy, ::std::optional memory_format) const; + at::Tensor to(at::Device device, at::ScalarType dtype, bool non_blocking=false, bool 
copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(at::ScalarType dtype, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Tensor to(const at::Tensor & other, bool non_blocking=false, bool copy=false, ::std::optional memory_format=::std::nullopt) const; + at::Scalar item() const; + at::Tensor & set_(at::Storage source) const; + at::Tensor & set_(at::Storage source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride={}) const; + at::Tensor & set__symint(at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride={}) const; + at::Tensor & set_(const at::Tensor & source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride={}) const; + at::Tensor & set__symint(const at::Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride={}) const; + at::Tensor & set_(const at::Tensor & source) const; + at::Tensor & set_() const; + bool is_set_to(const at::Tensor & tensor) const; + at::Tensor & masked_fill_(const at::Tensor & mask, const at::Scalar & value) const; + at::Tensor masked_fill(const at::Tensor & mask, const at::Scalar & value) const; + at::Tensor & masked_fill_(const at::Tensor & mask, const at::Tensor & value) const; + at::Tensor masked_fill(const at::Tensor & mask, const at::Tensor & value) const; + at::Tensor & masked_scatter_(const at::Tensor & mask, const at::Tensor & source) const; + at::Tensor masked_scatter(const at::Tensor & mask, const at::Tensor & source) const; + at::Tensor view(at::IntArrayRef size) const; + at::Tensor view_symint(c10::SymIntArrayRef size) const; + at::Tensor view(at::ScalarType dtype) const; + at::Tensor & put_(const at::Tensor & index, const at::Tensor & source, bool accumulate=false) const; + at::Tensor put(const at::Tensor & index, const at::Tensor & source, bool accumulate=false) const; + at::Tensor & index_add_(int64_t dim, const 
at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor index_add(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor index_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha=1) const; + at::Tensor & index_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true) const; + at::Tensor index_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self=true) const; + at::Tensor & index_fill_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor index_fill(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & index_fill_(int64_t dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor index_fill(int64_t dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor & index_fill_(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & index_fill_(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor index_fill(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor index_fill(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Tensor 
& src, c10::string_view reduce) const; + at::Tensor scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const; + at::Tensor & scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const; + at::Tensor scatter(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const; + at::Tensor scatter_add(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor & scatter_add_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const; + at::Tensor scatter_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self=true) const; + at::Tensor & scatter_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self=true) const; + at::Tensor & eq_(const at::Scalar & other) const; + at::Tensor & eq_(const at::Tensor & other) const; + at::Tensor bitwise_and(const at::Scalar & other) const; + at::Tensor bitwise_and(const at::Tensor & other) const; + at::Tensor & bitwise_and_(const at::Scalar & other) const; + at::Tensor & bitwise_and_(const at::Tensor & other) const; + at::Tensor __and__(const at::Scalar & other) const; + at::Tensor __and__(const at::Tensor & other) const; + at::Tensor & __iand__(const at::Scalar & other) const; + at::Tensor & __iand__(const at::Tensor & other) const; + at::Tensor bitwise_or(const at::Scalar & other) const; + at::Tensor bitwise_or(const at::Tensor & other) const; + at::Tensor & bitwise_or_(const at::Scalar & other) const; + at::Tensor & bitwise_or_(const at::Tensor & other) const; + at::Tensor __or__(const at::Scalar & other) const; + at::Tensor __or__(const at::Tensor & other) const; + at::Tensor & __ior__(const 
at::Scalar & other) const; + at::Tensor & __ior__(const at::Tensor & other) const; + at::Tensor bitwise_xor(const at::Scalar & other) const; + at::Tensor bitwise_xor(const at::Tensor & other) const; + at::Tensor & bitwise_xor_(const at::Scalar & other) const; + at::Tensor & bitwise_xor_(const at::Tensor & other) const; + at::Tensor __xor__(const at::Scalar & other) const; + at::Tensor __xor__(const at::Tensor & other) const; + at::Tensor & __ixor__(const at::Scalar & other) const; + at::Tensor & __ixor__(const at::Tensor & other) const; + at::Tensor __lshift__(const at::Scalar & other) const; + at::Tensor __lshift__(const at::Tensor & other) const; + at::Tensor & __ilshift__(const at::Scalar & other) const; + at::Tensor & __ilshift__(const at::Tensor & other) const; + at::Tensor bitwise_left_shift(const at::Tensor & other) const; + at::Tensor & bitwise_left_shift_(const at::Tensor & other) const; + at::Tensor bitwise_left_shift(const at::Scalar & other) const; + at::Tensor & bitwise_left_shift_(const at::Scalar & other) const; + at::Tensor __rshift__(const at::Scalar & other) const; + at::Tensor __rshift__(const at::Tensor & other) const; + at::Tensor & __irshift__(const at::Scalar & other) const; + at::Tensor & __irshift__(const at::Tensor & other) const; + at::Tensor bitwise_right_shift(const at::Tensor & other) const; + at::Tensor & bitwise_right_shift_(const at::Tensor & other) const; + at::Tensor bitwise_right_shift(const at::Scalar & other) const; + at::Tensor & bitwise_right_shift_(const at::Scalar & other) const; + at::Tensor & tril_(int64_t diagonal=0) const; + at::Tensor & tril__symint(c10::SymInt diagonal=0) const; + at::Tensor & triu_(int64_t diagonal=0) const; + at::Tensor & triu__symint(c10::SymInt diagonal=0) const; + at::Tensor & digamma_() const; + at::Tensor & lerp_(const at::Tensor & end, const at::Scalar & weight) const; + at::Tensor & lerp_(const at::Tensor & end, const at::Tensor & weight) const; + at::Tensor & addbmm_(const at::Tensor & 
batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor addbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) const; + at::Tensor & random_(int64_t from, ::std::optional to, ::std::optional generator=::std::nullopt) const; + at::Tensor & random_(int64_t to, ::std::optional generator=::std::nullopt) const; + at::Tensor & random_(::std::optional generator=::std::nullopt) const; + at::Tensor & uniform_(double from=0, double to=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & cauchy_(double median=0, double sigma=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & log_normal_(double mean=1, double std=2, ::std::optional generator=::std::nullopt) const; + at::Tensor & exponential_(double lambd=1, ::std::optional generator=::std::nullopt) const; + at::Tensor & geometric_(double p, ::std::optional generator=::std::nullopt) const; + at::Tensor diag(int64_t diagonal=0) const; + at::Tensor cross(const at::Tensor & other, ::std::optional dim=::std::nullopt) const; + at::Tensor triu(int64_t diagonal=0) const; + at::Tensor triu_symint(c10::SymInt diagonal=0) const; + at::Tensor tril(int64_t diagonal=0) const; + at::Tensor tril_symint(c10::SymInt diagonal=0) const; + at::Tensor trace() const; + at::Tensor ne(const at::Scalar & other) const; + at::Tensor ne(const at::Tensor & other) const; + at::Tensor & ne_(const at::Scalar & other) const; + at::Tensor & ne_(const at::Tensor & other) const; + at::Tensor not_equal(const at::Scalar & other) const; + at::Tensor not_equal(const at::Tensor & other) const; + at::Tensor & not_equal_(const at::Scalar & other) const; + at::Tensor & not_equal_(const at::Tensor & other) const; + at::Tensor eq(const at::Scalar & other) const; + at::Tensor eq(const at::Tensor & other) const; + at::Tensor ge(const at::Scalar & other) const; + at::Tensor ge(const at::Tensor & other) const; + at::Tensor & ge_(const 
at::Scalar & other) const; + at::Tensor & ge_(const at::Tensor & other) const; + at::Tensor greater_equal(const at::Scalar & other) const; + at::Tensor greater_equal(const at::Tensor & other) const; + at::Tensor & greater_equal_(const at::Scalar & other) const; + at::Tensor & greater_equal_(const at::Tensor & other) const; + at::Tensor le(const at::Scalar & other) const; + at::Tensor le(const at::Tensor & other) const; + at::Tensor & le_(const at::Scalar & other) const; + at::Tensor & le_(const at::Tensor & other) const; + at::Tensor less_equal(const at::Scalar & other) const; + at::Tensor less_equal(const at::Tensor & other) const; + at::Tensor & less_equal_(const at::Scalar & other) const; + at::Tensor & less_equal_(const at::Tensor & other) const; + at::Tensor gt(const at::Scalar & other) const; + at::Tensor gt(const at::Tensor & other) const; + at::Tensor & gt_(const at::Scalar & other) const; + at::Tensor & gt_(const at::Tensor & other) const; + at::Tensor greater(const at::Scalar & other) const; + at::Tensor greater(const at::Tensor & other) const; + at::Tensor & greater_(const at::Scalar & other) const; + at::Tensor & greater_(const at::Tensor & other) const; + at::Tensor lt(const at::Scalar & other) const; + at::Tensor lt(const at::Tensor & other) const; + at::Tensor & lt_(const at::Scalar & other) const; + at::Tensor & lt_(const at::Tensor & other) const; + at::Tensor less(const at::Scalar & other) const; + at::Tensor less(const at::Tensor & other) const; + at::Tensor & less_(const at::Scalar & other) const; + at::Tensor & less_(const at::Tensor & other) const; + at::Tensor take(const at::Tensor & index) const; + at::Tensor take_along_dim(const at::Tensor & indices, ::std::optional dim=::std::nullopt) const; + at::Tensor index_select(int64_t dim, const at::Tensor & index) const; + at::Tensor index_select(at::Dimname dim, const at::Tensor & index) const; + at::Tensor masked_select(const at::Tensor & mask) const; + at::Tensor nonzero() const; + at::Tensor 
nonzero_static(int64_t size, int64_t fill_value=-1) const; + at::Tensor nonzero_static_symint(c10::SymInt size, int64_t fill_value=-1) const; + ::std::vector nonzero_numpy() const; + at::Tensor argwhere() const; + at::Tensor gather(int64_t dim, const at::Tensor & index, bool sparse_grad=false) const; + at::Tensor gather(at::Dimname dim, const at::Tensor & index, bool sparse_grad=false) const; + at::Tensor addcmul(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor & addcmul_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor addcdiv(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + at::Tensor & addcdiv_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value=1) const; + ::std::tuple triangular_solve(const at::Tensor & A, bool upper=true, bool transpose=false, bool unitriangular=false) const; + ::std::tuple svd(bool some=true, bool compute_uv=true) const; + at::Tensor swapaxes(int64_t axis0, int64_t axis1) const; + at::Tensor & swapaxes_(int64_t axis0, int64_t axis1) const; + at::Tensor swapdims(int64_t dim0, int64_t dim1) const; + at::Tensor & swapdims_(int64_t dim0, int64_t dim1) const; + at::Tensor cholesky(bool upper=false) const; + at::Tensor cholesky_solve(const at::Tensor & input2, bool upper=false) const; + at::Tensor cholesky_inverse(bool upper=false) const; + ::std::tuple qr(bool some=true) const; + ::std::tuple geqrf() const; + at::Tensor orgqr(const at::Tensor & input2) const; + at::Tensor ormqr(const at::Tensor & input2, const at::Tensor & input3, bool left=true, bool transpose=false) const; + at::Tensor lu_solve(const at::Tensor & LU_data, const at::Tensor & LU_pivots) const; + at::Tensor multinomial(int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt) const; + at::Tensor multinomial_symint(c10::SymInt num_samples, bool replacement=false, 
::std::optional generator=::std::nullopt) const; + at::Tensor & lgamma_() const; + at::Tensor lgamma() const; + at::Tensor digamma() const; + at::Tensor polygamma(int64_t n) const; + at::Tensor & polygamma_(int64_t n) const; + at::Tensor erfinv() const; + at::Tensor & erfinv_() const; + at::Tensor i0() const; + at::Tensor & i0_() const; + at::Tensor sign() const; + at::Tensor & sign_() const; + at::Tensor signbit() const; + at::Tensor dist(const at::Tensor & other, const at::Scalar & p=2) const; + at::Tensor & atan2_(const at::Tensor & other) const; + at::Tensor atan2(const at::Tensor & other) const; + at::Tensor arctan2(const at::Tensor & other) const; + at::Tensor & arctan2_(const at::Tensor & other) const; + at::Tensor lerp(const at::Tensor & end, const at::Scalar & weight) const; + at::Tensor lerp(const at::Tensor & end, const at::Tensor & weight) const; + at::Tensor histc(int64_t bins=100, const at::Scalar & min=0, const at::Scalar & max=0) const; + ::std::tuple histogram(const at::Tensor & bins, const ::std::optional & weight={}, bool density=false) const; + ::std::tuple histogram(int64_t bins=100, ::std::optional> range=::std::nullopt, const ::std::optional & weight={}, bool density=false) const; + at::Tensor fmod(const at::Scalar & other) const; + at::Tensor & fmod_(const at::Scalar & other) const; + at::Tensor fmod(const at::Tensor & other) const; + at::Tensor & fmod_(const at::Tensor & other) const; + at::Tensor hypot(const at::Tensor & other) const; + at::Tensor & hypot_(const at::Tensor & other) const; + at::Tensor igamma(const at::Tensor & other) const; + at::Tensor & igamma_(const at::Tensor & other) const; + at::Tensor igammac(const at::Tensor & other) const; + at::Tensor & igammac_(const at::Tensor & other) const; + at::Tensor nextafter(const at::Tensor & other) const; + at::Tensor & nextafter_(const at::Tensor & other) const; + at::Tensor remainder(const at::Scalar & other) const; + at::Tensor & remainder_(const at::Scalar & other) const; + 
// Overview of this span (presumably the tail of the at::Tensor class body; the
// class header is not in view -- TODO confirm):
//   1. Bodiless member declarations, remainder(...) through
//      to_padded_tensor_symint(...). Their implementations are not visible here.
//   2. var(int) / std(int): wrap `dim` as IntArrayRef{dim} and forward to another
//      var/std overload, so that a plain int argument is not converted to bool
//      and routed to the `unbiased` overload (see the gh-40287 comment below).
//   3. to(caffe2::TypeMeta, ...) and to(Device, caffe2::TypeMeta, ...): convert
//      the TypeMeta with typeMetaToScalarType() and forward to this->to(...),
//      passing non_blocking and copy through unchanged.
//   4. m(func, params...): returns func(*this, params...).
//   5. tensor_data(): returns TensorBase::tensor_data().
// The final comment paragraph describes variable_data(), whose definition lies
// past the end of this span.
// NOTE(review): this text is a folded diff -- many added lines share one physical
// line and the "+" markers are inline, so each "//" below swallows the rest of
// its physical line if this is ever fed to a compiler as-is.
// NOTE(review): template argument lists look stripped (e.g. `::std::optional dim`,
// `::std::tuple sort`, bare `template`, `std::forward(params)`); presumably an
// extraction artifact -- verify against the installed header before relying on
// any signature shown here.
at::Tensor remainder(const at::Tensor & other) const; + at::Tensor & remainder_(const at::Tensor & other) const; + at::Tensor min() const; + at::Tensor fmin(const at::Tensor & other) const; + at::Tensor max() const; + at::Tensor fmax(const at::Tensor & other) const; + at::Tensor maximum(const at::Tensor & other) const; + at::Tensor max(const at::Tensor & other) const; + at::Tensor minimum(const at::Tensor & other) const; + at::Tensor min(const at::Tensor & other) const; + at::Tensor quantile(const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor quantile(double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor nanquantile(const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + at::Tensor nanquantile(double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear") const; + ::std::tuple sort(int64_t dim=-1, bool descending=false) const; + ::std::tuple sort(::std::optional stable, int64_t dim=-1, bool descending=false) const; + ::std::tuple sort(at::Dimname dim, bool descending=false) const; + ::std::tuple sort(::std::optional stable, at::Dimname dim, bool descending=false) const; + at::Tensor msort() const; + at::Tensor argsort(int64_t dim=-1, bool descending=false) const; + at::Tensor argsort(bool stable, int64_t dim=-1, bool descending=false) const; + at::Tensor argsort(at::Dimname dim, bool descending=false) const; + ::std::tuple topk(int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true) const; + ::std::tuple topk_symint(c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true) const; + at::Tensor all() const; + at::Tensor any() const; + at::Tensor renorm(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const; + at::Tensor & renorm_(const at::Scalar & p, int64_t dim, 
const at::Scalar & maxnorm) const; + at::Tensor unfold(int64_t dimension, int64_t size, int64_t step) const; + bool equal(const at::Tensor & other) const; + at::Tensor pow(const at::Tensor & exponent) const; + at::Tensor pow(const at::Scalar & exponent) const; + at::Tensor & pow_(const at::Scalar & exponent) const; + at::Tensor & pow_(const at::Tensor & exponent) const; + at::Tensor float_power(const at::Tensor & exponent) const; + at::Tensor float_power(const at::Scalar & exponent) const; + at::Tensor & float_power_(const at::Scalar & exponent) const; + at::Tensor & float_power_(const at::Tensor & exponent) const; + at::Tensor & normal_(double mean=0, double std=1, ::std::optional generator=::std::nullopt) const; + at::Tensor alias() const; + at::Tensor isfinite() const; + at::Tensor isinf() const; + void record_stream(at::Stream s) const; + at::Tensor isposinf() const; + at::Tensor isneginf() const; + at::Tensor det() const; + ::std::tuple slogdet() const; + at::Tensor logdet() const; + at::Tensor inverse() const; + at::Tensor inner(const at::Tensor & other) const; + at::Tensor outer(const at::Tensor & vec2) const; + at::Tensor ger(const at::Tensor & vec2) const; + at::Tensor to_padded_tensor(double padding, at::OptionalIntArrayRef output_size=::std::nullopt) const; + at::Tensor to_padded_tensor_symint(double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) const; + + // Special C++ only overloads for std()-like functions (See gh-40287) + // These are needed because int -> bool conversion takes precedence over int -> IntArrayRef + // So, for example std(0) would select the std(unbiased=False) overload + + Tensor var(int dim) const { + return var(IntArrayRef{dim}); + } + + Tensor std(int dim) const { + return std(IntArrayRef{dim}); + } + + // We changed .dtype() to return a TypeMeta in #12766. Ideally, we want the + // at::kDouble and its friends to be TypeMeta's, but that hasn't happened yet. 
+ // Before that change, we make this method to maintain BC for C++ usage like + // `x.to(y.dtype)`. + // TODO: remove following two after at::kDouble and its friends are TypeMeta's. + inline Tensor to(caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const { + return this->to(/*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy); + } + inline Tensor to(Device device, caffe2::TypeMeta type_meta, bool non_blocking=false, bool copy=false) const { + return this->to(device, /*scalar_type=*/typeMetaToScalarType(type_meta), non_blocking, copy); + } + + template + decltype(auto) m(F func, Args&&... params) const { + return func(*this, std::forward(params)...); + } + + /// NOTE: This is similar to the legacy `.data()` function on `Variable`, and is intended + /// to be used from functions that need to access the `Variable`'s equivalent `Tensor` + /// (i.e. `Tensor` that shares the same storage and tensor metadata with the `Variable`). + /// + /// One notable difference with the legacy `.data()` function is that changes to the + /// returned `Tensor`'s tensor metadata (e.g. sizes / strides / storage / storage_offset) + /// will not update the original `Variable`, due to the fact that this function + /// shallow-copies the `Variable`'s underlying TensorImpl. + at::Tensor tensor_data() const { + return TensorBase::tensor_data(); + } + + /// NOTE: `var.variable_data()` in C++ has the same semantics as `tensor.data` + /// in Python, which create a new `Variable` that shares the same storage and + /// tensor metadata with the original `Variable`, but with a completely new + /// autograd history. + /// + /// NOTE: If we change the tensor metadata (e.g. sizes / strides / + /// storage / storage_offset) of a variable created from `var.variable_data()`, those + /// changes will not update the original variable `var`. 
In `.variable_data()`, we set + /// `allow_tensor_metadata_change_` to false to make such changes explicitly illegal, + /// in order to prevent users from changing metadata of `var.variable_data()` + /// and expecting the original variable `var` to also be updated. + at::Tensor variable_data() const { + return TensorBase::variable_data(); + } + + // Hooks + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + template + using hook_return_void_t = std::enable_if_t>::value, unsigned>; + template + using hook_return_var_t = std::enable_if_t, Tensor>, unsigned>; + + /// Registers a backward hook. + /// + /// The hook will be called every time a gradient with respect to the Tensor is computed. + /// The hook should have one of the following signature: + /// ``` + /// hook(Tensor grad) -> Tensor + /// ``` + /// ``` + /// hook(Tensor grad) -> void + /// ``` + /// The hook should not modify its argument, but it can optionally return a new gradient + /// which will be used in place of `grad`. + /// + /// This function returns the index of the hook in the list which can be used to remove hook. 
+ /// + /// Example: + /// @code + /// auto v = torch::tensor({0., 0., 0.}, torch::requires_grad()); + /// auto h = v.register_hook([](torch::Tensor grad){ return grad * 2; }); // double the gradient + /// v.backward(torch::tensor({1., 2., 3.})); + /// // This prints: + /// // ``` + /// // 2 + /// // 4 + /// // 6 + /// // [ CPUFloatType{3} ] + /// // ``` + /// std::cout << v.grad() << std::endl; + /// v.remove_hook(h); // removes the hook + /// @endcode + template + hook_return_void_t register_hook(T&& hook) const; + template + hook_return_var_t register_hook(T&& hook) const; + + // Variable methods + //~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + Tensor data() const { + return TensorBase::data(); + } + + void _backward(TensorList inputs, const std::optional& gradient, std::optional keep_graph, bool create_graph) const; + + const Tensor& requires_grad_(bool _requires_grad=true) const { + TensorBase::requires_grad_(_requires_grad); + return *this; + } +}; + +namespace detail { +// Helper creator for Tensor class which doesn't requires the users to pass +// in an intrusive_ptr instead it just converts the argument passed to +// requested intrusive_ptr type. +template +Tensor make_tensor(Args&&... args) { + return Tensor(c10::make_intrusive(std::forward(args)...)); +} + +} // namespace detail + +} // namespace at + + +namespace at { + +// aten::_backward(Tensor self, Tensor[] inputs, Tensor? gradient=None, bool? retain_graph=None, bool create_graph=False) -> () +inline void Tensor::__dispatch__backward(at::TensorList inputs, const ::std::optional & gradient, ::std::optional retain_graph, bool create_graph) const { + return at::_ops::_backward::call(const_cast(*this), inputs, gradient, retain_graph, create_graph); +} + +// aten::set_data(Tensor(a!) 
self, Tensor new_data) -> () +inline void Tensor::__dispatch_set_data(const at::Tensor & new_data) const { + return at::_ops::set_data::call(const_cast(*this), new_data); +} + +// aten::data(Tensor self) -> Tensor +inline at::Tensor Tensor::__dispatch_data() const { + return at::_ops::data::call(const_cast(*this)); +} + +// aten::is_leaf(Tensor self) -> bool +inline bool Tensor::__dispatch_is_leaf() const { + return at::_ops::is_leaf::call(const_cast(*this)); +} + +// aten::output_nr(Tensor self) -> int +inline int64_t Tensor::__dispatch_output_nr() const { + return at::_ops::output_nr::call(const_cast(*this)); +} + +// aten::_version(Tensor self) -> int +inline int64_t Tensor::__dispatch__version() const { + return at::_ops::_version::call(const_cast(*this)); +} + +// aten::requires_grad_(Tensor(a!) self, bool requires_grad=True) -> Tensor(a!) +inline at::Tensor & Tensor::__dispatch_requires_grad_(bool requires_grad) const { + return at::_ops::requires_grad_::call(const_cast(*this), requires_grad); +} + +// aten::retain_grad(Tensor(a!) self) -> () +inline void Tensor::__dispatch_retain_grad() const { + return at::_ops::retain_grad::call(const_cast(*this)); +} + +// aten::retains_grad(Tensor self) -> bool +inline bool Tensor::__dispatch_retains_grad() const { + return at::_ops::retains_grad::call(const_cast(*this)); +} + +// aten::_fw_primal(Tensor(a) self, int level) -> Tensor(a) +inline at::Tensor Tensor::_fw_primal(int64_t level) const { + return at::_ops::_fw_primal::call(const_cast(*this), level); +} + +// aten::rename_(Tensor(a!) self, Dimname[]? names) -> Tensor(a!) +inline at::Tensor & Tensor::rename_(::std::optional names) const { + return at::_ops::rename_::call(const_cast(*this), names); +} + +// aten::rename(Tensor(a) self, Dimname[]? 
names) -> Tensor(a) +inline at::Tensor Tensor::rename(::std::optional names) const { + return at::_ops::rename::call(const_cast(*this), names); +} + +// aten::align_to(Tensor(a) self, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::align_to(at::DimnameList names) const { + return at::_ops::align_to::call(const_cast(*this), names); +} + +// aten::align_to.ellipsis_idx(Tensor(a) self, Dimname[] order, int ellipsis_idx) -> Tensor(a) +inline at::Tensor Tensor::align_to(at::DimnameList order, int64_t ellipsis_idx) const { + return at::_ops::align_to_ellipsis_idx::call(const_cast(*this), order, ellipsis_idx); +} + +// aten::align_as(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::align_as(const at::Tensor & other) const { + return at::_ops::align_as::call(const_cast(*this), other); +} + +// aten::refine_names(Tensor(a) self, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::refine_names(at::DimnameList names) const { + return at::_ops::refine_names::call(const_cast(*this), names); +} + +// aten::abs(Tensor self) -> Tensor +inline at::Tensor Tensor::abs() const { + return at::_ops::abs::call(const_cast(*this)); +} + +// aten::abs_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::abs_() const { + return at::_ops::abs_::call(const_cast(*this)); +} + +// aten::absolute(Tensor self) -> Tensor +inline at::Tensor Tensor::absolute() const { + return at::_ops::absolute::call(const_cast(*this)); +} + +// aten::absolute_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::absolute_() const { + return at::_ops::absolute_::call(const_cast(*this)); +} + +// aten::angle(Tensor self) -> Tensor +inline at::Tensor Tensor::angle() const { + return at::_ops::angle::call(const_cast(*this)); +} + +// aten::sgn(Tensor self) -> Tensor +inline at::Tensor Tensor::sgn() const { + return at::_ops::sgn::call(const_cast(*this)); +} + +// aten::sgn_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::sgn_() const { + return at::_ops::sgn_::call(const_cast(*this)); +} + +// aten::chalf(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor +inline at::Tensor Tensor::chalf(::std::optional memory_format) const { + return at::_ops::chalf::call(const_cast(*this), memory_format); +} + +// aten::_conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_conj() const { + return at::_ops::_conj::call(const_cast(*this)); +} + +// aten::conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::__dispatch_conj() const { + return at::_ops::conj::call(const_cast(*this)); +} + +// aten::_conj_physical(Tensor self) -> Tensor +inline at::Tensor Tensor::_conj_physical() const { + return at::_ops::_conj_physical::call(const_cast(*this)); +} + +// aten::conj_physical(Tensor self) -> Tensor +inline at::Tensor Tensor::conj_physical() const { + return at::_ops::conj_physical::call(const_cast(*this)); +} + +// aten::conj_physical_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::conj_physical_() const { + return at::_ops::conj_physical_::call(const_cast(*this)); +} + +// aten::resolve_conj(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::resolve_conj() const { + return at::_ops::resolve_conj::call(const_cast(*this)); +} + +// aten::resolve_neg(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::resolve_neg() const { + return at::_ops::resolve_neg::call(const_cast(*this)); +} + +// aten::_neg_view(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_neg_view() const { + return at::_ops::_neg_view::call(const_cast(*this)); +} + +// aten::acos(Tensor self) -> Tensor +inline at::Tensor Tensor::acos() const { + return at::_ops::acos::call(const_cast(*this)); +} + +// aten::acos_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::acos_() const { + return at::_ops::acos_::call(const_cast(*this)); +} + +// aten::arccos(Tensor self) -> Tensor +inline at::Tensor Tensor::arccos() const { + return at::_ops::arccos::call(const_cast(*this)); +} + +// aten::arccos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arccos_() const { + return at::_ops::arccos_::call(const_cast(*this)); +} + +// aten::add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::add(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::add_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::add_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::add_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::add__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::add(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::add_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::add_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::add__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addmv(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmv::call(const_cast(*this), mat, vec, beta, alpha); +} + +// aten::addmv_(Tensor(a!) self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) 
+inline at::Tensor & Tensor::addmv_(const at::Tensor & mat, const at::Tensor & vec, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmv_::call(const_cast(*this), mat, vec, beta, alpha); +} + +// aten::addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addr(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addr::call(const_cast(*this), vec1, vec2, beta, alpha); +} + +// aten::addr_(Tensor(a!) self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addr_(const at::Tensor & vec1, const at::Tensor & vec2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addr_::call(const_cast(*this), vec1, vec2, beta, alpha); +} + +// aten::_is_all_true(Tensor self) -> Tensor +inline at::Tensor Tensor::_is_all_true() const { + return at::_ops::_is_all_true::call(const_cast(*this)); +} + +// aten::_is_any_true(Tensor self) -> Tensor +inline at::Tensor Tensor::_is_any_true() const { + return at::_ops::_is_any_true::call(const_cast(*this)); +} + +// aten::all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(int64_t dim, bool keepdim) const { + return at::_ops::all_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::all.dims(Tensor self, int[]? 
dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(at::OptionalIntArrayRef dim, bool keepdim) const { + return at::_ops::all_dims::call(const_cast(*this), dim, keepdim); +} + +// aten::all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::all(at::Dimname dim, bool keepdim) const { + return at::_ops::all_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool +inline bool Tensor::allclose(const at::Tensor & other, double rtol, double atol, bool equal_nan) const { + return at::_ops::allclose::call(const_cast(*this), other, rtol, atol, equal_nan); +} + +// aten::any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(int64_t dim, bool keepdim) const { + return at::_ops::any_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(at::OptionalIntArrayRef dim, bool keepdim) const { + return at::_ops::any_dims::call(const_cast(*this), dim, keepdim); +} + +// aten::any.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::any(at::Dimname dim, bool keepdim) const { + return at::_ops::any_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::argmax(Tensor self, int? dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::argmax(::std::optional dim, bool keepdim) const { + return at::_ops::argmax::call(const_cast(*this), dim, keepdim); +} + +// aten::argmin(Tensor self, int? 
dim=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::argmin(::std::optional dim, bool keepdim) const { + return at::_ops::argmin::call(const_cast(*this), dim, keepdim); +} + +// aten::acosh(Tensor self) -> Tensor +inline at::Tensor Tensor::acosh() const { + return at::_ops::acosh::call(const_cast(*this)); +} + +// aten::acosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::acosh_() const { + return at::_ops::acosh_::call(const_cast(*this)); +} + +// aten::arccosh(Tensor self) -> Tensor +inline at::Tensor Tensor::arccosh() const { + return at::_ops::arccosh::call(const_cast(*this)); +} + +// aten::arccosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arccosh_() const { + return at::_ops::arccosh_::call(const_cast(*this)); +} + +// aten::asinh(Tensor self) -> Tensor +inline at::Tensor Tensor::asinh() const { + return at::_ops::asinh::call(const_cast(*this)); +} + +// aten::asinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::asinh_() const { + return at::_ops::asinh_::call(const_cast(*this)); +} + +// aten::arcsinh(Tensor self) -> Tensor +inline at::Tensor Tensor::arcsinh() const { + return at::_ops::arcsinh::call(const_cast(*this)); +} + +// aten::arcsinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arcsinh_() const { + return at::_ops::arcsinh_::call(const_cast(*this)); +} + +// aten::atanh(Tensor self) -> Tensor +inline at::Tensor Tensor::atanh() const { + return at::_ops::atanh::call(const_cast(*this)); +} + +// aten::atanh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::atanh_() const { + return at::_ops::atanh_::call(const_cast(*this)); +} + +// aten::arctanh(Tensor self) -> Tensor +inline at::Tensor Tensor::arctanh() const { + return at::_ops::arctanh::call(const_cast(*this)); +} + +// aten::arctanh_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::arctanh_() const { + return at::_ops::arctanh_::call(const_cast(*this)); +} + +// aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a) +inline at::Tensor Tensor::as_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a) +inline at::Tensor Tensor::as_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided::call(const_cast(*this), size, stride, storage_offset); +} + +// aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!) +inline const at::Tensor & Tensor::as_strided_(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!) +inline const at::Tensor & Tensor::as_strided__symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_::call(const_cast(*this), size, stride, storage_offset); +} + +// aten::asin(Tensor self) -> Tensor +inline at::Tensor Tensor::asin() const { + return at::_ops::asin::call(const_cast(*this)); +} + +// aten::asin_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::asin_() const { + return at::_ops::asin_::call(const_cast(*this)); +} + +// aten::arcsin(Tensor self) -> Tensor +inline at::Tensor Tensor::arcsin() const { + return at::_ops::arcsin::call(const_cast(*this)); +} + +// aten::arcsin_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arcsin_() const { + return at::_ops::arcsin_::call(const_cast(*this)); +} + +// aten::atan(Tensor self) -> Tensor +inline at::Tensor Tensor::atan() const { + return at::_ops::atan::call(const_cast(*this)); +} + +// aten::atan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::atan_() const { + return at::_ops::atan_::call(const_cast(*this)); +} + +// aten::arctan(Tensor self) -> Tensor +inline at::Tensor Tensor::arctan() const { + return at::_ops::arctan::call(const_cast(*this)); +} + +// aten::arctan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::arctan_() const { + return at::_ops::arctan_::call(const_cast(*this)); +} + +// aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::baddbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::baddbmm::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::baddbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::baddbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::baddbmm_::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::bernoulli(Tensor self, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::bernoulli(::std::optional generator) const { + return at::_ops::bernoulli::call(const_cast(*this), generator); +} + +// aten::bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!) 
+inline at::Tensor & Tensor::bernoulli_(const at::Tensor & p, ::std::optional generator) const { + return at::_ops::bernoulli__Tensor::call(const_cast(*this), p, generator); +} + +// aten::bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::bernoulli_(double p, ::std::optional generator) const { + return at::_ops::bernoulli__float::call(const_cast(*this), p, generator); +} + +// aten::bernoulli.p(Tensor self, float p, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::bernoulli(double p, ::std::optional generator) const { + return at::_ops::bernoulli_p::call(const_cast(*this), p, generator); +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor Tensor::bincount(const ::std::optional & weights, int64_t minlength) const { + return at::_ops::bincount::call(const_cast(*this), weights, minlength); +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor Tensor::bincount_symint(const ::std::optional & weights, c10::SymInt minlength) const { + return at::_ops::bincount::call(const_cast(*this), weights, minlength); +} + +// aten::bitwise_not(Tensor self) -> Tensor +inline at::Tensor Tensor::bitwise_not() const { + return at::_ops::bitwise_not::call(const_cast(*this)); +} + +// aten::bitwise_not_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_not_() const { + return at::_ops::bitwise_not_::call(const_cast(*this)); +} + +// aten::copysign.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::copysign(const at::Tensor & other) const { + return at::_ops::copysign_Tensor::call(const_cast(*this), other); +} + +// aten::copysign_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::copysign_(const at::Tensor & other) const { + return at::_ops::copysign__Tensor::call(const_cast(*this), other); +} + +// aten::copysign.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::copysign(const at::Scalar & other) const { + return at::_ops::copysign_Scalar::call(const_cast(*this), other); +} + +// aten::copysign_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::copysign_(const at::Scalar & other) const { + return at::_ops::copysign__Scalar::call(const_cast(*this), other); +} + +// aten::_lazy_clone(Tensor self) -> Tensor +inline at::Tensor Tensor::_lazy_clone() const { + return at::_ops::_lazy_clone::call(const_cast(*this)); +} + +// aten::logical_not(Tensor self) -> Tensor +inline at::Tensor Tensor::logical_not() const { + return at::_ops::logical_not::call(const_cast(*this)); +} + +// aten::logical_not_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::logical_not_() const { + return at::_ops::logical_not_::call(const_cast(*this)); +} + +// aten::logical_xor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_xor(const at::Tensor & other) const { + return at::_ops::logical_xor::call(const_cast(*this), other); +} + +// aten::logical_xor_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::logical_xor_(const at::Tensor & other) const { + return at::_ops::logical_xor_::call(const_cast(*this), other); +} + +// aten::logical_and(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_and(const at::Tensor & other) const { + return at::_ops::logical_and::call(const_cast(*this), other); +} + +// aten::logical_and_(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::logical_and_(const at::Tensor & other) const { + return at::_ops::logical_and_::call(const_cast(*this), other); +} + +// aten::logical_or(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logical_or(const at::Tensor & other) const { + return at::_ops::logical_or::call(const_cast(*this), other); +} + +// aten::logical_or_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::logical_or_(const at::Tensor & other) const { + return at::_ops::logical_or_::call(const_cast(*this), other); +} + +// aten::bmm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::bmm(const at::Tensor & mat2) const { + return at::_ops::bmm::call(const_cast(*this), mat2); +} + +// aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::broadcast_to(at::IntArrayRef size) const { + return at::_ops::broadcast_to::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::broadcast_to(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::broadcast_to_symint(c10::SymIntArrayRef size) const { + return at::_ops::broadcast_to::call(const_cast(*this), size); +} + +// aten::ceil(Tensor self) -> Tensor +inline at::Tensor Tensor::ceil() const { + return at::_ops::ceil::call(const_cast(*this)); +} + +// aten::ceil_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::ceil_() const { + return at::_ops::ceil_::call(const_cast(*this)); +} + +// aten::unsafe_chunk(Tensor self, int chunks, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_chunk(int64_t chunks, int64_t dim) const { + return at::_ops::unsafe_chunk::call(const_cast(*this), chunks, dim); +} + +// aten::chunk(Tensor(a -> *) self, int chunks, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::chunk(int64_t chunks, int64_t dim) const { + return at::_ops::chunk::call(const_cast(*this), chunks, dim); +} + +// aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(int64_t sections, int64_t dim) const { + return at::_ops::tensor_split_sections::call(const_cast(*this), sections, dim); +} + +// aten::tensor_split.sections(Tensor(a -> *) self, SymInt sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split_symint(c10::SymInt sections, int64_t dim) const { + return at::_ops::tensor_split_sections::call(const_cast(*this), sections, dim); +} + +// aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(at::IntArrayRef indices, int64_t dim) const { + return at::_ops::tensor_split_indices::call(const_cast(*this), c10::fromIntArrayRefSlow(indices), dim); +} + +// aten::tensor_split.indices(Tensor(a -> *) self, SymInt[] indices, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split_symint(c10::SymIntArrayRef indices, int64_t dim) const { + return at::_ops::tensor_split_indices::call(const_cast(*this), indices, dim); +} + +// aten::tensor_split.tensor_indices_or_sections(Tensor(a -> *) self, Tensor tensor_indices_or_sections, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::tensor_split(const at::Tensor & tensor_indices_or_sections, int64_t dim) const { + return at::_ops::tensor_split_tensor_indices_or_sections::call(const_cast(*this), 
tensor_indices_or_sections, dim); +} + +// aten::clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor +inline at::Tensor Tensor::clamp(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp::call(const_cast(*this), min, max); +} + +// aten::clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor +inline at::Tensor Tensor::clamp(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp_Tensor::call(const_cast(*this), min, max); +} + +// aten::clamp_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp_::call(const_cast(*this), min, max); +} + +// aten::clamp_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clamp__Tensor::call(const_cast(*this), min, max); +} + +// aten::clamp_max(Tensor self, Scalar max) -> Tensor +inline at::Tensor Tensor::clamp_max(const at::Scalar & max) const { + return at::_ops::clamp_max::call(const_cast(*this), max); +} + +// aten::clamp_max.Tensor(Tensor self, Tensor max) -> Tensor +inline at::Tensor Tensor::clamp_max(const at::Tensor & max) const { + return at::_ops::clamp_max_Tensor::call(const_cast(*this), max); +} + +// aten::clamp_max_(Tensor(a!) self, Scalar max) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_max_(const at::Scalar & max) const { + return at::_ops::clamp_max_::call(const_cast(*this), max); +} + +// aten::clamp_max_.Tensor(Tensor(a!) self, Tensor max) -> Tensor(a!) 
+inline at::Tensor & Tensor::clamp_max_(const at::Tensor & max) const { + return at::_ops::clamp_max__Tensor::call(const_cast(*this), max); +} + +// aten::clamp_min(Tensor self, Scalar min) -> Tensor +inline at::Tensor Tensor::clamp_min(const at::Scalar & min) const { + return at::_ops::clamp_min::call(const_cast(*this), min); +} + +// aten::clamp_min.Tensor(Tensor self, Tensor min) -> Tensor +inline at::Tensor Tensor::clamp_min(const at::Tensor & min) const { + return at::_ops::clamp_min_Tensor::call(const_cast(*this), min); +} + +// aten::clamp_min_(Tensor(a!) self, Scalar min) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_min_(const at::Scalar & min) const { + return at::_ops::clamp_min_::call(const_cast(*this), min); +} + +// aten::clamp_min_.Tensor(Tensor(a!) self, Tensor min) -> Tensor(a!) +inline at::Tensor & Tensor::clamp_min_(const at::Tensor & min) const { + return at::_ops::clamp_min__Tensor::call(const_cast(*this), min); +} + +// aten::clip(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor +inline at::Tensor Tensor::clip(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip::call(const_cast(*this), min, max); +} + +// aten::clip.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor +inline at::Tensor Tensor::clip(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip_Tensor::call(const_cast(*this), min, max); +} + +// aten::clip_(Tensor(a!) self, Scalar? min=None, Scalar? max=None) -> Tensor(a!) +inline at::Tensor & Tensor::clip_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip_::call(const_cast(*this), min, max); +} + +// aten::clip_.Tensor(Tensor(a!) self, Tensor? min=None, Tensor? max=None) -> Tensor(a!) 
+inline at::Tensor & Tensor::clip_(const ::std::optional & min, const ::std::optional & max) const { + return at::_ops::clip__Tensor::call(const_cast(*this), min, max); +} + +// aten::contiguous(Tensor(a) self, *, MemoryFormat memory_format=contiguous_format) -> Tensor(a) +inline at::Tensor Tensor::__dispatch_contiguous(at::MemoryFormat memory_format) const { + return at::_ops::contiguous::call(const_cast(*this), memory_format); +} + +// aten::copy_(Tensor(a!) self, Tensor src, bool non_blocking=False) -> Tensor(a!) +inline at::Tensor & Tensor::copy_(const at::Tensor & src, bool non_blocking) const { + return at::_ops::copy_::call(const_cast(*this), src, non_blocking); +} + +// aten::cos(Tensor self) -> Tensor +inline at::Tensor Tensor::cos() const { + return at::_ops::cos::call(const_cast(*this)); +} + +// aten::cos_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::cos_() const { + return at::_ops::cos_::call(const_cast(*this)); +} + +// aten::cosh(Tensor self) -> Tensor +inline at::Tensor Tensor::cosh() const { + return at::_ops::cosh::call(const_cast(*this)); +} + +// aten::cosh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::cosh_() const { + return at::_ops::cosh_::call(const_cast(*this)); +} + +// aten::count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor +inline at::Tensor Tensor::count_nonzero(at::IntArrayRef dim) const { + return at::_ops::count_nonzero_dim_IntList::call(const_cast(*this), dim); +} + +// aten::count_nonzero(Tensor self, int? dim=None) -> Tensor +inline at::Tensor Tensor::count_nonzero(::std::optional dim) const { + return at::_ops::count_nonzero::call(const_cast(*this), dim); +} + +// aten::cov(Tensor self, *, int correction=1, Tensor? fweights=None, Tensor? 
aweights=None) -> Tensor +inline at::Tensor Tensor::cov(int64_t correction, const ::std::optional & fweights, const ::std::optional & aweights) const { + return at::_ops::cov::call(const_cast(*this), correction, fweights, aweights); +} + +// aten::corrcoef(Tensor self) -> Tensor +inline at::Tensor Tensor::corrcoef() const { + return at::_ops::corrcoef::call(const_cast(*this)); +} + +// aten::cummax(Tensor self, int dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummax(int64_t dim) const { + return at::_ops::cummax::call(const_cast(*this), dim); +} + +// aten::cummax.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummax(at::Dimname dim) const { + return at::_ops::cummax_dimname::call(const_cast(*this), dim); +} + +// aten::cummin(Tensor self, int dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummin(int64_t dim) const { + return at::_ops::cummin::call(const_cast(*this), dim); +} + +// aten::cummin.dimname(Tensor self, Dimname dim) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::cummin(at::Dimname dim) const { + return at::_ops::cummin_dimname::call(const_cast(*this), dim); +} + +// aten::cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumprod(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumprod::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumprod_(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumprod_::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumprod(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumprod_dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumprod_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? 
dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumprod_(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumprod__dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumsum(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumsum::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum_(Tensor(a!) self, int dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumsum_(int64_t dim, ::std::optional dtype) const { + return at::_ops::cumsum_::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum.dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::cumsum(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumsum_dimname::call(const_cast(*this), dim, dtype); +} + +// aten::cumsum_.dimname(Tensor(a!) self, Dimname dim, *, ScalarType? dtype=None) -> Tensor(a!) +inline at::Tensor & Tensor::cumsum_(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::cumsum__dimname::call(const_cast(*this), dim, dtype); +} + +// aten::diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor +inline at::Tensor Tensor::diag_embed(int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diag_embed::call(const_cast(*this), offset, dim1, dim2); +} + +// aten::diagflat(Tensor self, int offset=0) -> Tensor +inline at::Tensor Tensor::diagflat(int64_t offset) const { + return at::_ops::diagflat::call(const_cast(*this), offset); +} + +// aten::diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a) +inline at::Tensor Tensor::diagonal(int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diagonal::call(const_cast(*this), offset, dim1, dim2); +} + +// aten::diagonal.Dimname(Tensor(a) self, *, Dimname outdim, Dimname dim1, Dimname dim2, int offset=0) -> Tensor(a) +inline at::Tensor 
Tensor::diagonal(at::Dimname outdim, at::Dimname dim1, at::Dimname dim2, int64_t offset) const { + return at::_ops::diagonal_Dimname::call(const_cast(*this), outdim, dim1, dim2, offset); +} + +// aten::fill_diagonal_(Tensor(a!) self, Scalar fill_value, bool wrap=False) -> Tensor(a!) +inline at::Tensor & Tensor::fill_diagonal_(const at::Scalar & fill_value, bool wrap) const { + return at::_ops::fill_diagonal_::call(const_cast(*this), fill_value, wrap); +} + +// aten::diff(Tensor self, int n=1, int dim=-1, Tensor? prepend=None, Tensor? append=None) -> Tensor +inline at::Tensor Tensor::diff(int64_t n, int64_t dim, const ::std::optional & prepend, const ::std::optional & append) const { + return at::_ops::diff::call(const_cast(*this), n, dim, prepend, append); +} + +// aten::div.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::div(const at::Tensor & other) const { + return at::_ops::div_Tensor::call(const_cast(*this), other); +} + +// aten::div_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Tensor & other) const { + return at::_ops::div__Tensor::call(const_cast(*this), other); +} + +// aten::div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::div(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::div_Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::div__Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::div(const at::Scalar & other) const { + return at::_ops::div_Scalar::call(const_cast(*this), other); +} + +// aten::div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::div_(const at::Scalar & other) const { + return at::_ops::div__Scalar::call(const_cast(*this), other); +} + +// aten::div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::div(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::div_Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::div_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::div_(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::div__Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::divide(const at::Tensor & other) const { + return at::_ops::divide_Tensor::call(const_cast(*this), other); +} + +// aten::divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Tensor & other) const { + return at::_ops::divide__Tensor::call(const_cast(*this), other); +} + +// aten::divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::divide(const at::Scalar & other) const { + return at::_ops::divide_Scalar::call(const_cast(*this), other); +} + +// aten::divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Scalar & other) const { + return at::_ops::divide__Scalar::call(const_cast(*this), other); +} + +// aten::divide.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::divide(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::divide_Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide_.Tensor_mode(Tensor(a!) self, Tensor other, *, str? rounding_mode) -> Tensor(a!) 
+inline at::Tensor & Tensor::divide_(const at::Tensor & other, ::std::optional rounding_mode) const { + return at::_ops::divide__Tensor_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor +inline at::Tensor Tensor::divide(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::divide_Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::divide_.Scalar_mode(Tensor(a!) self, Scalar other, *, str? rounding_mode) -> Tensor(a!) +inline at::Tensor & Tensor::divide_(const at::Scalar & other, ::std::optional rounding_mode) const { + return at::_ops::divide__Scalar_mode::call(const_cast(*this), other, rounding_mode); +} + +// aten::true_divide.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::true_divide(const at::Tensor & other) const { + return at::_ops::true_divide_Tensor::call(const_cast(*this), other); +} + +// aten::true_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::true_divide_(const at::Tensor & other) const { + return at::_ops::true_divide__Tensor::call(const_cast(*this), other); +} + +// aten::true_divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::true_divide(const at::Scalar & other) const { + return at::_ops::true_divide_Scalar::call(const_cast(*this), other); +} + +// aten::true_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::true_divide_(const at::Scalar & other) const { + return at::_ops::true_divide__Scalar::call(const_cast(*this), other); +} + +// aten::dot(Tensor self, Tensor tensor) -> Tensor +inline at::Tensor Tensor::dot(const at::Tensor & tensor) const { + return at::_ops::dot::call(const_cast(*this), tensor); +} + +// aten::vdot(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::vdot(const at::Tensor & other) const { + return at::_ops::vdot::call(const_cast(*this), other); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_empty::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_empty::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options) const { + return at::_ops::new_empty_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty_strided::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options) const { + return at::_ops::new_empty_strided::call(const_cast(*this), size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_empty_strided(Tensor self, SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_empty_strided::call(const_cast(*this), size, stride, dtype, layout, device, pin_memory); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full(at::IntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options) const { + return at::_ops::new_full::call(const_cast(*this), c10::fromIntArrayRefSlow(size), fill_value, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full(at::IntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_full::call(const_cast(*this), c10::fromIntArrayRefSlow(size), fill_value, dtype, layout, device, pin_memory); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? 
dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, at::TensorOptions options) const { + return at::_ops::new_full::call(const_cast(*this), size, fill_value, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_full(Tensor self, SymInt[] size, Scalar fill_value, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_full_symint(c10::SymIntArrayRef size, const at::Scalar & fill_value, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_full::call(const_cast(*this), size, fill_value, dtype, layout, device, pin_memory); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_zeros::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_zeros::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_zeros::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_zeros(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_zeros_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_zeros::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones(at::IntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_ones::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones(at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_ones::call(const_cast(*this), c10::fromIntArrayRefSlow(size), dtype, layout, device, pin_memory); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones_symint(c10::SymIntArrayRef size, at::TensorOptions options) const { + return at::_ops::new_ones::call(const_cast(*this), size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} + +// aten::new_ones(Tensor self, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor Tensor::new_ones_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) const { + return at::_ops::new_ones::call(const_cast(*this), size, dtype, layout, device, pin_memory); +} + +// aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_(at::IntArrayRef size, ::std::optional memory_format) const { + return at::_ops::resize_::call(const_cast(*this), c10::fromIntArrayRefSlow(size), memory_format); +} + +// aten::resize_(Tensor(a!) self, SymInt[] size, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize__symint(c10::SymIntArrayRef size, ::std::optional memory_format) const { + return at::_ops::resize_::call(const_cast(*this), size, memory_format); +} + +// aten::erf(Tensor self) -> Tensor +inline at::Tensor Tensor::erf() const { + return at::_ops::erf::call(const_cast(*this)); +} + +// aten::erf_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::erf_() const { + return at::_ops::erf_::call(const_cast(*this)); +} + +// aten::erfc(Tensor self) -> Tensor +inline at::Tensor Tensor::erfc() const { + return at::_ops::erfc::call(const_cast(*this)); +} + +// aten::erfc_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::erfc_() const { + return at::_ops::erfc_::call(const_cast(*this)); +} + +// aten::exp(Tensor self) -> Tensor +inline at::Tensor Tensor::exp() const { + return at::_ops::exp::call(const_cast(*this)); +} + +// aten::exp_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::exp_() const { + return at::_ops::exp_::call(const_cast(*this)); +} + +// aten::exp2(Tensor self) -> Tensor +inline at::Tensor Tensor::exp2() const { + return at::_ops::exp2::call(const_cast(*this)); +} + +// aten::exp2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::exp2_() const { + return at::_ops::exp2_::call(const_cast(*this)); +} + +// aten::expm1(Tensor self) -> Tensor +inline at::Tensor Tensor::expm1() const { + return at::_ops::expm1::call(const_cast(*this)); +} + +// aten::expm1_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::expm1_() const { + return at::_ops::expm1_::call(const_cast(*this)); +} + +// aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a) +inline at::Tensor Tensor::expand(at::IntArrayRef size, bool implicit) const { + return at::_ops::expand::call(const_cast(*this), c10::fromIntArrayRefSlow(size), implicit); +} + +// aten::expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a) +inline at::Tensor Tensor::expand_symint(c10::SymIntArrayRef size, bool implicit) const { + return at::_ops::expand::call(const_cast(*this), size, implicit); +} + +// aten::expand_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::expand_as(const at::Tensor & other) const { + return at::_ops::expand_as::call(const_cast(*this), other); +} + +// aten::flatten.using_ints(Tensor(a) self, int start_dim=0, int end_dim=-1) -> Tensor(a) +inline at::Tensor Tensor::flatten(int64_t start_dim, int64_t end_dim) const { + return at::_ops::flatten_using_ints::call(const_cast(*this), start_dim, end_dim); +} + +// aten::flatten.named_out_dim(Tensor(a) self, int start_dim, int end_dim, 
Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(int64_t start_dim, int64_t end_dim, at::Dimname out_dim) const { + return at::_ops::flatten_named_out_dim::call(const_cast(*this), start_dim, end_dim, out_dim); +} + +// aten::flatten.using_names(Tensor(a) self, Dimname start_dim, Dimname end_dim, Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(at::Dimname start_dim, at::Dimname end_dim, at::Dimname out_dim) const { + return at::_ops::flatten_using_names::call(const_cast(*this), start_dim, end_dim, out_dim); +} + +// aten::flatten.DimnameList(Tensor(a) self, Dimname[] dims, Dimname out_dim) -> Tensor(a) +inline at::Tensor Tensor::flatten(at::DimnameList dims, at::Dimname out_dim) const { + return at::_ops::flatten_DimnameList::call(const_cast(*this), dims, out_dim); +} + +// aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a) +inline at::Tensor Tensor::unflatten(int64_t dim, at::IntArrayRef sizes) const { + return at::_ops::unflatten_int::call(const_cast(*this), dim, c10::fromIntArrayRefSlow(sizes)); +} + +// aten::unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a) +inline at::Tensor Tensor::unflatten_symint(int64_t dim, c10::SymIntArrayRef sizes) const { + return at::_ops::unflatten_int::call(const_cast(*this), dim, sizes); +} + +// aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::unflatten(at::Dimname dim, at::IntArrayRef sizes, at::DimnameList names) const { + return at::_ops::unflatten_Dimname::call(const_cast(*this), dim, c10::fromIntArrayRefSlow(sizes), names); +} + +// aten::unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a) +inline at::Tensor Tensor::unflatten_symint(at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names) const { + return at::_ops::unflatten_Dimname::call(const_cast(*this), dim, sizes, names); +} + +// aten::fill_.Scalar(Tensor(a!) 
self, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::fill_(const at::Scalar & value) const { + return at::_ops::fill__Scalar::call(const_cast(*this), value); +} + +// aten::fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::fill_(const at::Tensor & value) const { + return at::_ops::fill__Tensor::call(const_cast(*this), value); +} + +// aten::floor(Tensor self) -> Tensor +inline at::Tensor Tensor::floor() const { + return at::_ops::floor::call(const_cast(*this)); +} + +// aten::floor_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::floor_() const { + return at::_ops::floor_::call(const_cast(*this)); +} + +// aten::floor_divide(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::floor_divide(const at::Tensor & other) const { + return at::_ops::floor_divide::call(const_cast(*this), other); +} + +// aten::floor_divide_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::floor_divide_(const at::Tensor & other) const { + return at::_ops::floor_divide__Tensor::call(const_cast(*this), other); +} + +// aten::floor_divide.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::floor_divide(const at::Scalar & other) const { + return at::_ops::floor_divide_Scalar::call(const_cast(*this), other); +} + +// aten::floor_divide_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::floor_divide_(const at::Scalar & other) const { + return at::_ops::floor_divide__Scalar::call(const_cast(*this), other); +} + +// aten::frac(Tensor self) -> Tensor +inline at::Tensor Tensor::frac() const { + return at::_ops::frac::call(const_cast(*this)); +} + +// aten::frac_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::frac_() const { + return at::_ops::frac_::call(const_cast(*this)); +} + +// aten::gcd(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::gcd(const at::Tensor & other) const { + return at::_ops::gcd::call(const_cast(*this), other); +} + +// aten::gcd_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::gcd_(const at::Tensor & other) const { + return at::_ops::gcd_::call(const_cast(*this), other); +} + +// aten::lcm(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::lcm(const at::Tensor & other) const { + return at::_ops::lcm::call(const_cast(*this), other); +} + +// aten::lcm_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::lcm_(const at::Tensor & other) const { + return at::_ops::lcm_::call(const_cast(*this), other); +} + +// aten::index.Tensor(Tensor self, Tensor?[] indices) -> Tensor +inline at::Tensor Tensor::index(const c10::List<::std::optional> & indices) const { + return at::_ops::index_Tensor::call(const_cast(*this), indices); +} + +// aten::index_copy_(Tensor(a!) self, int dim, Tensor index, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::index_copy_(int64_t dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy_::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor +inline at::Tensor Tensor::index_copy(int64_t dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy_.dimname(Tensor(a!) self, Dimname dim, Tensor index, Tensor source) -> Tensor(a!) 
+inline at::Tensor & Tensor::index_copy_(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy__dimname::call(const_cast(*this), dim, index, source); +} + +// aten::index_copy.dimname(Tensor self, Dimname dim, Tensor index, Tensor source) -> Tensor +inline at::Tensor Tensor::index_copy(at::Dimname dim, const at::Tensor & index, const at::Tensor & source) const { + return at::_ops::index_copy_dimname::call(const_cast(*this), dim, index, source); +} + +// aten::index_put_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor(a!) +inline at::Tensor & Tensor::index_put_(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate) const { + return at::_ops::index_put_::call(const_cast(*this), indices, values, accumulate); +} + +// aten::index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor +inline at::Tensor Tensor::index_put(const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate) const { + return at::_ops::index_put::call(const_cast(*this), indices, values, accumulate); +} + +// aten::isclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> Tensor +inline at::Tensor Tensor::isclose(const at::Tensor & other, double rtol, double atol, bool equal_nan) const { + return at::_ops::isclose::call(const_cast(*this), other, rtol, atol, equal_nan); +} + +// aten::isnan(Tensor self) -> Tensor +inline at::Tensor Tensor::isnan() const { + return at::_ops::isnan::call(const_cast(*this)); +} + +// aten::is_distributed(Tensor self) -> bool +inline bool Tensor::is_distributed() const { + return at::_ops::is_distributed::call(const_cast(*this)); +} + +// aten::is_floating_point(Tensor self) -> bool +inline bool Tensor::__dispatch_is_floating_point() const { + return at::_ops::is_floating_point::call(const_cast(*this)); +} + +// aten::is_complex(Tensor self) -> bool +inline 
bool Tensor::__dispatch_is_complex() const { + return at::_ops::is_complex::call(const_cast(*this)); +} + +// aten::is_conj(Tensor self) -> bool +inline bool Tensor::__dispatch_is_conj() const { + return at::_ops::is_conj::call(const_cast(*this)); +} + +// aten::_is_zerotensor(Tensor self) -> bool +inline bool Tensor::__dispatch__is_zerotensor() const { + return at::_ops::_is_zerotensor::call(const_cast(*this)); +} + +// aten::is_neg(Tensor self) -> bool +inline bool Tensor::__dispatch_is_neg() const { + return at::_ops::is_neg::call(const_cast(*this)); +} + +// aten::isreal(Tensor self) -> Tensor +inline at::Tensor Tensor::isreal() const { + return at::_ops::isreal::call(const_cast(*this)); +} + +// aten::is_nonzero(Tensor self) -> bool +inline bool Tensor::is_nonzero() const { + return at::_ops::is_nonzero::call(const_cast(*this)); +} + +// aten::is_same_size(Tensor self, Tensor other) -> bool +inline bool Tensor::is_same_size(const at::Tensor & other) const { + return at::_ops::is_same_size::call(const_cast(*this), other); +} + +// aten::is_signed(Tensor self) -> bool +inline bool Tensor::__dispatch_is_signed() const { + return at::_ops::is_signed::call(const_cast(*this)); +} + +// aten::is_inference(Tensor self) -> bool +inline bool Tensor::__dispatch_is_inference() const { + return at::_ops::is_inference::call(const_cast(*this)); +} + +// aten::kron(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::kron(const at::Tensor & other) const { + return at::_ops::kron::call(const_cast(*this), other); +} + +// aten::kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue(int64_t k, int64_t dim, bool keepdim) const { + return at::_ops::kthvalue::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue(Tensor self, SymInt k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue_symint(c10::SymInt k, int64_t dim, bool 
keepdim) const { + return at::_ops::kthvalue::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue(int64_t k, at::Dimname dim, bool keepdim) const { + return at::_ops::kthvalue_dimname::call(const_cast(*this), k, dim, keepdim); +} + +// aten::kthvalue.dimname(Tensor self, SymInt k, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::kthvalue_symint(c10::SymInt k, at::Dimname dim, bool keepdim) const { + return at::_ops::kthvalue_dimname::call(const_cast(*this), k, dim, keepdim); +} + +// aten::nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor +inline at::Tensor Tensor::nan_to_num(::std::optional nan, ::std::optional posinf, ::std::optional neginf) const { + return at::_ops::nan_to_num::call(const_cast(*this), nan, posinf, neginf); +} + +// aten::nan_to_num_(Tensor(a!) self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor(a!) +inline at::Tensor & Tensor::nan_to_num_(::std::optional nan, ::std::optional posinf, ::std::optional neginf) const { + return at::_ops::nan_to_num_::call(const_cast(*this), nan, posinf, neginf); +} + +// aten::ldexp.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ldexp(const at::Tensor & other) const { + return at::_ops::ldexp_Tensor::call(const_cast(*this), other); +} + +// aten::ldexp_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ldexp_(const at::Tensor & other) const { + return at::_ops::ldexp_::call(const_cast(*this), other); +} + +// aten::log(Tensor self) -> Tensor +inline at::Tensor Tensor::log() const { + return at::_ops::log::call(const_cast(*this)); +} + +// aten::log_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::log_() const { + return at::_ops::log_::call(const_cast(*this)); +} + +// aten::log10(Tensor self) -> Tensor +inline at::Tensor Tensor::log10() const { + return at::_ops::log10::call(const_cast(*this)); +} + +// aten::log10_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log10_() const { + return at::_ops::log10_::call(const_cast(*this)); +} + +// aten::log1p(Tensor self) -> Tensor +inline at::Tensor Tensor::log1p() const { + return at::_ops::log1p::call(const_cast(*this)); +} + +// aten::log1p_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log1p_() const { + return at::_ops::log1p_::call(const_cast(*this)); +} + +// aten::log2(Tensor self) -> Tensor +inline at::Tensor Tensor::log2() const { + return at::_ops::log2::call(const_cast(*this)); +} + +// aten::log2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::log2_() const { + return at::_ops::log2_::call(const_cast(*this)); +} + +// aten::logaddexp(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logaddexp(const at::Tensor & other) const { + return at::_ops::logaddexp::call(const_cast(*this), other); +} + +// aten::logaddexp2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::logaddexp2(const at::Tensor & other) const { + return at::_ops::logaddexp2::call(const_cast(*this), other); +} + +// aten::xlogy.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::xlogy(const at::Tensor & other) const { + return at::_ops::xlogy_Tensor::call(const_cast(*this), other); +} + +// aten::xlogy.Scalar_Other(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::xlogy(const at::Scalar & other) const { + return at::_ops::xlogy_Scalar_Other::call(const_cast(*this), other); +} + +// aten::xlogy_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::xlogy_(const at::Tensor & other) const { + return at::_ops::xlogy__Tensor::call(const_cast(*this), other); +} + +// aten::xlogy_.Scalar_Other(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::xlogy_(const at::Scalar & other) const { + return at::_ops::xlogy__Scalar_Other::call(const_cast(*this), other); +} + +// aten::log_softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::log_softmax(int64_t dim, ::std::optional dtype) const { + return at::_ops::log_softmax_int::call(const_cast(*this), dim, dtype); +} + +// aten::log_softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::log_softmax(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::log_softmax_Dimname::call(const_cast(*this), dim, dtype); +} + +// aten::logcumsumexp(Tensor self, int dim) -> Tensor +inline at::Tensor Tensor::logcumsumexp(int64_t dim) const { + return at::_ops::logcumsumexp::call(const_cast(*this), dim); +} + +// aten::logcumsumexp.dimname(Tensor self, Dimname dim) -> Tensor +inline at::Tensor Tensor::logcumsumexp(at::Dimname dim) const { + return at::_ops::logcumsumexp_dimname::call(const_cast(*this), dim); +} + +// aten::logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::logsumexp(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::logsumexp::call(const_cast(*this), dim, keepdim); +} + +// aten::logsumexp.names(Tensor self, Dimname[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::logsumexp(at::DimnameList dim, bool keepdim) const { + return at::_ops::logsumexp_names::call(const_cast(*this), dim, keepdim); +} + +// aten::matmul(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::matmul(const at::Tensor & other) const { + return at::_ops::matmul::call(const_cast(*this), other); +} + +// aten::matrix_power(Tensor self, int n) -> Tensor +inline at::Tensor 
Tensor::matrix_power(int64_t n) const { + return at::_ops::matrix_power::call(const_cast(*this), n); +} + +// aten::matrix_exp(Tensor self) -> Tensor +inline at::Tensor Tensor::matrix_exp() const { + return at::_ops::matrix_exp::call(const_cast(*this)); +} + +// aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max) +inline ::std::tuple Tensor::aminmax(::std::optional dim, bool keepdim) const { + return at::_ops::aminmax::call(const_cast(*this), dim, keepdim); +} + +// aten::max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::max(int64_t dim, bool keepdim) const { + return at::_ops::max_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::max.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::max(at::Dimname dim, bool keepdim) const { + return at::_ops::max_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor +inline at::Tensor Tensor::amax(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::amax::call(const_cast(*this), dim, keepdim); +} + +// aten::mean(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::mean(::std::optional dtype) const { + return at::_ops::mean::call(const_cast(*this), dtype); +} + +// aten::mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::mean(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::mean_dim::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::mean.names_dim(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor +inline at::Tensor Tensor::mean(at::DimnameList dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::mean_names_dim::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::nanmean(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::nanmean::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::median(Tensor self) -> Tensor +inline at::Tensor Tensor::median() const { + return at::_ops::median::call(const_cast(*this)); +} + +// aten::median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::median(int64_t dim, bool keepdim) const { + return at::_ops::median_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::median.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::median(at::Dimname dim, bool keepdim) const { + return at::_ops::median_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::nanmedian(Tensor self) -> Tensor +inline at::Tensor Tensor::nanmedian() const { + return at::_ops::nanmedian::call(const_cast(*this)); +} + +// aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::nanmedian(int64_t dim, bool keepdim) const { + return at::_ops::nanmedian_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::nanmedian(at::Dimname dim, bool keepdim) const { + return at::_ops::nanmedian_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::min(int64_t dim, bool keepdim) const { + return 
at::_ops::min_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::min.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::min(at::Dimname dim, bool keepdim) const { + return at::_ops::min_names_dim::call(const_cast(*this), dim, keepdim); +} + +// aten::amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor +inline at::Tensor Tensor::amin(at::IntArrayRef dim, bool keepdim) const { + return at::_ops::amin::call(const_cast(*this), dim, keepdim); +} + +// aten::mm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::mm(const at::Tensor & mat2) const { + return at::_ops::mm::call(const_cast(*this), mat2); +} + +// aten::mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::mode(int64_t dim, bool keepdim) const { + return at::_ops::mode::call(const_cast(*this), dim, keepdim); +} + +// aten::mode.dimname(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::mode(at::Dimname dim, bool keepdim) const { + return at::_ops::mode_dimname::call(const_cast(*this), dim, keepdim); +} + +// aten::mul.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::mul(const at::Tensor & other) const { + return at::_ops::mul_Tensor::call(const_cast(*this), other); +} + +// aten::mul_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::mul_(const at::Tensor & other) const { + return at::_ops::mul__Tensor::call(const_cast(*this), other); +} + +// aten::mul.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::mul(const at::Scalar & other) const { + return at::_ops::mul_Scalar::call(const_cast(*this), other); +} + +// aten::mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::mul_(const at::Scalar & other) const { + return at::_ops::mul__Scalar::call(const_cast(*this), other); +} + +// aten::multiply.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::multiply(const at::Tensor & other) const { + return at::_ops::multiply_Tensor::call(const_cast(*this), other); +} + +// aten::multiply_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::multiply_(const at::Tensor & other) const { + return at::_ops::multiply__Tensor::call(const_cast(*this), other); +} + +// aten::multiply.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::multiply(const at::Scalar & other) const { + return at::_ops::multiply_Scalar::call(const_cast(*this), other); +} + +// aten::multiply_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::multiply_(const at::Scalar & other) const { + return at::_ops::multiply__Scalar::call(const_cast(*this), other); +} + +// aten::mv(Tensor self, Tensor vec) -> Tensor +inline at::Tensor Tensor::mv(const at::Tensor & vec) const { + return at::_ops::mv::call(const_cast(*this), vec); +} + +// aten::mvlgamma(Tensor self, int p) -> Tensor +inline at::Tensor Tensor::mvlgamma(int64_t p) const { + return at::_ops::mvlgamma::call(const_cast(*this), p); +} + +// aten::mvlgamma_(Tensor(a!) self, int p) -> Tensor(a!) 
+inline at::Tensor & Tensor::mvlgamma_(int64_t p) const { + return at::_ops::mvlgamma_::call(const_cast(*this), p); +} + +// aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor +inline at::Tensor Tensor::narrow_copy(int64_t dim, int64_t start, int64_t length) const { + return at::_ops::narrow_copy::call(const_cast(*this), dim, start, length); +} + +// aten::narrow_copy(Tensor self, int dim, SymInt start, SymInt length) -> Tensor +inline at::Tensor Tensor::narrow_copy_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const { + return at::_ops::narrow_copy::call(const_cast(*this), dim, start, length); +} + +// aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow(int64_t dim, int64_t start, int64_t length) const { + return at::_ops::narrow::call(const_cast(*this), dim, start, length); +} + +// aten::narrow(Tensor(a) self, int dim, SymInt start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow_symint(int64_t dim, c10::SymInt start, c10::SymInt length) const { + return at::_ops::narrow::call(const_cast(*this), dim, start, length); +} + +// aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow(int64_t dim, const at::Tensor & start, int64_t length) const { + return at::_ops::narrow_Tensor::call(const_cast(*this), dim, start, length); +} + +// aten::narrow.Tensor(Tensor(a) self, int dim, Tensor start, SymInt length) -> Tensor(a) +inline at::Tensor Tensor::narrow_symint(int64_t dim, const at::Tensor & start, c10::SymInt length) const { + return at::_ops::narrow_Tensor::call(const_cast(*this), dim, start, length); +} + +// aten::permute(Tensor(a) self, int[] dims) -> Tensor(a) +inline at::Tensor Tensor::permute(at::IntArrayRef dims) const { + return at::_ops::permute::call(const_cast(*this), dims); +} + +// aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline 
at::Tensor Tensor::movedim(at::IntArrayRef source, at::IntArrayRef destination) const { + return at::_ops::movedim_intlist::call(const_cast(*this), source, destination); +} + +// aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor Tensor::movedim(int64_t source, int64_t destination) const { + return at::_ops::movedim_int::call(const_cast(*this), source, destination); +} + +// aten::moveaxis.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline at::Tensor Tensor::moveaxis(at::IntArrayRef source, at::IntArrayRef destination) const { + return at::_ops::moveaxis_intlist::call(const_cast(*this), source, destination); +} + +// aten::moveaxis.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor Tensor::moveaxis(int64_t source, int64_t destination) const { + return at::_ops::moveaxis_int::call(const_cast(*this), source, destination); +} + +// aten::numpy_T(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::numpy_T() const { + return at::_ops::numpy_T::call(const_cast(*this)); +} + +// aten::matrix_H(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::matrix_H() const { + return at::_ops::matrix_H::call(const_cast(*this)); +} + +// aten::mT(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::mT() const { + return at::_ops::mT::call(const_cast(*this)); +} + +// aten::mH(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::mH() const { + return at::_ops::mH::call(const_cast(*this)); +} + +// aten::adjoint(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::adjoint() const { + return at::_ops::adjoint::call(const_cast(*this)); +} + +// aten::is_pinned(Tensor self, Device? device=None) -> bool +inline bool Tensor::is_pinned(::std::optional device) const { + return at::_ops::is_pinned::call(const_cast(*this), device); +} + +// aten::pin_memory(Tensor(a) self, Device? 
device=None) -> Tensor(a) +inline at::Tensor Tensor::pin_memory(::std::optional device) const { + return at::_ops::pin_memory::call(const_cast(*this), device); +} + +// aten::pinverse(Tensor self, float rcond=1e-15) -> Tensor +inline at::Tensor Tensor::pinverse(double rcond) const { + return at::_ops::pinverse::call(const_cast(*this), rcond); +} + +// aten::rad2deg(Tensor self) -> Tensor +inline at::Tensor Tensor::rad2deg() const { + return at::_ops::rad2deg::call(const_cast(*this)); +} + +// aten::rad2deg_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::rad2deg_() const { + return at::_ops::rad2deg_::call(const_cast(*this)); +} + +// aten::deg2rad(Tensor self) -> Tensor +inline at::Tensor Tensor::deg2rad() const { + return at::_ops::deg2rad::call(const_cast(*this)); +} + +// aten::deg2rad_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::deg2rad_() const { + return at::_ops::deg2rad_::call(const_cast(*this)); +} + +// aten::ravel(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::ravel() const { + return at::_ops::ravel::call(const_cast(*this)); +} + +// aten::reciprocal(Tensor self) -> Tensor +inline at::Tensor Tensor::reciprocal() const { + return at::_ops::reciprocal::call(const_cast(*this)); +} + +// aten::reciprocal_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::reciprocal_() const { + return at::_ops::reciprocal_::call(const_cast(*this)); +} + +// aten::neg(Tensor self) -> Tensor +inline at::Tensor Tensor::neg() const { + return at::_ops::neg::call(const_cast(*this)); +} + +// aten::neg_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::neg_() const { + return at::_ops::neg_::call(const_cast(*this)); +} + +// aten::negative(Tensor self) -> Tensor +inline at::Tensor Tensor::negative() const { + return at::_ops::negative::call(const_cast(*this)); +} + +// aten::negative_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::negative_() const { + return at::_ops::negative_::call(const_cast(*this)); +} + +// aten::repeat(Tensor self, SymInt[] repeats) -> Tensor +inline at::Tensor Tensor::repeat(at::IntArrayRef repeats) const { + return at::_ops::repeat::call(const_cast(*this), c10::fromIntArrayRefSlow(repeats)); +} + +// aten::repeat(Tensor self, SymInt[] repeats) -> Tensor +inline at::Tensor Tensor::repeat_symint(c10::SymIntArrayRef repeats) const { + return at::_ops::repeat::call(const_cast(*this), repeats); +} + +// aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave(const at::Tensor & repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_Tensor::call(const_cast(*this), repeats, dim, output_size.has_value() ? ::std::make_optional(c10::SymInt(*output_size)) : ::std::nullopt); +} + +// aten::repeat_interleave.self_Tensor(Tensor self, Tensor repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave_symint(const at::Tensor & repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_Tensor::call(const_cast(*this), repeats, dim, output_size); +} + +// aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave(int64_t repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_int::call(const_cast(*this), repeats, dim, output_size.has_value() ? ::std::make_optional(c10::SymInt(*output_size)) : ::std::nullopt); +} + +// aten::repeat_interleave.self_int(Tensor self, SymInt repeats, int? dim=None, *, SymInt? 
output_size=None) -> Tensor +inline at::Tensor Tensor::repeat_interleave_symint(c10::SymInt repeats, ::std::optional dim, ::std::optional output_size) const { + return at::_ops::repeat_interleave_self_int::call(const_cast(*this), repeats, dim, output_size); +} + +// aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a) +inline at::Tensor Tensor::reshape(at::IntArrayRef shape) const { + return at::_ops::reshape::call(const_cast(*this), c10::fromIntArrayRefSlow(shape)); +} + +// aten::reshape(Tensor(a) self, SymInt[] shape) -> Tensor(a) +inline at::Tensor Tensor::reshape_symint(c10::SymIntArrayRef shape) const { + return at::_ops::reshape::call(const_cast(*this), shape); +} + +// aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a) +inline at::Tensor Tensor::_reshape_alias(at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::_reshape_alias::call(const_cast(*this), c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::_reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a) +inline at::Tensor Tensor::_reshape_alias_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::_reshape_alias::call(const_cast(*this), size, stride); +} + +// aten::reshape_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::reshape_as(const at::Tensor & other) const { + return at::_ops::reshape_as::call(const_cast(*this), other); +} + +// aten::round(Tensor self) -> Tensor +inline at::Tensor Tensor::round() const { + return at::_ops::round::call(const_cast(*this)); +} + +// aten::round_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::round_() const { + return at::_ops::round_::call(const_cast(*this)); +} + +// aten::round.decimals(Tensor self, *, int decimals) -> Tensor +inline at::Tensor Tensor::round(int64_t decimals) const { + return at::_ops::round_decimals::call(const_cast(*this), decimals); +} + +// aten::round_.decimals(Tensor(a!) self, *, int decimals) -> Tensor(a!) +inline at::Tensor & Tensor::round_(int64_t decimals) const { + return at::_ops::round__decimals::call(const_cast(*this), decimals); +} + +// aten::relu(Tensor self) -> Tensor +inline at::Tensor Tensor::relu() const { + return at::_ops::relu::call(const_cast(*this)); +} + +// aten::relu_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::relu_() const { + return at::_ops::relu_::call(const_cast(*this)); +} + +// aten::prelu(Tensor self, Tensor weight) -> Tensor +inline at::Tensor Tensor::prelu(const at::Tensor & weight) const { + return at::_ops::prelu::call(const_cast(*this), weight); +} + +// aten::hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor +inline at::Tensor Tensor::hardshrink(const at::Scalar & lambd) const { + return at::_ops::hardshrink::call(const_cast(*this), lambd); +} + +// aten::hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor +inline at::Tensor Tensor::hardshrink_backward(const at::Tensor & grad_out, const at::Scalar & lambd) const { + return at::_ops::hardshrink_backward::call(grad_out, const_cast(*this), lambd); +} + +// aten::rsqrt(Tensor self) -> Tensor +inline at::Tensor Tensor::rsqrt() const { + return at::_ops::rsqrt::call(const_cast(*this)); +} + +// aten::rsqrt_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::rsqrt_() const { + return at::_ops::rsqrt_::call(const_cast(*this)); +} + +// aten::select.Dimname(Tensor(a) self, Dimname dim, int index) -> Tensor(a) +inline at::Tensor Tensor::select(at::Dimname dim, int64_t index) const { + return at::_ops::select_Dimname::call(const_cast(*this), dim, index); +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor Tensor::select(int64_t dim, int64_t index) const { + return at::_ops::select_int::call(const_cast(*this), dim, index); +} + +// aten::select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a) +inline at::Tensor Tensor::select_symint(int64_t dim, c10::SymInt index) const { + return at::_ops::select_int::call(const_cast(*this), dim, index); +} + +// aten::sigmoid(Tensor self) -> Tensor +inline at::Tensor Tensor::sigmoid() const { + return at::_ops::sigmoid::call(const_cast(*this)); +} + +// aten::sigmoid_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sigmoid_() const { + return at::_ops::sigmoid_::call(const_cast(*this)); +} + +// aten::logit(Tensor self, float? eps=None) -> Tensor +inline at::Tensor Tensor::logit(::std::optional eps) const { + return at::_ops::logit::call(const_cast(*this), eps); +} + +// aten::logit_(Tensor(a!) self, float? eps=None) -> Tensor(a!) +inline at::Tensor & Tensor::logit_(::std::optional eps) const { + return at::_ops::logit_::call(const_cast(*this), eps); +} + +// aten::sin(Tensor self) -> Tensor +inline at::Tensor Tensor::sin() const { + return at::_ops::sin::call(const_cast(*this)); +} + +// aten::sin_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sin_() const { + return at::_ops::sin_::call(const_cast(*this)); +} + +// aten::sinc(Tensor self) -> Tensor +inline at::Tensor Tensor::sinc() const { + return at::_ops::sinc::call(const_cast(*this)); +} + +// aten::sinc_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::sinc_() const { + return at::_ops::sinc_::call(const_cast(*this)); +} + +// aten::sinh(Tensor self) -> Tensor +inline at::Tensor Tensor::sinh() const { + return at::_ops::sinh::call(const_cast(*this)); +} + +// aten::sinh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sinh_() const { + return at::_ops::sinh_::call(const_cast(*this)); +} + +// aten::detach(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::detach() const { + return at::_ops::detach::call(const_cast(*this)); +} + +// aten::detach_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::detach_() const { + return at::_ops::detach_::call(const_cast(*this)); +} + +// aten::size.Dimname(Tensor self, Dimname dim) -> int +inline int64_t Tensor::size(at::Dimname dim) const { + return at::_ops::size_Dimname::call(const_cast(*this), dim); +} + +// aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice(int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_Tensor::call(const_cast(*this), dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_symint(int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_Tensor::call(const_cast(*this), dim, start, end, step); +} + +// aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? 
end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_inverse(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_inverse::call(const_cast(*this), src, dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a) +inline at::Tensor Tensor::slice_inverse_symint(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_inverse::call(const_cast(*this), src, dim, start, end, step); +} + +// aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor +inline at::Tensor Tensor::slice_scatter(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, int64_t step) const { + return at::_ops::slice_scatter::call(const_cast(*this), src, dim, start.has_value() ? ::std::make_optional(c10::SymInt(*start)) : ::std::nullopt, end.has_value() ? ::std::make_optional(c10::SymInt(*end)) : ::std::nullopt, step); +} + +// aten::slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? 
end=None, SymInt step=1) -> Tensor +inline at::Tensor Tensor::slice_scatter_symint(const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step) const { + return at::_ops::slice_scatter::call(const_cast(*this), src, dim, start, end, step); +} + +// aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor +inline at::Tensor Tensor::select_scatter(const at::Tensor & src, int64_t dim, int64_t index) const { + return at::_ops::select_scatter::call(const_cast(*this), src, dim, index); +} + +// aten::select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor +inline at::Tensor Tensor::select_scatter_symint(const at::Tensor & src, int64_t dim, c10::SymInt index) const { + return at::_ops::select_scatter::call(const_cast(*this), src, dim, index); +} + +// aten::diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor +inline at::Tensor Tensor::diagonal_scatter(const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2) const { + return at::_ops::diagonal_scatter::call(const_cast(*this), src, offset, dim1, dim2); +} + +// aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor +inline at::Tensor Tensor::as_strided_scatter(const at::Tensor & src, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_scatter::call(const_cast(*this), src, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), storage_offset.has_value() ? ::std::make_optional(c10::SymInt(*storage_offset)) : ::std::nullopt); +} + +// aten::as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? 
storage_offset=None) -> Tensor +inline at::Tensor Tensor::as_strided_scatter_symint(const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset) const { + return at::_ops::as_strided_scatter::call(const_cast(*this), src, size, stride, storage_offset); +} + +// aten::smm(Tensor self, Tensor mat2) -> Tensor +inline at::Tensor Tensor::smm(const at::Tensor & mat2) const { + return at::_ops::smm::call(const_cast(*this), mat2); +} + +// aten::softmax.int(Tensor self, int dim, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::softmax(int64_t dim, ::std::optional dtype) const { + return at::_ops::softmax_int::call(const_cast(*this), dim, dtype); +} + +// aten::softmax.Dimname(Tensor self, Dimname dim, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::softmax(at::Dimname dim, ::std::optional dtype) const { + return at::_ops::softmax_Dimname::call(const_cast(*this), dim, dtype); +} + +// aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split(int64_t split_size, int64_t dim) const { + return at::_ops::unsafe_split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_symint(c10::SymInt split_size, int64_t dim) const { + return at::_ops::unsafe_split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split(int64_t split_size, int64_t dim) const { + return at::_ops::split_Tensor::call(const_cast(*this), split_size, dim); +} + +// aten::split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_symint(c10::SymInt split_size, int64_t dim) const { + return at::_ops::split_Tensor::call(const_cast(*this), split_size, dim); +} + +// 
aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split(at::IntArrayRef split_size, int64_t dim) const { + return at::_ops::split_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_size), dim); +} + +// aten::split.sizes(Tensor(a -> *) self, SymInt[] split_size, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_symint(c10::SymIntArrayRef split_size, int64_t dim) const { + return at::_ops::split_sizes::call(const_cast(*this), split_size, dim); +} + +// aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_with_sizes(at::IntArrayRef split_sizes, int64_t dim) const { + return at::_ops::unsafe_split_with_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_sizes), dim); +} + +// aten::unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[] +inline ::std::vector Tensor::unsafe_split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim) const { + return at::_ops::unsafe_split_with_sizes::call(const_cast(*this), split_sizes, dim); +} + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_with_sizes(at::IntArrayRef split_sizes, int64_t dim) const { + return at::_ops::split_with_sizes::call(const_cast(*this), c10::fromIntArrayRefSlow(split_sizes), dim); +} + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::split_with_sizes_symint(c10::SymIntArrayRef split_sizes, int64_t dim) const { + return at::_ops::split_with_sizes::call(const_cast(*this), split_sizes, dim); +} + +// aten::hsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::hsplit(int64_t sections) const { + return at::_ops::hsplit_int::call(const_cast(*this), sections); +} + +// aten::hsplit.array(Tensor(a -> *) self, 
int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::hsplit(at::IntArrayRef indices) const { + return at::_ops::hsplit_array::call(const_cast(*this), indices); +} + +// aten::vsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::vsplit(int64_t sections) const { + return at::_ops::vsplit_int::call(const_cast(*this), sections); +} + +// aten::vsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::vsplit(at::IntArrayRef indices) const { + return at::_ops::vsplit_array::call(const_cast(*this), indices); +} + +// aten::dsplit.int(Tensor(a -> *) self, int sections) -> Tensor(a)[] +inline ::std::vector Tensor::dsplit(int64_t sections) const { + return at::_ops::dsplit_int::call(const_cast(*this), sections); +} + +// aten::dsplit.array(Tensor(a -> *) self, int[] indices) -> Tensor(a)[] +inline ::std::vector Tensor::dsplit(at::IntArrayRef indices) const { + return at::_ops::dsplit_array::call(const_cast(*this), indices); +} + +// aten::squeeze(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::squeeze() const { + return at::_ops::squeeze::call(const_cast(*this)); +} + +// aten::squeeze.dim(Tensor(a) self, int dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(int64_t dim) const { + return at::_ops::squeeze_dim::call(const_cast(*this), dim); +} + +// aten::squeeze.dimname(Tensor(a) self, Dimname dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(at::Dimname dim) const { + return at::_ops::squeeze_dimname::call(const_cast(*this), dim); +} + +// aten::squeeze.dims(Tensor(a) self, int[] dim) -> Tensor(a) +inline at::Tensor Tensor::squeeze(at::IntArrayRef dim) const { + return at::_ops::squeeze_dims::call(const_cast(*this), dim); +} + +// aten::squeeze_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_() const { + return at::_ops::squeeze_::call(const_cast(*this)); +} + +// aten::squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!) 
+inline at::Tensor & Tensor::squeeze_(int64_t dim) const { + return at::_ops::squeeze__dim::call(const_cast(*this), dim); +} + +// aten::squeeze_.dims(Tensor(a!) self, int[] dim) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_(at::IntArrayRef dim) const { + return at::_ops::squeeze__dims::call(const_cast(*this), dim); +} + +// aten::squeeze_.dimname(Tensor(a!) self, Dimname dim) -> Tensor(a!) +inline at::Tensor & Tensor::squeeze_(at::Dimname dim) const { + return at::_ops::squeeze__dimname::call(const_cast(*this), dim); +} + +// aten::sspaddmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sspaddmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::sspaddmm::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor Tensor::stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided, ::std::optional return_complex, ::std::optional align_to_window) const { + return at::_ops::stft::call(const_cast(*this), n_fft, hop_length, win_length, window, normalized, onesided, return_complex, align_to_window); +} + +// aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? 
align_to_window=None) -> Tensor +inline at::Tensor Tensor::stft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool center, c10::string_view pad_mode, bool normalized, ::std::optional onesided, ::std::optional return_complex, ::std::optional align_to_window) const { + return at::_ops::stft_center::call(const_cast(*this), n_fft, hop_length, win_length, window, center, pad_mode, normalized, onesided, return_complex, align_to_window); +} + +// aten::istft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, bool normalized=False, bool? onesided=None, int? length=None, bool return_complex=False) -> Tensor +inline at::Tensor Tensor::istft(int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool center, bool normalized, ::std::optional onesided, ::std::optional length, bool return_complex) const { + return at::_ops::istft::call(const_cast(*this), n_fft, hop_length, win_length, window, center, normalized, onesided, length, return_complex); +} + +// aten::stride.Dimname(Tensor self, Dimname dim) -> int +inline int64_t Tensor::stride(at::Dimname dim) const { + return at::_ops::stride_Dimname::call(const_cast(*this), dim); +} + +// aten::sum(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::sum(::std::optional dtype) const { + return at::_ops::sum::call(const_cast(*this), dtype); +} + +// aten::sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::sum(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::sum_dim_IntList::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::sum.dim_DimnameList(Tensor self, Dimname[1] dim, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor +inline at::Tensor Tensor::sum(at::DimnameList dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::sum_dim_DimnameList::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::nansum(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::nansum(at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::nansum::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor +inline at::Tensor Tensor::hash_tensor(at::IntArrayRef dim, bool keepdim, int64_t mode) const { + return at::_ops::hash_tensor::call(const_cast(*this), dim, keepdim, mode); +} + +// aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor Tensor::sum_to_size(at::IntArrayRef size) const { + return at::_ops::sum_to_size::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::sum_to_size(Tensor self, SymInt[] size) -> Tensor +inline at::Tensor Tensor::sum_to_size_symint(c10::SymIntArrayRef size) const { + return at::_ops::sum_to_size::call(const_cast(*this), size); +} + +// aten::sqrt(Tensor self) -> Tensor +inline at::Tensor Tensor::sqrt() const { + return at::_ops::sqrt::call(const_cast(*this)); +} + +// aten::sqrt_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sqrt_() const { + return at::_ops::sqrt_::call(const_cast(*this)); +} + +// aten::square(Tensor self) -> Tensor +inline at::Tensor Tensor::square() const { + return at::_ops::square::call(const_cast(*this)); +} + +// aten::square_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::square_() const { + return at::_ops::square_::call(const_cast(*this)); +} + +// aten::std(Tensor self, bool unbiased=True) -> Tensor +inline at::Tensor Tensor::std(bool unbiased) const { + return at::_ops::std::call(const_cast(*this), unbiased); +} + +// aten::std.dim(Tensor self, int[1]? 
dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim) const { + return at::_ops::std_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::std_correction::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::std.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::DimnameList dim, bool unbiased, bool keepdim) const { + return at::_ops::std_names_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::std.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::std(at::DimnameList dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::std_correction_names::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::prod(Tensor self, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::prod(::std::optional dtype) const { + return at::_ops::prod::call(const_cast(*this), dtype); +} + +// aten::prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::prod(int64_t dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::prod_dim_int::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::prod.dim_Dimname(Tensor self, Dimname dim, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor +inline at::Tensor Tensor::prod(at::Dimname dim, bool keepdim, ::std::optional dtype) const { + return at::_ops::prod_dim_Dimname::call(const_cast(*this), dim, keepdim, dtype); +} + +// aten::t(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::t() const { + return at::_ops::t::call(const_cast(*this)); +} + +// aten::t_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::t_() const { + return at::_ops::t_::call(const_cast(*this)); +} + +// aten::tan(Tensor self) -> Tensor +inline at::Tensor Tensor::tan() const { + return at::_ops::tan::call(const_cast(*this)); +} + +// aten::tan_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::tan_() const { + return at::_ops::tan_::call(const_cast(*this)); +} + +// aten::tanh(Tensor self) -> Tensor +inline at::Tensor Tensor::tanh() const { + return at::_ops::tanh::call(const_cast(*this)); +} + +// aten::tanh_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::tanh_() const { + return at::_ops::tanh_::call(const_cast(*this)); +} + +// aten::tile(Tensor self, SymInt[] dims) -> Tensor +inline at::Tensor Tensor::tile(at::IntArrayRef dims) const { + return at::_ops::tile::call(const_cast(*this), c10::fromIntArrayRefSlow(dims)); +} + +// aten::tile(Tensor self, SymInt[] dims) -> Tensor +inline at::Tensor Tensor::tile_symint(c10::SymIntArrayRef dims) const { + return at::_ops::tile::call(const_cast(*this), dims); +} + +// aten::transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor Tensor::transpose(int64_t dim0, int64_t dim1) const { + return at::_ops::transpose_int::call(const_cast(*this), dim0, dim1); +} + +// aten::transpose.Dimname(Tensor(a) self, Dimname dim0, Dimname dim1) -> Tensor(a) +inline at::Tensor Tensor::transpose(at::Dimname dim0, at::Dimname dim1) const { + return at::_ops::transpose_Dimname::call(const_cast(*this), dim0, dim1); +} + +// aten::transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) 
+inline at::Tensor & Tensor::transpose_(int64_t dim0, int64_t dim1) const { + return at::_ops::transpose_::call(const_cast(*this), dim0, dim1); +} + +// aten::flip(Tensor self, int[] dims) -> Tensor +inline at::Tensor Tensor::flip(at::IntArrayRef dims) const { + return at::_ops::flip::call(const_cast(*this), dims); +} + +// aten::fliplr(Tensor self) -> Tensor +inline at::Tensor Tensor::fliplr() const { + return at::_ops::fliplr::call(const_cast(*this)); +} + +// aten::flipud(Tensor self) -> Tensor +inline at::Tensor Tensor::flipud() const { + return at::_ops::flipud::call(const_cast(*this)); +} + +// aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor +inline at::Tensor Tensor::roll(at::IntArrayRef shifts, at::IntArrayRef dims) const { + return at::_ops::roll::call(const_cast(*this), c10::fromIntArrayRefSlow(shifts), dims); +} + +// aten::roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor +inline at::Tensor Tensor::roll_symint(c10::SymIntArrayRef shifts, at::IntArrayRef dims) const { + return at::_ops::roll::call(const_cast(*this), shifts, dims); +} + +// aten::rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor +inline at::Tensor Tensor::rot90(int64_t k, at::IntArrayRef dims) const { + return at::_ops::rot90::call(const_cast(*this), k, dims); +} + +// aten::_nested_tensor_size(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_size() const { + return at::_ops::_nested_tensor_size::call(const_cast(*this)); +} + +// aten::_nested_tensor_strides(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_strides() const { + return at::_ops::_nested_tensor_strides::call(const_cast(*this)); +} + +// aten::_nested_tensor_storage_offsets(Tensor self) -> Tensor +inline at::Tensor Tensor::_nested_tensor_storage_offsets() const { + return at::_ops::_nested_tensor_storage_offsets::call(const_cast(*this)); +} + +// aten::trunc(Tensor self) -> Tensor +inline at::Tensor Tensor::trunc() const { + return 
at::_ops::trunc::call(const_cast(*this)); +} + +// aten::trunc_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::trunc_() const { + return at::_ops::trunc_::call(const_cast(*this)); +} + +// aten::fix(Tensor self) -> Tensor +inline at::Tensor Tensor::fix() const { + return at::_ops::fix::call(const_cast(*this)); +} + +// aten::fix_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::fix_() const { + return at::_ops::fix_::call(const_cast(*this)); +} + +// aten::type_as(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::type_as(const at::Tensor & other) const { + return at::_ops::type_as::call(const_cast(*this), other); +} + +// aten::unsqueeze(Tensor(a) self, int dim) -> Tensor(a) +inline at::Tensor Tensor::unsqueeze(int64_t dim) const { + return at::_ops::unsqueeze::call(const_cast(*this), dim); +} + +// aten::unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!) +inline at::Tensor & Tensor::unsqueeze_(int64_t dim) const { + return at::_ops::unsqueeze_::call(const_cast(*this), dim); +} + +// aten::var(Tensor self, bool unbiased=True) -> Tensor +inline at::Tensor Tensor::var(bool unbiased) const { + return at::_ops::var::call(const_cast(*this), unbiased); +} + +// aten::var.dim(Tensor self, int[1]? dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::OptionalIntArrayRef dim, bool unbiased, bool keepdim) const { + return at::_ops::var_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::var.correction(Tensor self, int[1]? dim=None, *, Scalar? 
correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::var_correction::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::var.names_dim(Tensor self, Dimname[1] dim, bool unbiased=True, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::DimnameList dim, bool unbiased, bool keepdim) const { + return at::_ops::var_names_dim::call(const_cast(*this), dim, unbiased, keepdim); +} + +// aten::var.correction_names(Tensor self, Dimname[1] dim, *, Scalar? correction=None, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::var(at::DimnameList dim, const ::std::optional & correction, bool keepdim) const { + return at::_ops::var_correction_names::call(const_cast(*this), dim, correction, keepdim); +} + +// aten::view_as(Tensor(a) self, Tensor other) -> Tensor(a) +inline at::Tensor Tensor::view_as(const at::Tensor & other) const { + return at::_ops::view_as::call(const_cast(*this), other); +} + +// aten::where.self(Tensor condition, Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::where(const at::Tensor & condition, const at::Tensor & other) const { + return at::_ops::where_self::call(condition, const_cast(*this), other); +} + +// aten::where.ScalarOther(Tensor condition, Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::where(const at::Tensor & condition, const at::Scalar & other) const { + return at::_ops::where_ScalarOther::call(condition, const_cast(*this), other); +} + +// aten::norm.ScalarOpt_dtype(Tensor self, Scalar? 
p, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::ScalarType dtype) const { + return at::_ops::norm_ScalarOpt_dtype::call(const_cast(*this), p, dtype); +} + +// aten::norm.Scalar(Tensor self, Scalar p=2) -> Tensor +inline at::Tensor Tensor::norm(const at::Scalar & p) const { + return at::_ops::norm_Scalar::call(const_cast(*this), p); +} + +// aten::norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype) const { + return at::_ops::norm_ScalarOpt_dim_dtype::call(const_cast(*this), p, dim, keepdim, dtype); +} + +// aten::norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::IntArrayRef dim, bool keepdim) const { + return at::_ops::norm_ScalarOpt_dim::call(const_cast(*this), p, dim, keepdim); +} + +// aten::norm.names_ScalarOpt_dim_dtype(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::DimnameList dim, bool keepdim, at::ScalarType dtype) const { + return at::_ops::norm_names_ScalarOpt_dim_dtype::call(const_cast(*this), p, dim, keepdim, dtype); +} + +// aten::norm.names_ScalarOpt_dim(Tensor self, Scalar? p, Dimname[1] dim, bool keepdim=False) -> Tensor +inline at::Tensor Tensor::norm(const ::std::optional & p, at::DimnameList dim, bool keepdim) const { + return at::_ops::norm_names_ScalarOpt_dim::call(const_cast(*this), p, dim, keepdim); +} + +// aten::frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent) +inline ::std::tuple Tensor::frexp() const { + return at::_ops::frexp_Tensor::call(const_cast(*this)); +} + +// aten::clone(Tensor self, *, MemoryFormat? 
memory_format=None) -> Tensor +inline at::Tensor Tensor::clone(::std::optional memory_format) const { + return at::_ops::clone::call(const_cast(*this), memory_format); +} + +// aten::positive(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::positive() const { + return at::_ops::positive::call(const_cast(*this)); +} + +// aten::resize_as_(Tensor(a!) self, Tensor the_template, *, MemoryFormat? memory_format=None) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_as_(const at::Tensor & the_template, ::std::optional memory_format) const { + return at::_ops::resize_as_::call(const_cast(*this), the_template, memory_format); +} + +// aten::resize_as_sparse_(Tensor(a!) self, Tensor the_template) -> Tensor(a!) +inline const at::Tensor & Tensor::resize_as_sparse_(const at::Tensor & the_template) const { + return at::_ops::resize_as_sparse_::call(const_cast(*this), the_template); +} + +// aten::zero_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::zero_() const { + return at::_ops::zero_::call(const_cast(*this)); +} + +// aten::sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sub(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::sub_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::sub_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::sub_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::sub__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::sub(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::sub_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) 
+inline at::Tensor & Tensor::sub_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::sub__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::subtract.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::subtract(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::subtract_Tensor::call(const_cast(*this), other, alpha); +} + +// aten::subtract_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::subtract_(const at::Tensor & other, const at::Scalar & alpha) const { + return at::_ops::subtract__Tensor::call(const_cast(*this), other, alpha); +} + +// aten::subtract.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::subtract(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::subtract_Scalar::call(const_cast(*this), other, alpha); +} + +// aten::subtract_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::subtract_(const at::Scalar & other, const at::Scalar & alpha) const { + return at::_ops::subtract__Scalar::call(const_cast(*this), other, alpha); +} + +// aten::heaviside(Tensor self, Tensor values) -> Tensor +inline at::Tensor Tensor::heaviside(const at::Tensor & values) const { + return at::_ops::heaviside::call(const_cast(*this), values); +} + +// aten::heaviside_(Tensor(a!) self, Tensor values) -> Tensor(a!) +inline at::Tensor & Tensor::heaviside_(const at::Tensor & values) const { + return at::_ops::heaviside_::call(const_cast(*this), values); +} + +// aten::addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addmm(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmm::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::addmm_(Tensor(a!) 
self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & Tensor::addmm_(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addmm_::call(const_cast(*this), mat1, mat2, beta, alpha); +} + +// aten::_addmm_activation(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1, bool use_gelu=False) -> Tensor +inline at::Tensor Tensor::_addmm_activation(const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha, bool use_gelu) const { + return at::_ops::_addmm_activation::call(const_cast(*this), mat1, mat2, beta, alpha, use_gelu); +} + +// aten::sparse_resize_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!) +inline const at::Tensor & Tensor::sparse_resize_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const { + return at::_ops::sparse_resize_::call(const_cast(*this), size, sparse_dim, dense_dim); +} + +// aten::sparse_resize_and_clear_(Tensor(a!) self, int[] size, int sparse_dim, int dense_dim) -> Tensor(a!) +inline const at::Tensor & Tensor::sparse_resize_and_clear_(at::IntArrayRef size, int64_t sparse_dim, int64_t dense_dim) const { + return at::_ops::sparse_resize_and_clear_::call(const_cast(*this), size, sparse_dim, dense_dim); +} + +// aten::sparse_mask(Tensor self, Tensor mask) -> Tensor +inline at::Tensor Tensor::sparse_mask(const at::Tensor & mask) const { + return at::_ops::sparse_mask::call(const_cast(*this), mask); +} + +// aten::_sparse_mask_projection(Tensor self, Tensor mask, bool accumulate_matches=False) -> Tensor +inline at::Tensor Tensor::_sparse_mask_projection(const at::Tensor & mask, bool accumulate_matches) const { + return at::_ops::_sparse_mask_projection::call(const_cast(*this), mask, accumulate_matches); +} + +// aten::to_dense(Tensor self, ScalarType? dtype=None, *, bool? 
masked_grad=None) -> Tensor +inline at::Tensor Tensor::to_dense(::std::optional dtype, ::std::optional masked_grad) const { + return at::_ops::to_dense::call(const_cast(*this), dtype, masked_grad); +} + +// aten::_to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor +inline at::Tensor Tensor::_to_dense(::std::optional dtype, ::std::optional masked_grad) const { + return at::_ops::_to_dense::call(const_cast(*this), dtype, masked_grad); +} + +// aten::sparse_dim(Tensor self) -> int +inline int64_t Tensor::sparse_dim() const { + return at::_ops::sparse_dim::call(const_cast(*this)); +} + +// aten::_dimI(Tensor self) -> int +inline int64_t Tensor::_dimI() const { + return at::_ops::_dimI::call(const_cast(*this)); +} + +// aten::dense_dim(Tensor self) -> int +inline int64_t Tensor::dense_dim() const { + return at::_ops::dense_dim::call(const_cast(*this)); +} + +// aten::_dimV(Tensor self) -> int +inline int64_t Tensor::_dimV() const { + return at::_ops::_dimV::call(const_cast(*this)); +} + +// aten::_nnz(Tensor self) -> int +inline int64_t Tensor::_nnz() const { + return at::_ops::_nnz::call(const_cast(*this)); +} + +// aten::coalesce(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::coalesce() const { + return at::_ops::coalesce::call(const_cast(*this)); +} + +// aten::is_coalesced(Tensor self) -> bool +inline bool Tensor::is_coalesced() const { + return at::_ops::is_coalesced::call(const_cast(*this)); +} + +// aten::_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_indices() const { + return at::_ops::_indices::call(const_cast(*this)); +} + +// aten::_values(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::_values() const { + return at::_ops::_values::call(const_cast(*this)); +} + +// aten::_coalesced_(Tensor(a!) self, bool coalesced) -> Tensor(a!) 
+inline at::Tensor & Tensor::_coalesced_(bool coalesced) const { + return at::_ops::_coalesced_::call(const_cast(*this), coalesced); +} + +// aten::indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::indices() const { + return at::_ops::indices::call(const_cast(*this)); +} + +// aten::values(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::values() const { + return at::_ops::values::call(const_cast(*this)); +} + +// aten::crow_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::crow_indices() const { + return at::_ops::crow_indices::call(const_cast(*this)); +} + +// aten::col_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::col_indices() const { + return at::_ops::col_indices::call(const_cast(*this)); +} + +// aten::ccol_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::ccol_indices() const { + return at::_ops::ccol_indices::call(const_cast(*this)); +} + +// aten::row_indices(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::row_indices() const { + return at::_ops::row_indices::call(const_cast(*this)); +} + +// aten::unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[] +inline ::std::vector Tensor::unbind(int64_t dim) const { + return at::_ops::unbind_int::call(const_cast(*this), dim); +} + +// aten::unbind.Dimname(Tensor(a -> *) self, Dimname dim) -> Tensor(a)[] +inline ::std::vector Tensor::unbind(at::Dimname dim) const { + return at::_ops::unbind_Dimname::call(const_cast(*this), dim); +} + +// aten::to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor +inline at::Tensor Tensor::to_sparse(int64_t sparse_dim) const { + return at::_ops::to_sparse_sparse_dim::call(const_cast(*this), sparse_dim); +} + +// aten::_to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor +inline at::Tensor Tensor::_to_sparse(int64_t sparse_dim) const { + return at::_ops::_to_sparse_sparse_dim::call(const_cast(*this), sparse_dim); +} + +// aten::to_sparse(Tensor self, *, Layout? layout=None, int[2]? 
blocksize=None, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse(::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse::call(const_cast(*this), layout, blocksize, dense_dim); +} + +// aten::_to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse(::std::optional layout, at::OptionalIntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse::call(const_cast(*this), layout, blocksize, dense_dim); +} + +// aten::to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_csr(::std::optional dense_dim) const { + return at::_ops::to_sparse_csr::call(const_cast(*this), dense_dim); +} + +// aten::_to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_csr(::std::optional dense_dim) const { + return at::_ops::_to_sparse_csr::call(const_cast(*this), dense_dim); +} + +// aten::to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_csc(::std::optional dense_dim) const { + return at::_ops::to_sparse_csc::call(const_cast(*this), dense_dim); +} + +// aten::_to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_csc(::std::optional dense_dim) const { + return at::_ops::_to_sparse_csc::call(const_cast(*this), dense_dim); +} + +// aten::to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse_bsr::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::_to_sparse_bsr(Tensor self, int[2] blocksize, int? 
dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_bsr(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse_bsr::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::to_sparse_bsc::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::_to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor +inline at::Tensor Tensor::_to_sparse_bsc(at::IntArrayRef blocksize, ::std::optional dense_dim) const { + return at::_ops::_to_sparse_bsc::call(const_cast(*this), blocksize, dense_dim); +} + +// aten::to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor +inline at::Tensor Tensor::to_mkldnn(::std::optional dtype) const { + return at::_ops::to_mkldnn::call(const_cast(*this), dtype); +} + +// aten::dequantize.self(Tensor self) -> Tensor +inline at::Tensor Tensor::dequantize() const { + return at::_ops::dequantize_self::call(const_cast(*this)); +} + +// aten::q_scale(Tensor self) -> float +inline double Tensor::q_scale() const { + return at::_ops::q_scale::call(const_cast(*this)); +} + +// aten::q_zero_point(Tensor self) -> int +inline int64_t Tensor::q_zero_point() const { + return at::_ops::q_zero_point::call(const_cast(*this)); +} + +// aten::q_per_channel_scales(Tensor self) -> Tensor +inline at::Tensor Tensor::q_per_channel_scales() const { + return at::_ops::q_per_channel_scales::call(const_cast(*this)); +} + +// aten::q_per_channel_zero_points(Tensor self) -> Tensor +inline at::Tensor Tensor::q_per_channel_zero_points() const { + return at::_ops::q_per_channel_zero_points::call(const_cast(*this)); +} + +// aten::q_per_channel_axis(Tensor self) -> int +inline int64_t Tensor::q_per_channel_axis() const { + return at::_ops::q_per_channel_axis::call(const_cast(*this)); +} + +// 
aten::int_repr(Tensor self) -> Tensor +inline at::Tensor Tensor::int_repr() const { + return at::_ops::int_repr::call(const_cast(*this)); +} + +// aten::qscheme(Tensor self) -> QScheme +inline at::QScheme Tensor::qscheme() const { + return at::_ops::qscheme::call(const_cast(*this)); +} + +// aten::_autocast_to_reduced_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled, ScalarType cuda_dtype, ScalarType cpu_dtype) -> Tensor(a) +inline at::Tensor Tensor::_autocast_to_reduced_precision(bool cuda_enabled, bool cpu_enabled, at::ScalarType cuda_dtype, at::ScalarType cpu_dtype) const { + return at::_ops::_autocast_to_reduced_precision::call(const_cast(*this), cuda_enabled, cpu_enabled, cuda_dtype, cpu_dtype); +} + +// aten::_autocast_to_full_precision(Tensor(a) self, bool cuda_enabled, bool cpu_enabled) -> Tensor(a) +inline at::Tensor Tensor::_autocast_to_full_precision(bool cuda_enabled, bool cpu_enabled) const { + return at::_ops::_autocast_to_full_precision::call(const_cast(*this), cuda_enabled, cpu_enabled); +} + +// aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::TensorOptions options, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype_layout::call(const_cast(*this), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt(), non_blocking, copy, c10::impl::check_tensor_options_and_extract_memory_format(options, memory_format)); +} + +// aten::to.dtype_layout(Tensor(a) self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, bool copy=False, MemoryFormat? 
memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype_layout::call(const_cast(*this), dtype, layout, device, pin_memory, non_blocking, copy, memory_format); +} + +// aten::to.device(Tensor(a) self, Device device, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::Device device, at::ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_device::call(const_cast(*this), device, dtype, non_blocking, copy, memory_format); +} + +// aten::to.dtype(Tensor(a) self, ScalarType dtype, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(at::ScalarType dtype, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_dtype::call(const_cast(*this), dtype, non_blocking, copy, memory_format); +} + +// aten::to.other(Tensor(a) self, Tensor other, bool non_blocking=False, bool copy=False, MemoryFormat? memory_format=None) -> Tensor(a) +inline at::Tensor Tensor::to(const at::Tensor & other, bool non_blocking, bool copy, ::std::optional memory_format) const { + return at::_ops::to_other::call(const_cast(*this), other, non_blocking, copy, memory_format); +} + +// aten::item(Tensor self) -> Scalar +inline at::Scalar Tensor::item() const { + return at::_ops::item::call(const_cast(*this)); +} + +// aten::set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!) +inline at::Tensor & Tensor::set_(at::Storage source) const { + return at::_ops::set__source_Storage::call(const_cast(*this), source); +} + +// aten::set_.source_Storage_storage_offset(Tensor(a!) 
self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set_(at::Storage source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::set__source_Storage_storage_offset::call(const_cast(*this), source, storage_offset, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set__symint(at::Storage source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::set__source_Storage_storage_offset::call(const_cast(*this), source, storage_offset, size, stride); +} + +// aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set_(const at::Tensor & source, int64_t storage_offset, at::IntArrayRef size, at::IntArrayRef stride) const { + return at::_ops::set__source_Tensor_storage_offset::call(const_cast(*this), source, storage_offset, c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride)); +} + +// aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor source, SymInt storage_offset, SymInt[] size, SymInt[] stride=[]) -> Tensor(a!) +inline at::Tensor & Tensor::set__symint(const at::Tensor & source, c10::SymInt storage_offset, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) const { + return at::_ops::set__source_Tensor_storage_offset::call(const_cast(*this), source, storage_offset, size, stride); +} + +// aten::set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!) +inline at::Tensor & Tensor::set_(const at::Tensor & source) const { + return at::_ops::set__source_Tensor::call(const_cast(*this), source); +} + +// aten::set_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::set_() const { + return at::_ops::set_::call(const_cast(*this)); +} + +// aten::is_set_to(Tensor self, Tensor tensor) -> bool +inline bool Tensor::is_set_to(const at::Tensor & tensor) const { + return at::_ops::is_set_to::call(const_cast(*this), tensor); +} + +// aten::masked_fill_.Scalar(Tensor(a!) self, Tensor mask, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::masked_fill_(const at::Tensor & mask, const at::Scalar & value) const { + return at::_ops::masked_fill__Scalar::call(const_cast(*this), mask, value); +} + +// aten::masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor +inline at::Tensor Tensor::masked_fill(const at::Tensor & mask, const at::Scalar & value) const { + return at::_ops::masked_fill_Scalar::call(const_cast(*this), mask, value); +} + +// aten::masked_fill_.Tensor(Tensor(a!) self, Tensor mask, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::masked_fill_(const at::Tensor & mask, const at::Tensor & value) const { + return at::_ops::masked_fill__Tensor::call(const_cast(*this), mask, value); +} + +// aten::masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor +inline at::Tensor Tensor::masked_fill(const at::Tensor & mask, const at::Tensor & value) const { + return at::_ops::masked_fill_Tensor::call(const_cast(*this), mask, value); +} + +// aten::masked_scatter_(Tensor(a!) self, Tensor mask, Tensor source) -> Tensor(a!) 
+inline at::Tensor & Tensor::masked_scatter_(const at::Tensor & mask, const at::Tensor & source) const { + return at::_ops::masked_scatter_::call(const_cast(*this), mask, source); +} + +// aten::masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor +inline at::Tensor Tensor::masked_scatter(const at::Tensor & mask, const at::Tensor & source) const { + return at::_ops::masked_scatter::call(const_cast(*this), mask, source); +} + +// aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::view(at::IntArrayRef size) const { + return at::_ops::view::call(const_cast(*this), c10::fromIntArrayRefSlow(size)); +} + +// aten::view(Tensor(a) self, SymInt[] size) -> Tensor(a) +inline at::Tensor Tensor::view_symint(c10::SymIntArrayRef size) const { + return at::_ops::view::call(const_cast(*this), size); +} + +// aten::view.dtype(Tensor(a) self, ScalarType dtype) -> Tensor(a) +inline at::Tensor Tensor::view(at::ScalarType dtype) const { + return at::_ops::view_dtype::call(const_cast(*this), dtype); +} + +// aten::put_(Tensor(a!) self, Tensor index, Tensor source, bool accumulate=False) -> Tensor(a!) +inline at::Tensor & Tensor::put_(const at::Tensor & index, const at::Tensor & source, bool accumulate) const { + return at::_ops::put_::call(const_cast(*this), index, source, accumulate); +} + +// aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor +inline at::Tensor Tensor::put(const at::Tensor & index, const at::Tensor & source, bool accumulate) const { + return at::_ops::put::call(const_cast(*this), index, source, accumulate); +} + +// aten::index_add_(Tensor(a!) self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor(a!) 
+inline at::Tensor & Tensor::index_add_(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add_::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::index_add(int64_t dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::index_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & source, const at::Scalar & alpha) const { + return at::_ops::index_add_dimname::call(const_cast(*this), dim, index, source, alpha); +} + +// aten::index_reduce_(Tensor(a!) self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor(a!) +inline at::Tensor & Tensor::index_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self) const { + return at::_ops::index_reduce_::call(const_cast(*this), dim, index, source, reduce, include_self); +} + +// aten::index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor +inline at::Tensor Tensor::index_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & source, c10::string_view reduce, bool include_self) const { + return at::_ops::index_reduce::call(const_cast(*this), dim, index, source, reduce, include_self); +} + +// aten::index_fill_.int_Scalar(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) 
+inline at::Tensor & Tensor::index_fill_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill__int_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::index_fill(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill_int_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.int_Tensor(Tensor(a!) self, int dim, Tensor index, Tensor value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(int64_t dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill__int_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor +inline at::Tensor Tensor::index_fill(int64_t dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill_int_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.Dimname_Scalar(Tensor(a!) self, Dimname dim, Tensor index, Scalar value) -> Tensor(a!) +inline at::Tensor & Tensor::index_fill_(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill__Dimname_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill_.Dimname_Tensor(Tensor(a!) self, Dimname dim, Tensor index, Tensor value) -> Tensor(a!) 
+inline at::Tensor & Tensor::index_fill_(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill__Dimname_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.Dimname_Scalar(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::index_fill(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::index_fill_Dimname_Scalar::call(const_cast(*this), dim, index, value); +} + +// aten::index_fill.Dimname_Tensor(Tensor self, Dimname dim, Tensor index, Tensor value) -> Tensor +inline at::Tensor Tensor::index_fill(at::Dimname dim, const at::Tensor & index, const at::Tensor & value) const { + return at::_ops::index_fill_Dimname_Tensor::call(const_cast(*this), dim, index, value); +} + +// aten::scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_.src(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter__src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter_value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter_.value(Tensor(a!) self, int dim, Tensor index, Scalar value) -> Tensor(a!) 
+inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter__value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter.reduce(Tensor self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const { + return at::_ops::scatter_reduce::call(const_cast(*this), dim, index, src, reduce); +} + +// aten::scatter_.reduce(Tensor(a!) self, int dim, Tensor index, Tensor src, *, str reduce) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce) const { + return at::_ops::scatter__reduce::call(const_cast(*this), dim, index, src, reduce); +} + +// aten::scatter.value_reduce(Tensor self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor +inline at::Tensor Tensor::scatter(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const { + return at::_ops::scatter_value_reduce::call(const_cast(*this), dim, index, value, reduce); +} + +// aten::scatter_.value_reduce(Tensor(a!) self, int dim, Tensor index, Scalar value, *, str reduce) -> Tensor(a!) 
+inline at::Tensor & Tensor::scatter_(int64_t dim, const at::Tensor & index, const at::Scalar & value, c10::string_view reduce) const { + return at::_ops::scatter__value_reduce::call(const_cast(*this), dim, index, value, reduce); +} + +// aten::scatter.dimname_src(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_dimname_src::call(const_cast(*this), dim, index, src); +} + +// aten::scatter.dimname_value(Tensor self, Dimname dim, Tensor index, Scalar value) -> Tensor +inline at::Tensor Tensor::scatter(at::Dimname dim, const at::Tensor & index, const at::Scalar & value) const { + return at::_ops::scatter_dimname_value::call(const_cast(*this), dim, index, value); +} + +// aten::scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter_add(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_add_(Tensor(a!) self, int dim, Tensor index, Tensor src) -> Tensor(a!) 
+inline at::Tensor & Tensor::scatter_add_(int64_t dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add_::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_add.dimname(Tensor self, Dimname dim, Tensor index, Tensor src) -> Tensor +inline at::Tensor Tensor::scatter_add(at::Dimname dim, const at::Tensor & index, const at::Tensor & src) const { + return at::_ops::scatter_add_dimname::call(const_cast(*this), dim, index, src); +} + +// aten::scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor +inline at::Tensor Tensor::scatter_reduce(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self) const { + return at::_ops::scatter_reduce_two::call(const_cast(*this), dim, index, src, reduce, include_self); +} + +// aten::scatter_reduce_.two(Tensor(a!) self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor(a!) +inline at::Tensor & Tensor::scatter_reduce_(int64_t dim, const at::Tensor & index, const at::Tensor & src, c10::string_view reduce, bool include_self) const { + return at::_ops::scatter_reduce__two::call(const_cast(*this), dim, index, src, reduce, include_self); +} + +// aten::eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::eq_(const at::Scalar & other) const { + return at::_ops::eq__Scalar::call(const_cast(*this), other); +} + +// aten::eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::eq_(const at::Tensor & other) const { + return at::_ops::eq__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_and.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_and(const at::Scalar & other) const { + return at::_ops::bitwise_and_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_and.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_and(const at::Tensor & other) const { + return at::_ops::bitwise_and_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_and_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_and_(const at::Scalar & other) const { + return at::_ops::bitwise_and__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_and_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_and_(const at::Tensor & other) const { + return at::_ops::bitwise_and__Tensor::call(const_cast(*this), other); +} + +// aten::__and__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__and__(const at::Scalar & other) const { + return at::_ops::__and___Scalar::call(const_cast(*this), other); +} + +// aten::__and__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__and__(const at::Tensor & other) const { + return at::_ops::__and___Tensor::call(const_cast(*this), other); +} + +// aten::__iand__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__iand__(const at::Scalar & other) const { + return at::_ops::__iand___Scalar::call(const_cast(*this), other); +} + +// aten::__iand__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::__iand__(const at::Tensor & other) const { + return at::_ops::__iand___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_or.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_or(const at::Scalar & other) const { + return at::_ops::bitwise_or_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_or.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_or(const at::Tensor & other) const { + return at::_ops::bitwise_or_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_or_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_or_(const at::Scalar & other) const { + return at::_ops::bitwise_or__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_or_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_or_(const at::Tensor & other) const { + return at::_ops::bitwise_or__Tensor::call(const_cast(*this), other); +} + +// aten::__or__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__or__(const at::Scalar & other) const { + return at::_ops::__or___Scalar::call(const_cast(*this), other); +} + +// aten::__or__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__or__(const at::Tensor & other) const { + return at::_ops::__or___Tensor::call(const_cast(*this), other); +} + +// aten::__ior__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ior__(const at::Scalar & other) const { + return at::_ops::__ior___Scalar::call(const_cast(*this), other); +} + +// aten::__ior__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::__ior__(const at::Tensor & other) const { + return at::_ops::__ior___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_xor.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_xor(const at::Scalar & other) const { + return at::_ops::bitwise_xor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_xor.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_xor(const at::Tensor & other) const { + return at::_ops::bitwise_xor_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_xor_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_xor_(const at::Scalar & other) const { + return at::_ops::bitwise_xor__Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_xor_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_xor_(const at::Tensor & other) const { + return at::_ops::bitwise_xor__Tensor::call(const_cast(*this), other); +} + +// aten::__xor__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__xor__(const at::Scalar & other) const { + return at::_ops::__xor___Scalar::call(const_cast(*this), other); +} + +// aten::__xor__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__xor__(const at::Tensor & other) const { + return at::_ops::__xor___Tensor::call(const_cast(*this), other); +} + +// aten::__ixor__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ixor__(const at::Scalar & other) const { + return at::_ops::__ixor___Scalar::call(const_cast(*this), other); +} + +// aten::__ixor__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::__ixor__(const at::Tensor & other) const { + return at::_ops::__ixor___Tensor::call(const_cast(*this), other); +} + +// aten::__lshift__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__lshift__(const at::Scalar & other) const { + return at::_ops::__lshift___Scalar::call(const_cast(*this), other); +} + +// aten::__lshift__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__lshift__(const at::Tensor & other) const { + return at::_ops::__lshift___Tensor::call(const_cast(*this), other); +} + +// aten::__ilshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__ilshift__(const at::Scalar & other) const { + return at::_ops::__ilshift___Scalar::call(const_cast(*this), other); +} + +// aten::__ilshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__ilshift__(const at::Tensor & other) const { + return at::_ops::__ilshift___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_left_shift(const at::Tensor & other) const { + return at::_ops::bitwise_left_shift_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_left_shift_(const at::Tensor & other) const { + return at::_ops::bitwise_left_shift__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_left_shift(const at::Scalar & other) const { + return at::_ops::bitwise_left_shift_Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_left_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::bitwise_left_shift_(const at::Scalar & other) const { + return at::_ops::bitwise_left_shift__Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::__rshift__.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::__rshift__(const at::Scalar & other) const { + return at::_ops::__rshift___Scalar::call(const_cast(*this), other); +} + +// aten::__rshift__.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::__rshift__(const at::Tensor & other) const { + return at::_ops::__rshift___Tensor::call(const_cast(*this), other); +} + +// aten::__irshift__.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::__irshift__(const at::Scalar & other) const { + return at::_ops::__irshift___Scalar::call(const_cast(*this), other); +} + +// aten::__irshift__.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::__irshift__(const at::Tensor & other) const { + return at::_ops::__irshift___Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::bitwise_right_shift(const at::Tensor & other) const { + return at::_ops::bitwise_right_shift_Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::bitwise_right_shift_(const at::Tensor & other) const { + return at::_ops::bitwise_right_shift__Tensor::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift.Tensor_Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::bitwise_right_shift(const at::Scalar & other) const { + return at::_ops::bitwise_right_shift_Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::bitwise_right_shift_.Tensor_Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::bitwise_right_shift_(const at::Scalar & other) const { + return at::_ops::bitwise_right_shift__Tensor_Scalar::call(const_cast(*this), other); +} + +// aten::tril_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::tril_(int64_t diagonal) const { + return at::_ops::tril_::call(const_cast(*this), diagonal); +} + +// aten::tril_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::tril__symint(c10::SymInt diagonal) const { + return at::_ops::tril_::call(const_cast(*this), diagonal); +} + +// aten::triu_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::triu_(int64_t diagonal) const { + return at::_ops::triu_::call(const_cast(*this), diagonal); +} + +// aten::triu_(Tensor(a!) self, SymInt diagonal=0) -> Tensor(a!) +inline at::Tensor & Tensor::triu__symint(c10::SymInt diagonal) const { + return at::_ops::triu_::call(const_cast(*this), diagonal); +} + +// aten::digamma_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::digamma_() const { + return at::_ops::digamma_::call(const_cast(*this)); +} + +// aten::lerp_.Scalar(Tensor(a!) self, Tensor end, Scalar weight) -> Tensor(a!) +inline at::Tensor & Tensor::lerp_(const at::Tensor & end, const at::Scalar & weight) const { + return at::_ops::lerp__Scalar::call(const_cast(*this), end, weight); +} + +// aten::lerp_.Tensor(Tensor(a!) self, Tensor end, Tensor weight) -> Tensor(a!) +inline at::Tensor & Tensor::lerp_(const at::Tensor & end, const at::Tensor & weight) const { + return at::_ops::lerp__Tensor::call(const_cast(*this), end, weight); +} + +// aten::addbmm_(Tensor(a!) self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor(a!) 
+inline at::Tensor & Tensor::addbmm_(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addbmm_::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor Tensor::addbmm(const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha) const { + return at::_ops::addbmm::call(const_cast(*this), batch1, batch2, beta, alpha); +} + +// aten::random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(int64_t from, ::std::optional to, ::std::optional generator) const { + return at::_ops::random__from::call(const_cast(*this), from, to, generator); +} + +// aten::random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(int64_t to, ::std::optional generator) const { + return at::_ops::random__to::call(const_cast(*this), to, generator); +} + +// aten::random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::random_(::std::optional generator) const { + return at::_ops::random_::call(const_cast(*this), generator); +} + +// aten::uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::uniform_(double from, double to, ::std::optional generator) const { + return at::_ops::uniform_::call(const_cast(*this), from, to, generator); +} + +// aten::cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::cauchy_(double median, double sigma, ::std::optional generator) const { + return at::_ops::cauchy_::call(const_cast(*this), median, sigma, generator); +} + +// aten::log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? 
generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::log_normal_(double mean, double std, ::std::optional generator) const { + return at::_ops::log_normal_::call(const_cast(*this), mean, std, generator); +} + +// aten::exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::exponential_(double lambd, ::std::optional generator) const { + return at::_ops::exponential_::call(const_cast(*this), lambd, generator); +} + +// aten::geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::geometric_(double p, ::std::optional generator) const { + return at::_ops::geometric_::call(const_cast(*this), p, generator); +} + +// aten::diag(Tensor self, int diagonal=0) -> Tensor +inline at::Tensor Tensor::diag(int64_t diagonal) const { + return at::_ops::diag::call(const_cast(*this), diagonal); +} + +// aten::cross(Tensor self, Tensor other, int? dim=None) -> Tensor +inline at::Tensor Tensor::cross(const at::Tensor & other, ::std::optional dim) const { + return at::_ops::cross::call(const_cast(*this), other, dim); +} + +// aten::triu(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::triu(int64_t diagonal) const { + return at::_ops::triu::call(const_cast(*this), diagonal); +} + +// aten::triu(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::triu_symint(c10::SymInt diagonal) const { + return at::_ops::triu::call(const_cast(*this), diagonal); +} + +// aten::tril(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::tril(int64_t diagonal) const { + return at::_ops::tril::call(const_cast(*this), diagonal); +} + +// aten::tril(Tensor self, SymInt diagonal=0) -> Tensor +inline at::Tensor Tensor::tril_symint(c10::SymInt diagonal) const { + return at::_ops::tril::call(const_cast(*this), diagonal); +} + +// aten::trace(Tensor self) -> Tensor +inline at::Tensor Tensor::trace() const { + return 
at::_ops::trace::call(const_cast(*this)); +} + +// aten::ne.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::ne(const at::Scalar & other) const { + return at::_ops::ne_Scalar::call(const_cast(*this), other); +} + +// aten::ne.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ne(const at::Tensor & other) const { + return at::_ops::ne_Tensor::call(const_cast(*this), other); +} + +// aten::ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::ne_(const at::Scalar & other) const { + return at::_ops::ne__Scalar::call(const_cast(*this), other); +} + +// aten::ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ne_(const at::Tensor & other) const { + return at::_ops::ne__Tensor::call(const_cast(*this), other); +} + +// aten::not_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::not_equal(const at::Scalar & other) const { + return at::_ops::not_equal_Scalar::call(const_cast(*this), other); +} + +// aten::not_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::not_equal(const at::Tensor & other) const { + return at::_ops::not_equal_Tensor::call(const_cast(*this), other); +} + +// aten::not_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::not_equal_(const at::Scalar & other) const { + return at::_ops::not_equal__Scalar::call(const_cast(*this), other); +} + +// aten::not_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::not_equal_(const at::Tensor & other) const { + return at::_ops::not_equal__Tensor::call(const_cast(*this), other); +} + +// aten::eq.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::eq(const at::Scalar & other) const { + return at::_ops::eq_Scalar::call(const_cast(*this), other); +} + +// aten::eq.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::eq(const at::Tensor & other) const { + return at::_ops::eq_Tensor::call(const_cast(*this), other); +} + +// aten::ge.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::ge(const at::Scalar & other) const { + return at::_ops::ge_Scalar::call(const_cast(*this), other); +} + +// aten::ge.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::ge(const at::Tensor & other) const { + return at::_ops::ge_Tensor::call(const_cast(*this), other); +} + +// aten::ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::ge_(const at::Scalar & other) const { + return at::_ops::ge__Scalar::call(const_cast(*this), other); +} + +// aten::ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::ge_(const at::Tensor & other) const { + return at::_ops::ge__Tensor::call(const_cast(*this), other); +} + +// aten::greater_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::greater_equal(const at::Scalar & other) const { + return at::_ops::greater_equal_Scalar::call(const_cast(*this), other); +} + +// aten::greater_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::greater_equal(const at::Tensor & other) const { + return at::_ops::greater_equal_Tensor::call(const_cast(*this), other); +} + +// aten::greater_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::greater_equal_(const at::Scalar & other) const { + return at::_ops::greater_equal__Scalar::call(const_cast(*this), other); +} + +// aten::greater_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_equal_(const at::Tensor & other) const { + return at::_ops::greater_equal__Tensor::call(const_cast(*this), other); +} + +// aten::le.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::le(const at::Scalar & other) const { + return at::_ops::le_Scalar::call(const_cast(*this), other); +} + +// aten::le.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::le(const at::Tensor & other) const { + return at::_ops::le_Tensor::call(const_cast(*this), other); +} + +// aten::le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::le_(const at::Scalar & other) const { + return at::_ops::le__Scalar::call(const_cast(*this), other); +} + +// aten::le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::le_(const at::Tensor & other) const { + return at::_ops::le__Tensor::call(const_cast(*this), other); +} + +// aten::less_equal.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::less_equal(const at::Scalar & other) const { + return at::_ops::less_equal_Scalar::call(const_cast(*this), other); +} + +// aten::less_equal.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::less_equal(const at::Tensor & other) const { + return at::_ops::less_equal_Tensor::call(const_cast(*this), other); +} + +// aten::less_equal_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::less_equal_(const at::Scalar & other) const { + return at::_ops::less_equal__Scalar::call(const_cast(*this), other); +} + +// aten::less_equal_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::less_equal_(const at::Tensor & other) const { + return at::_ops::less_equal__Tensor::call(const_cast(*this), other); +} + +// aten::gt.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::gt(const at::Scalar & other) const { + return at::_ops::gt_Scalar::call(const_cast(*this), other); +} + +// aten::gt.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::gt(const at::Tensor & other) const { + return at::_ops::gt_Tensor::call(const_cast(*this), other); +} + +// aten::gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::gt_(const at::Scalar & other) const { + return at::_ops::gt__Scalar::call(const_cast(*this), other); +} + +// aten::gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::gt_(const at::Tensor & other) const { + return at::_ops::gt__Tensor::call(const_cast(*this), other); +} + +// aten::greater.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::greater(const at::Scalar & other) const { + return at::_ops::greater_Scalar::call(const_cast(*this), other); +} + +// aten::greater.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::greater(const at::Tensor & other) const { + return at::_ops::greater_Tensor::call(const_cast(*this), other); +} + +// aten::greater_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::greater_(const at::Scalar & other) const { + return at::_ops::greater__Scalar::call(const_cast(*this), other); +} + +// aten::greater_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::greater_(const at::Tensor & other) const { + return at::_ops::greater__Tensor::call(const_cast(*this), other); +} + +// aten::lt.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::lt(const at::Scalar & other) const { + return at::_ops::lt_Scalar::call(const_cast(*this), other); +} + +// aten::lt.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::lt(const at::Tensor & other) const { + return at::_ops::lt_Tensor::call(const_cast(*this), other); +} + +// aten::lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::lt_(const at::Scalar & other) const { + return at::_ops::lt__Scalar::call(const_cast(*this), other); +} + +// aten::lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::lt_(const at::Tensor & other) const { + return at::_ops::lt__Tensor::call(const_cast(*this), other); +} + +// aten::less.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::less(const at::Scalar & other) const { + return at::_ops::less_Scalar::call(const_cast(*this), other); +} + +// aten::less.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::less(const at::Tensor & other) const { + return at::_ops::less_Tensor::call(const_cast(*this), other); +} + +// aten::less_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::less_(const at::Scalar & other) const { + return at::_ops::less__Scalar::call(const_cast(*this), other); +} + +// aten::less_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::less_(const at::Tensor & other) const { + return at::_ops::less__Tensor::call(const_cast(*this), other); +} + +// aten::take(Tensor self, Tensor index) -> Tensor +inline at::Tensor Tensor::take(const at::Tensor & index) const { + return at::_ops::take::call(const_cast(*this), index); +} + +// aten::take_along_dim(Tensor self, Tensor indices, int? 
dim=None) -> Tensor +inline at::Tensor Tensor::take_along_dim(const at::Tensor & indices, ::std::optional dim) const { + return at::_ops::take_along_dim::call(const_cast(*this), indices, dim); +} + +// aten::index_select(Tensor self, int dim, Tensor index) -> Tensor +inline at::Tensor Tensor::index_select(int64_t dim, const at::Tensor & index) const { + return at::_ops::index_select::call(const_cast(*this), dim, index); +} + +// aten::index_select.dimname(Tensor self, Dimname dim, Tensor index) -> Tensor +inline at::Tensor Tensor::index_select(at::Dimname dim, const at::Tensor & index) const { + return at::_ops::index_select_dimname::call(const_cast(*this), dim, index); +} + +// aten::masked_select(Tensor self, Tensor mask) -> Tensor +inline at::Tensor Tensor::masked_select(const at::Tensor & mask) const { + return at::_ops::masked_select::call(const_cast(*this), mask); +} + +// aten::nonzero(Tensor self) -> Tensor +inline at::Tensor Tensor::nonzero() const { + return at::_ops::nonzero::call(const_cast(*this)); +} + +// aten::nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor +inline at::Tensor Tensor::nonzero_static(int64_t size, int64_t fill_value) const { + return at::_ops::nonzero_static::call(const_cast(*this), size, fill_value); +} + +// aten::nonzero_static(Tensor self, *, SymInt size, int fill_value=-1) -> Tensor +inline at::Tensor Tensor::nonzero_static_symint(c10::SymInt size, int64_t fill_value) const { + return at::_ops::nonzero_static::call(const_cast(*this), size, fill_value); +} + +// aten::nonzero_numpy(Tensor self) -> Tensor[] +inline ::std::vector Tensor::nonzero_numpy() const { + return at::_ops::nonzero_numpy::call(const_cast(*this)); +} + +// aten::argwhere(Tensor self) -> Tensor +inline at::Tensor Tensor::argwhere() const { + return at::_ops::argwhere::call(const_cast(*this)); +} + +// aten::gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor +inline at::Tensor Tensor::gather(int64_t dim, const 
at::Tensor & index, bool sparse_grad) const { + return at::_ops::gather::call(const_cast(*this), dim, index, sparse_grad); +} + +// aten::gather.dimname(Tensor self, Dimname dim, Tensor index, *, bool sparse_grad=False) -> Tensor +inline at::Tensor Tensor::gather(at::Dimname dim, const at::Tensor & index, bool sparse_grad) const { + return at::_ops::gather_dimname::call(const_cast(*this), dim, index, sparse_grad); +} + +// aten::addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor +inline at::Tensor Tensor::addcmul(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcmul::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcmul_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!) +inline at::Tensor & Tensor::addcmul_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcmul_::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor +inline at::Tensor Tensor::addcdiv(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcdiv::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::addcdiv_(Tensor(a!) self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor(a!) 
+inline at::Tensor & Tensor::addcdiv_(const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value) const { + return at::_ops::addcdiv_::call(const_cast(*this), tensor1, tensor2, value); +} + +// aten::triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient) +inline ::std::tuple Tensor::triangular_solve(const at::Tensor & A, bool upper, bool transpose, bool unitriangular) const { + return at::_ops::triangular_solve::call(const_cast(*this), A, upper, transpose, unitriangular); +} + +// aten::svd(Tensor self, bool some=True, bool compute_uv=True) -> (Tensor U, Tensor S, Tensor V) +inline ::std::tuple Tensor::svd(bool some, bool compute_uv) const { + return at::_ops::svd::call(const_cast(*this), some, compute_uv); +} + +// aten::swapaxes(Tensor(a) self, int axis0, int axis1) -> Tensor(a) +inline at::Tensor Tensor::swapaxes(int64_t axis0, int64_t axis1) const { + return at::_ops::swapaxes::call(const_cast(*this), axis0, axis1); +} + +// aten::swapaxes_(Tensor(a!) self, int axis0, int axis1) -> Tensor(a!) +inline at::Tensor & Tensor::swapaxes_(int64_t axis0, int64_t axis1) const { + return at::_ops::swapaxes_::call(const_cast(*this), axis0, axis1); +} + +// aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor Tensor::swapdims(int64_t dim0, int64_t dim1) const { + return at::_ops::swapdims::call(const_cast(*this), dim0, dim1); +} + +// aten::swapdims_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!) 
+inline at::Tensor & Tensor::swapdims_(int64_t dim0, int64_t dim1) const { + return at::_ops::swapdims_::call(const_cast(*this), dim0, dim1); +} + +// aten::cholesky(Tensor self, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky(bool upper) const { + return at::_ops::cholesky::call(const_cast(*this), upper); +} + +// aten::cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky_solve(const at::Tensor & input2, bool upper) const { + return at::_ops::cholesky_solve::call(const_cast(*this), input2, upper); +} + +// aten::cholesky_inverse(Tensor self, bool upper=False) -> Tensor +inline at::Tensor Tensor::cholesky_inverse(bool upper) const { + return at::_ops::cholesky_inverse::call(const_cast(*this), upper); +} + +// aten::qr(Tensor self, bool some=True) -> (Tensor Q, Tensor R) +inline ::std::tuple Tensor::qr(bool some) const { + return at::_ops::qr::call(const_cast(*this), some); +} + +// aten::geqrf(Tensor self) -> (Tensor a, Tensor tau) +inline ::std::tuple Tensor::geqrf() const { + return at::_ops::geqrf::call(const_cast(*this)); +} + +// aten::orgqr(Tensor self, Tensor input2) -> Tensor +inline at::Tensor Tensor::orgqr(const at::Tensor & input2) const { + return at::_ops::orgqr::call(const_cast(*this), input2); +} + +// aten::ormqr(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False) -> Tensor +inline at::Tensor Tensor::ormqr(const at::Tensor & input2, const at::Tensor & input3, bool left, bool transpose) const { + return at::_ops::ormqr::call(const_cast(*this), input2, input3, left, transpose); +} + +// aten::lu_solve(Tensor self, Tensor LU_data, Tensor LU_pivots) -> Tensor +inline at::Tensor Tensor::lu_solve(const at::Tensor & LU_data, const at::Tensor & LU_pivots) const { + return at::_ops::lu_solve::call(const_cast(*this), LU_data, LU_pivots); +} + +// aten::multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? 
generator=None) -> Tensor +inline at::Tensor Tensor::multinomial(int64_t num_samples, bool replacement, ::std::optional generator) const { + return at::_ops::multinomial::call(const_cast(*this), num_samples, replacement, generator); +} + +// aten::multinomial(Tensor self, SymInt num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor +inline at::Tensor Tensor::multinomial_symint(c10::SymInt num_samples, bool replacement, ::std::optional generator) const { + return at::_ops::multinomial::call(const_cast(*this), num_samples, replacement, generator); +} + +// aten::lgamma_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::lgamma_() const { + return at::_ops::lgamma_::call(const_cast(*this)); +} + +// aten::lgamma(Tensor self) -> Tensor +inline at::Tensor Tensor::lgamma() const { + return at::_ops::lgamma::call(const_cast(*this)); +} + +// aten::digamma(Tensor self) -> Tensor +inline at::Tensor Tensor::digamma() const { + return at::_ops::digamma::call(const_cast(*this)); +} + +// aten::polygamma(int n, Tensor self) -> Tensor +inline at::Tensor Tensor::polygamma(int64_t n) const { + return at::_ops::polygamma::call(n, const_cast(*this)); +} + +// aten::polygamma_(Tensor(a!) self, int n) -> Tensor(a!) +inline at::Tensor & Tensor::polygamma_(int64_t n) const { + return at::_ops::polygamma_::call(const_cast(*this), n); +} + +// aten::erfinv(Tensor self) -> Tensor +inline at::Tensor Tensor::erfinv() const { + return at::_ops::erfinv::call(const_cast(*this)); +} + +// aten::erfinv_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::erfinv_() const { + return at::_ops::erfinv_::call(const_cast(*this)); +} + +// aten::i0(Tensor self) -> Tensor +inline at::Tensor Tensor::i0() const { + return at::_ops::i0::call(const_cast(*this)); +} + +// aten::i0_(Tensor(a!) self) -> Tensor(a!) 
+inline at::Tensor & Tensor::i0_() const { + return at::_ops::i0_::call(const_cast(*this)); +} + +// aten::sign(Tensor self) -> Tensor +inline at::Tensor Tensor::sign() const { + return at::_ops::sign::call(const_cast(*this)); +} + +// aten::sign_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & Tensor::sign_() const { + return at::_ops::sign_::call(const_cast(*this)); +} + +// aten::signbit(Tensor self) -> Tensor +inline at::Tensor Tensor::signbit() const { + return at::_ops::signbit::call(const_cast(*this)); +} + +// aten::dist(Tensor self, Tensor other, Scalar p=2) -> Tensor +inline at::Tensor Tensor::dist(const at::Tensor & other, const at::Scalar & p) const { + return at::_ops::dist::call(const_cast(*this), other, p); +} + +// aten::atan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::atan2_(const at::Tensor & other) const { + return at::_ops::atan2_::call(const_cast(*this), other); +} + +// aten::atan2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::atan2(const at::Tensor & other) const { + return at::_ops::atan2::call(const_cast(*this), other); +} + +// aten::arctan2(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::arctan2(const at::Tensor & other) const { + return at::_ops::arctan2::call(const_cast(*this), other); +} + +// aten::arctan2_(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::arctan2_(const at::Tensor & other) const { + return at::_ops::arctan2_::call(const_cast(*this), other); +} + +// aten::lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor +inline at::Tensor Tensor::lerp(const at::Tensor & end, const at::Scalar & weight) const { + return at::_ops::lerp_Scalar::call(const_cast(*this), end, weight); +} + +// aten::lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor +inline at::Tensor Tensor::lerp(const at::Tensor & end, const at::Tensor & weight) const { + return at::_ops::lerp_Tensor::call(const_cast(*this), end, weight); +} + +// aten::histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor +inline at::Tensor Tensor::histc(int64_t bins, const at::Scalar & min, const at::Scalar & max) const { + return at::_ops::histc::call(const_cast(*this), bins, min, max); +} + +// aten::histogram.bins_tensor(Tensor self, Tensor bins, *, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges) +inline ::std::tuple Tensor::histogram(const at::Tensor & bins, const ::std::optional & weight, bool density) const { + return at::_ops::histogram_bins_tensor::call(const_cast(*this), bins, weight, density); +} + +// aten::histogram.bin_ct(Tensor self, int bins=100, *, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor bin_edges) +inline ::std::tuple Tensor::histogram(int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density) const { + return at::_ops::histogram_bin_ct::call(const_cast(*this), bins, range, weight, density); +} + +// aten::fmod.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::fmod(const at::Scalar & other) const { + return at::_ops::fmod_Scalar::call(const_cast(*this), other); +} + +// aten::fmod_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) 
+inline at::Tensor & Tensor::fmod_(const at::Scalar & other) const { + return at::_ops::fmod__Scalar::call(const_cast(*this), other); +} + +// aten::fmod.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmod(const at::Tensor & other) const { + return at::_ops::fmod_Tensor::call(const_cast(*this), other); +} + +// aten::fmod_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::fmod_(const at::Tensor & other) const { + return at::_ops::fmod__Tensor::call(const_cast(*this), other); +} + +// aten::hypot(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::hypot(const at::Tensor & other) const { + return at::_ops::hypot::call(const_cast(*this), other); +} + +// aten::hypot_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::hypot_(const at::Tensor & other) const { + return at::_ops::hypot_::call(const_cast(*this), other); +} + +// aten::igamma(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::igamma(const at::Tensor & other) const { + return at::_ops::igamma::call(const_cast(*this), other); +} + +// aten::igamma_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::igamma_(const at::Tensor & other) const { + return at::_ops::igamma_::call(const_cast(*this), other); +} + +// aten::igammac(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::igammac(const at::Tensor & other) const { + return at::_ops::igammac::call(const_cast(*this), other); +} + +// aten::igammac_(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::igammac_(const at::Tensor & other) const { + return at::_ops::igammac_::call(const_cast(*this), other); +} + +// aten::nextafter(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::nextafter(const at::Tensor & other) const { + return at::_ops::nextafter::call(const_cast(*this), other); +} + +// aten::nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!) 
+inline at::Tensor & Tensor::nextafter_(const at::Tensor & other) const { + return at::_ops::nextafter_::call(const_cast(*this), other); +} + +// aten::remainder.Scalar(Tensor self, Scalar other) -> Tensor +inline at::Tensor Tensor::remainder(const at::Scalar & other) const { + return at::_ops::remainder_Scalar::call(const_cast(*this), other); +} + +// aten::remainder_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!) +inline at::Tensor & Tensor::remainder_(const at::Scalar & other) const { + return at::_ops::remainder__Scalar::call(const_cast(*this), other); +} + +// aten::remainder.Tensor(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::remainder(const at::Tensor & other) const { + return at::_ops::remainder_Tensor::call(const_cast(*this), other); +} + +// aten::remainder_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!) +inline at::Tensor & Tensor::remainder_(const at::Tensor & other) const { + return at::_ops::remainder__Tensor::call(const_cast(*this), other); +} + +// aten::min(Tensor self) -> Tensor +inline at::Tensor Tensor::min() const { + return at::_ops::min::call(const_cast(*this)); +} + +// aten::fmin(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmin(const at::Tensor & other) const { + return at::_ops::fmin::call(const_cast(*this), other); +} + +// aten::max(Tensor self) -> Tensor +inline at::Tensor Tensor::max() const { + return at::_ops::max::call(const_cast(*this)); +} + +// aten::fmax(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::fmax(const at::Tensor & other) const { + return at::_ops::fmax::call(const_cast(*this), other); +} + +// aten::maximum(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::maximum(const at::Tensor & other) const { + return at::_ops::maximum::call(const_cast(*this), other); +} + +// aten::max.other(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::max(const at::Tensor & other) const { + return at::_ops::max_other::call(const_cast(*this), 
other); +} + +// aten::minimum(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::minimum(const at::Tensor & other) const { + return at::_ops::minimum::call(const_cast(*this), other); +} + +// aten::min.other(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::min(const at::Tensor & other) const { + return at::_ops::min_other::call(const_cast(*this), other); +} + +// aten::quantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::quantile(const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::quantile::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::quantile.scalar(Tensor self, float q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::quantile(double q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::quantile_scalar::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::nanquantile(Tensor self, Tensor q, int? dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::nanquantile(const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::nanquantile::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::nanquantile.scalar(Tensor self, float q, int? 
dim=None, bool keepdim=False, *, str interpolation='linear') -> Tensor +inline at::Tensor Tensor::nanquantile(double q, ::std::optional dim, bool keepdim, c10::string_view interpolation) const { + return at::_ops::nanquantile_scalar::call(const_cast(*this), q, dim, keepdim, interpolation); +} + +// aten::sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(int64_t dim, bool descending) const { + return at::_ops::sort::call(const_cast(*this), dim, descending); +} + +// aten::sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(::std::optional stable, int64_t dim, bool descending) const { + return at::_ops::sort_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::sort.dimname(Tensor self, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(at::Dimname dim, bool descending) const { + return at::_ops::sort_dimname::call(const_cast(*this), dim, descending); +} + +// aten::sort.dimname_stable(Tensor self, *, bool? 
stable, Dimname dim, bool descending=False) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::sort(::std::optional stable, at::Dimname dim, bool descending) const { + return at::_ops::sort_dimname_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::msort(Tensor self) -> Tensor +inline at::Tensor Tensor::msort() const { + return at::_ops::msort::call(const_cast(*this)); +} + +// aten::argsort(Tensor self, int dim=-1, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(int64_t dim, bool descending) const { + return at::_ops::argsort::call(const_cast(*this), dim, descending); +} + +// aten::argsort.stable(Tensor self, *, bool stable, int dim=-1, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(bool stable, int64_t dim, bool descending) const { + return at::_ops::argsort_stable::call(const_cast(*this), stable, dim, descending); +} + +// aten::argsort.dimname(Tensor self, Dimname dim, bool descending=False) -> Tensor +inline at::Tensor Tensor::argsort(at::Dimname dim, bool descending) const { + return at::_ops::argsort_dimname::call(const_cast(*this), dim, descending); +} + +// aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::topk(int64_t k, int64_t dim, bool largest, bool sorted) const { + return at::_ops::topk::call(const_cast(*this), k, dim, largest, sorted); +} + +// aten::topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices) +inline ::std::tuple Tensor::topk_symint(c10::SymInt k, int64_t dim, bool largest, bool sorted) const { + return at::_ops::topk::call(const_cast(*this), k, dim, largest, sorted); +} + +// aten::all(Tensor self) -> Tensor +inline at::Tensor Tensor::all() const { + return at::_ops::all::call(const_cast(*this)); +} + +// aten::any(Tensor self) -> Tensor +inline at::Tensor Tensor::any() const { + return 
at::_ops::any::call(const_cast(*this)); +} + +// aten::renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor +inline at::Tensor Tensor::renorm(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const { + return at::_ops::renorm::call(const_cast(*this), p, dim, maxnorm); +} + +// aten::renorm_(Tensor(a!) self, Scalar p, int dim, Scalar maxnorm) -> Tensor(a!) +inline at::Tensor & Tensor::renorm_(const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm) const { + return at::_ops::renorm_::call(const_cast(*this), p, dim, maxnorm); +} + +// aten::unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a) +inline at::Tensor Tensor::unfold(int64_t dimension, int64_t size, int64_t step) const { + return at::_ops::unfold::call(const_cast(*this), dimension, size, step); +} + +// aten::equal(Tensor self, Tensor other) -> bool +inline bool Tensor::equal(const at::Tensor & other) const { + return at::_ops::equal::call(const_cast(*this), other); +} + +// aten::pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor +inline at::Tensor Tensor::pow(const at::Tensor & exponent) const { + return at::_ops::pow_Tensor_Tensor::call(const_cast(*this), exponent); +} + +// aten::pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor +inline at::Tensor Tensor::pow(const at::Scalar & exponent) const { + return at::_ops::pow_Tensor_Scalar::call(const_cast(*this), exponent); +} + +// aten::pow_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) +inline at::Tensor & Tensor::pow_(const at::Scalar & exponent) const { + return at::_ops::pow__Scalar::call(const_cast(*this), exponent); +} + +// aten::pow_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) 
+inline at::Tensor & Tensor::pow_(const at::Tensor & exponent) const { + return at::_ops::pow__Tensor::call(const_cast(*this), exponent); +} + +// aten::float_power.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor +inline at::Tensor Tensor::float_power(const at::Tensor & exponent) const { + return at::_ops::float_power_Tensor_Tensor::call(const_cast(*this), exponent); +} + +// aten::float_power.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor +inline at::Tensor Tensor::float_power(const at::Scalar & exponent) const { + return at::_ops::float_power_Tensor_Scalar::call(const_cast(*this), exponent); +} + +// aten::float_power_.Scalar(Tensor(a!) self, Scalar exponent) -> Tensor(a!) +inline at::Tensor & Tensor::float_power_(const at::Scalar & exponent) const { + return at::_ops::float_power__Scalar::call(const_cast(*this), exponent); +} + +// aten::float_power_.Tensor(Tensor(a!) self, Tensor exponent) -> Tensor(a!) +inline at::Tensor & Tensor::float_power_(const at::Tensor & exponent) const { + return at::_ops::float_power__Tensor::call(const_cast(*this), exponent); +} + +// aten::normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!) +inline at::Tensor & Tensor::normal_(double mean, double std, ::std::optional generator) const { + return at::_ops::normal_::call(const_cast(*this), mean, std, generator); +} + +// aten::alias(Tensor(a) self) -> Tensor(a) +inline at::Tensor Tensor::alias() const { + return at::_ops::alias::call(const_cast(*this)); +} + +// aten::isfinite(Tensor self) -> Tensor +inline at::Tensor Tensor::isfinite() const { + return at::_ops::isfinite::call(const_cast(*this)); +} + +// aten::isinf(Tensor self) -> Tensor +inline at::Tensor Tensor::isinf() const { + return at::_ops::isinf::call(const_cast(*this)); +} + +// aten::record_stream(Tensor(a!) 
self, Stream s) -> () +inline void Tensor::record_stream(at::Stream s) const { + return at::_ops::record_stream::call(const_cast(*this), s); +} + +// aten::isposinf(Tensor self) -> Tensor +inline at::Tensor Tensor::isposinf() const { + return at::_ops::isposinf::call(const_cast(*this)); +} + +// aten::isneginf(Tensor self) -> Tensor +inline at::Tensor Tensor::isneginf() const { + return at::_ops::isneginf::call(const_cast(*this)); +} + +// aten::det(Tensor self) -> Tensor +inline at::Tensor Tensor::det() const { + return at::_ops::det::call(const_cast(*this)); +} + +// aten::slogdet(Tensor self) -> (Tensor sign, Tensor logabsdet) +inline ::std::tuple Tensor::slogdet() const { + return at::_ops::slogdet::call(const_cast(*this)); +} + +// aten::logdet(Tensor self) -> Tensor +inline at::Tensor Tensor::logdet() const { + return at::_ops::logdet::call(const_cast(*this)); +} + +// aten::inverse(Tensor self) -> Tensor +inline at::Tensor Tensor::inverse() const { + return at::_ops::inverse::call(const_cast(*this)); +} + +// aten::inner(Tensor self, Tensor other) -> Tensor +inline at::Tensor Tensor::inner(const at::Tensor & other) const { + return at::_ops::inner::call(const_cast(*this), other); +} + +// aten::outer(Tensor self, Tensor vec2) -> Tensor +inline at::Tensor Tensor::outer(const at::Tensor & vec2) const { + return at::_ops::outer::call(const_cast(*this), vec2); +} + +// aten::ger(Tensor self, Tensor vec2) -> Tensor +inline at::Tensor Tensor::ger(const at::Tensor & vec2) const { + return at::_ops::ger::call(const_cast(*this), vec2); +} + +// aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor +inline at::Tensor Tensor::to_padded_tensor(double padding, at::OptionalIntArrayRef output_size) const { + return at::_ops::to_padded_tensor::call(const_cast(*this), padding, output_size.has_value() ? 
::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt); +} + +// aten::to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor +inline at::Tensor Tensor::to_padded_tensor_symint(double padding, at::OptionalSymIntArrayRef output_size) const { + return at::_ops::to_padded_tensor::call(const_cast(*this), padding, output_size); +} +} // namespace at + + +namespace c10 { +template <> +struct MaybeOwnedTraits { + using owned_type = at::Tensor; + using borrow_type = at::Tensor; + + static borrow_type createBorrow(const owned_type& from) { + // NOTE: this can be implemented without the special + // unsafe_borrow_t Tensor constructor as + // + // return borrow_type(c10::intrusive_ptr::reclaim(from.unsafeGetTensorImpl())); + // + // but that hurts inlining due to the nullptr check in the + // Tensor(c10::intrusive_ptr<...>) constructor. We already know + // that from.impl_ isn't null because from is a valid Tensor, so + // we needn't do the check again. (using __builtin_assume can + // avoid this, but wouldn't be portable to MSVC.) + return borrow_type(borrow_type::unsafe_borrow_t{}, from); + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.unsafeReleaseTensorImpl(); + // See above note: this can be implemented with public API + // similarly to createBorrow(), but that would hurt inlining. + lhs = borrow_type(borrow_type::unsafe_borrow_t{}, rhs); + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.unsafeReleaseTensorImpl(); // "leak" it, but it was already +0. 
+ } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*borrow*/) { + return true; + } +}; + +template <> +struct ExclusivelyOwnedTraits { + using repr_type = at::Tensor; + using pointer_type = at::Tensor*; + using const_pointer_type = const at::Tensor*; + + static repr_type nullRepr() { + return at::Tensor(); + } + + template + static repr_type createInPlace(Args&&... args) { + return at::Tensor(std::forward(args)...); + } + + static repr_type moveToRepr(at::Tensor&& x) { + return std::move(x); + } + + static void destroyOwned(at::Tensor& x) { + return ExclusivelyOwnedTraits::destroyOwned(x); + } + + static at::Tensor take(at::Tensor& x) { + return std::move(x); + } + + static pointer_type getImpl(repr_type& x) { + return &x; + } + + static const_pointer_type getImpl(const repr_type& x) { + return &x; + } +}; +} // namespace c10 + +namespace at { + +inline c10::MaybeOwned borrow_from_optional_tensor( + const std::optional& opt) { + return opt.has_value() + ? c10::MaybeOwned::borrowed(*opt) + : c10::MaybeOwned::owned(std::in_place); +} + +inline c10::MaybeOwned Tensor::expect_contiguous(MemoryFormat memory_format) const & { + if (is_contiguous(memory_format)) { + return c10::MaybeOwned::borrowed(*this); + } else { + return c10::MaybeOwned::owned(__dispatch_contiguous(memory_format)); + } +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..11851fa597489b30a0f22b9fab591826a081ab53 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TorchDispatchUtils.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace at::impl { + +TORCH_API bool tensor_has_dispatch(const at::Tensor& t); +TORCH_API bool tensorlist_has_dispatch(at::ITensorListRef li); +TORCH_API bool tensorlist_has_dispatch( + const c10::List>& li); +using c10::impl::dispatch_mode_enabled; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h new file mode 100644 index 0000000000000000000000000000000000000000..5a7c420c5f7f2df051306625c741562a04b0bed9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/TransformationHelper.h @@ -0,0 +1,180 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { + +// Using DistAccumType in accumulate types for distributions. 
+// Note: Ideally we'd be using ATen/AccumulateType.h but it looks +// like there is some inconsistency in how accumulate types +// are mapped currently, e.g. for the cpu side, float is mapped +// to double. +template +struct DistAccumType { }; + +#if defined(__CUDACC__) || defined(__HIPCC__) +template <> struct DistAccumType { using type = float; }; +#endif +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = float; }; +template <> struct DistAccumType { using type = double; }; + +template +using dist_acctype = typename DistAccumType::type; + +namespace transformation { + +/** + * A transformation function for `torch.Tensor.random_()`, when both `from` and `to` are specified. + * `range` is `to - from` + * `base` is `from` + */ +template +C10_HOST_DEVICE inline T uniform_int_from_to(V val, uint64_t range, int64_t base) { + return static_cast(static_cast((val % range) + base)); +} + +/** + * A transformation function for `torch.Tensor.random_()`, when `from=min_value(int64_t)` and `to=None` + */ +template +C10_HOST_DEVICE inline T uniform_int_full_range(V val) { + return static_cast(static_cast(val)); +} + +/** + * A transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`. 
+ * In order to prevent compiler warnings reported in GitHub issue 46391, T can't be float or double + * in this overloaded version + */ +template +C10_HOST_DEVICE inline std::enable_if_t, T>uniform_int(V val) { + if constexpr (std::is_same_v) { + return static_cast(val & 1); + } else if constexpr (std::is_same_v) { + return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); + } else if constexpr (std::is_same_v || std::is_same_v) { + return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); + } else if constexpr (std::is_integral_v) { + return static_cast(val % (static_cast(std::numeric_limits::max()) + 1)); + } else { + assert(false); + return 0; + } +} + +/** + * An overloaded transformation function for `torch.Tensor.random_()`, when used without specifying `from` and `to`, + * added to fix compiler warnings reported in GitHub issue 46391. T is either float or double in this version. + */ +template +C10_HOST_DEVICE inline std::enable_if_t, T>uniform_int(V val) { + return static_cast(val % static_cast((1ULL << std::numeric_limits::digits) + 1)); +} + +template +C10_HOST_DEVICE inline dist_acctype uniform_real(V val, T from, T to) { + constexpr auto MASK = static_cast((static_cast(1) << std::numeric_limits::digits) - 1); + constexpr auto DIVISOR = static_cast>(1) / (static_cast(1) << std::numeric_limits::digits); + dist_acctype x = (val & MASK) * DIVISOR; + return (x * (to - from) + from); +} + +/** + * Transforms normally distributed `val` with mean 0.0 and standard deviation 1.0 to + * normally distributed with `mean` and standard deviation `std`. + */ +template +C10_HOST_DEVICE inline T normal(T val, T mean, T std) { + return val * std + mean; +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * Cauchy distribution with location parameter `median` and scale parameter `sigma`. 
+ */ +template +C10_HOST_DEVICE inline T cauchy(T val, T median, T sigma) { + // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function + // __tanf overflows and returns `inf/-inf` when (val > 1 - eps) or (val < 0 + eps), + // thus we clip those values. + constexpr T eps = std::numeric_limits::epsilon(); + constexpr T one_minus_eps = 1 - eps; + constexpr T zero_plus_eps = 0 + eps; + val = (val > one_minus_eps ? one_minus_eps : val); + val = (val < zero_plus_eps ? zero_plus_eps : val); + return median + sigma * at::tan(c10::pi * (val - static_cast(0.5))); +} + +template <> +C10_HOST_DEVICE inline double cauchy(double val, double median, double sigma) { + // https://en.wikipedia.org/wiki/Cauchy_distribution#Cumulative_distribution_function + return median + sigma * at::tan(c10::pi * (val - 0.5)); +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * exponentially distributed with `lambda` parameter of the distribution. + */ +template +C10_HOST_DEVICE inline T exponential(T val, T lambda) { + // https://en.wikipedia.org/wiki/Exponential_distribution#Generating_exponential_variates + // Different implementations for CUDA and CPU to preserve original logic + // TODO: must be investigated and unified!!! + // https://github.com/pytorch/pytorch/issues/38662 +#if defined(__CUDACC__) || defined(__HIPCC__) + // BEFORE TOUCHING THIS CODE READ: https://github.com/pytorch/pytorch/issues/16706 + // curand_uniform has (0,1] bounds. log(1) is 0 and exponential excludes 0. + // we need log to be not 0, and not underflow when converted to half + // fast __logf approximation can underflow, so set log to -epsilon/2 for 1 or close to 1 args + auto log = val >= static_cast(1.) - std::numeric_limits::epsilon() / 2 + ? 
-std::numeric_limits::epsilon() / 2 + : at::log(val); + return static_cast(-1.0) / lambda * log; +#else + return static_cast(-1.0) / lambda * at::log1p(-val); +#endif +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * geometrically distributed with success probability `p`. + */ +template +C10_HOST_DEVICE inline T geometric(T val, T p) { + // https://en.wikipedia.org/wiki/Geometric_distribution#Related_distributions + return static_cast(::ceil(at::log(val) / at::log1p(-p))); +} + +/** + * Transforms normally distributed `val` to log-normally distributed. + */ +template +C10_HOST_DEVICE inline T log_normal(T val) { + // https://en.wikipedia.org/wiki/Log-normal_distribution#Mode,_median,_quantiles + return at::exp(val); +} + +/** + * Transforms uniformly distributed `val` between 0.0 and 1.0 to + * bernoulli distributed with success probability `p`. + */ +template +C10_HOST_DEVICE inline T bernoulli(T val, T p) { + return val < p; +} + +}} // namespace at::transformation + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..a75b3cdb4ac8475b22cf0dd9e0f1df5b40d1ae8f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UndefinedTensorImpl.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h new file mode 100644 index 0000000000000000000000000000000000000000..b50bd4da60e1ddb90cfc671cb63b6afe53a7fb64 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/UnsafeFromTH.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at { + +inline Tensor unsafeTensorFromTH(void * th_pointer, bool retain) { + auto tensor_impl = c10::intrusive_ptr::reclaim(static_cast(th_pointer)); + if (retain && tensor_impl.get() != UndefinedTensorImpl::singleton()) { + c10::raw::intrusive_ptr::incref(tensor_impl.get()); + } + return Tensor(std::move(tensor_impl)); +} + +inline Storage unsafeStorageFromTH(void * th_pointer, bool retain) { + if (retain && th_pointer) { + c10::raw::intrusive_ptr::incref(static_cast(th_pointer)); + } + return Storage(c10::intrusive_ptr::reclaim(static_cast(th_pointer))); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..8d323a26becf0790fb5b7f176593fac6225dee9f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/VariableHooksInterface.h @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +// A little explanation about why this file exists at all. We have +// a few methods on the Tensor class which require reified access to +// AutogradMeta. In open source, this isn't a big deal: we just access +// torch/csrc/autograd/variable.h from aten/src/ATen/core/Tensor.cpp and +// we can put the definitions inline. This is because everything gets balled +// into a single dynamic library in the end. +// +// However, inside our Facebook internal version of our build system, we +// have a split between aten and torch/csrc. So we cannot simply just +// cross this boundary. "Now wait," you might say, "Why don't we just +// merge the libraries inside Facebook?" Well, the problem is that there +// are some downstream applications which are at binary size limit, and +// incorporating all of the extra code from libtorch would push them +// over (admarket/adreview/service:adreviewservice, see also +// https://github.com/pytorch/pytorch/pull/29299). So if you want to do that, +// we have to fix all of the services like this. +// +// I didn't want to block eliminating Tensor-Variable on this work, so I +// had to introduce another dynamic dispatch to get to the variable +// implementations (which live in torch/csrc/autograd/variable.cpp, FYI). 
+// +// I also considered using our existing dynamic dispatch mechanism, c10 +// dispatcher, to do this. However, (1) some of the functions on Tensor +// have weird signatures that are not supported by autograd, and (2) +// see this bug https://github.com/pytorch/pytorch/issues/30102 + +namespace torch::autograd { + +struct Node; + +} // namespace torch::autograd + +namespace at::impl { + +struct TORCH_API VariableHooksInterface { + virtual ~VariableHooksInterface() = default; + virtual TensorBase tensor_data(const TensorBase&) const = 0; + virtual TensorBase variable_data(const TensorBase&) const = 0; + virtual const std::shared_ptr& grad_fn( + const TensorBase&) const = 0; + virtual unsigned _register_hook( + const TensorBase&, + std::function hook) const = 0; + virtual void remove_hook(const TensorBase&, unsigned pos) const = 0; + virtual bool is_view(const TensorBase&) const = 0; + virtual const TensorBase& base(const TensorBase&) const = 0; + virtual const std::string& name(const TensorBase&) const = 0; + virtual bool is_leaf(const TensorBase&) const = 0; + virtual int64_t output_nr(const TensorBase&) const = 0; + virtual void set_data(const TensorBase&, const TensorBase&) const = 0; + virtual TensorBase data(const TensorBase&) const = 0; + virtual int64_t _version(const TensorBase&) const = 0; + virtual void retain_grad(const TensorBase&) const = 0; + virtual bool retains_grad(const TensorBase&) const = 0; + virtual void _backward( + const Tensor&, + TensorList, + const std::optional&, + std::optional, + bool) const = 0; + virtual void requires_grad_(const TensorBase&, bool) const = 0; + virtual void basic_autograd_not_implemented_fallback( + const c10::OperatorHandle& op, + c10::DispatchKeySet dispatch_keys, + torch::jit::Stack* stack) const = 0; + virtual std::optional grad_dtype(const TensorBase&) const = 0; + virtual void set_grad_dtype(const TensorBase&, const std::optional&) const = 0; +}; + +TORCH_API void SetVariableHooks(VariableHooksInterface* 
hooks); +TORCH_API VariableHooksInterface* GetVariableHooks(); +TORCH_API bool HasVariableHooks(); + +struct TORCH_API VariableHooksRegisterer { + explicit VariableHooksRegisterer(VariableHooksInterface* hooks) { + SetVariableHooks(hooks); + } +}; + +} // namespace at::impl + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h new file mode 100644 index 0000000000000000000000000000000000000000..9c5e4676cc4dcc407a8022edbcf1f4d3589998ea --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Variadic.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at { + +// This class allows you to write variadic functions which +// call a (possibly overloaded) function on each argument, +// in order. This is most commonly used in autogenerated code, +// where it is convenient to have a function that can uniformly +// take arguments of different types. If your arguments +// are homogeneous consider using a std::initializer_list instead. +// +// For examples of this in use, see torch/csrc/utils/variadic.h +template +struct IterArgs { + template + inline F& apply() { + return self(); + } + + // NB: Use perfect forwarding here, otherwise we'll make value + // copies of all arguments! + template + inline F& apply(T&& arg, Args&&... 
args) { + self()(std::forward(arg)); + if (self().short_circuit()) { + return self(); + } else { + return apply(std::forward(args)...); + } + } + + // Here are some handy overloads which provide sensible + // defaults for container-like structures that one might + // be interested in recursing into. You can enable them + // by adding: + // + // using IterArgs::operator() + // + // to your struct. These are not enabled by default because + // you may be able to process these structures more efficiently + // than handling them one-by-one. + + template + void operator()(c10::IListRef args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + template + void operator()(at::ArrayRef args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + template + void operator()(const torch::List& args) { + for (const auto& arg : args) { + self()(arg); + if (self().short_circuit()) + return; + } + } + + // NB: we need to specify std::vector manually as C++ won't + // do an implicit conversion to make a template deduction go through. + template + void operator()(const std::vector& args) { + self()(at::ArrayRef{args}); + } + + constexpr bool short_circuit() const { + return false; + } + + private: + inline F& self() { + return *static_cast(this); + } +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h new file mode 100644 index 0000000000000000000000000000000000000000..db051046081768c9389dd0f08a6efccb52f22405 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/Vitals.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +#include + +namespace at::vitals { + +TORCH_API bool torchVitalEnabled(); + +struct TORCH_API TorchVitalAttr { + // always initialized to empty + std::string value; + template + TorchVitalAttr& operator<<(const T& t) { + if (torchVitalEnabled()) { + std::stringstream ss; + ss << t; + value += ss.str(); + } + return *this; + } + + template + void write(const T& t, bool force) { + if (force || torchVitalEnabled()) { + std::stringstream ss; + ss << t; + value = ss.str(); + } + } +}; + +struct TORCH_API TorchVital { + std::string name; + std::unordered_map attrs; + + explicit TorchVital(std::string n) : name(std::move(n)) {} + TorchVital(const TorchVital&) = default; + TorchVital(TorchVital&&) = default; + TorchVital& operator=(const TorchVital&) = default; + TorchVital& operator=(TorchVital&&) = default; + TorchVital() = delete; + + TorchVitalAttr& create(const std::string& attr); + TorchVitalAttr& create(const std::string& attr, bool force); + friend std::ostream& operator<<(std::ostream& os, const TorchVital& dt); + + ~TorchVital(); +}; + +std::ostream& operator<<(std::ostream& os, TorchVital const& tv); + +// A way to access vitals by string names instead of by global reference. +// This enables access to vitals from the PythonAPI. +class TORCH_API APIVitals { + public: + bool vitals_enabled; + + // Set any vital sign that was added to the map. 
+ bool setVital( + const std::string& vital_name, + const std::string& attr_name, + const std::string& value, + bool force = false); + std::string readVitals(); + + APIVitals(); + + // Ensure this stays a singleton + APIVitals(APIVitals const& other) = delete; + APIVitals(APIVitals&& other) = delete; + APIVitals& operator=(const APIVitals&) = delete; + APIVitals& operator=(APIVitals&&) = delete; + ~APIVitals() = default; + + private: + std::unordered_map name_map_; +}; + +extern TORCH_API APIVitals VitalsAPI; + +} // namespace at::vitals + +#define TORCH_VITAL_DECLARE(name) \ + TORCH_API at::vitals::TorchVital TorchVital_##name; + +#define TORCH_VITAL_DEFINE(name) \ + TORCH_API at::vitals::TorchVital TorchVital_##name(#name); + +#define TORCH_VITAL_BASE(name) TorchVital_##name + +#define TORCH_VITAL(name, attr) TORCH_VITAL_BASE(name).create(#attr) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h new file mode 100644 index 0000000000000000000000000000000000000000..249263de784801697cc7cc2fe5cfe8fd43afdf92 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/alias_info.h @@ -0,0 +1,167 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { +/** + * class AliasInfo + * + * Data structure to hold aliasing information for an `Argument`. They can be + * nested to represent aliasing information on contained types. 
+ * + * There is a `beforeSet` which describes the aliasing information before the + * operator executes, and an `afterSet` that describes aliasing info + * after execution. + */ +class AliasInfo { + public: + AliasInfo() = default; + AliasInfo(bool is_write, const std::set& before_qual_strings, const std::set& after_qual_strings) : isWrite_(is_write) { + for (const auto& s: before_qual_strings) { + beforeSets_.insert(Symbol::fromQualString(s)); + } + for (const auto& s : after_qual_strings) { + afterSets_.insert(Symbol::fromQualString(s)); + } + } + // Symbol for the set that can alias anything + static Symbol wildcardSet() { + static const Symbol wc = Symbol::fromQualString("alias::*"); + return wc; + } + + void setIsWrite(bool isWrite) { + isWrite_ = isWrite; + } + + bool isWrite() const { + return isWrite_; + } + + void addBeforeSet(Symbol aliasSet) { + beforeSets_.insert(aliasSet); + } + + void addAfterSet(Symbol aliasSet) { + afterSets_.insert(aliasSet); + } + + const std::unordered_set& beforeSets() const { + return beforeSets_; + } + + const std::unordered_set& afterSets() const { + return afterSets_; + } + + Symbol beforeSet() const { + AT_ASSERT(beforeSets_.size() == 1); + return *beforeSets_.begin(); + } + + bool isWildcardBefore() const { + return beforeSets_.count(wildcardSet()) != 0; + } + + bool isWildcardAfter() const { + return afterSets_.count(wildcardSet()) != 0; + } + + // the alias info for the contained types of the type + // e.g. 
if this is an annotation on List[T], `sets` refers to + // the alias sets that the list may be in + // while containedTypes()[0] refers to the sets that members of the list + // may be in + void addContainedType(AliasInfo aliasInfo) { + containedTypes_.push_back(std::move(aliasInfo)); + } + const std::vector& containedTypes() const { + return containedTypes_; + } + + private: + std::unordered_set beforeSets_; + std::unordered_set afterSets_; + std::vector containedTypes_; + bool isWrite_ = false; +}; + +inline bool operator==(const AliasInfo& lhs, const AliasInfo& rhs) { + return lhs.isWrite() == rhs.isWrite() + && lhs.beforeSets() == rhs.beforeSets() + && lhs.afterSets() == rhs.afterSets() + && lhs.containedTypes() == rhs.containedTypes(); +} + +// this does match the way things are represented in the schema +inline std::ostream& operator<<(std::ostream& out, const AliasInfo& aliasInfo) { + out << '('; + bool first = true; + for (const auto& set : aliasInfo.beforeSets()) { + if (first) { + first = false; + } else { + out << '|'; + } + out << set.toUnqualString(); + } + if (aliasInfo.isWrite()) { + out << '!'; + } + if (aliasInfo.beforeSets() != aliasInfo.afterSets()) { + out << " -> "; + first = true; + for (const auto& set : aliasInfo.afterSets()) { + if (first) { + first = false; + } else { + out << '|'; + } + out << set.toUnqualString(); + } + } + out << ')'; + return out; +} +} // namespace c10 + +namespace std { +template <> + struct hash { + size_t operator()(const c10::AliasInfo& aliasInfo) const { + auto hash = std::hash()(aliasInfo.isWrite()); + + // NOTE: for unordered_set hashes, we couldn't use hash_combine + // because hash_combine is order dependent. Instead, we choose to + // use XOR as the combining function as XOR is commutative. 
+ size_t before_set_hash_seed = 0; + for (auto &e: aliasInfo.beforeSets()) { + auto symbol_hash = std::hash()(e); + before_set_hash_seed = before_set_hash_seed ^ symbol_hash; + } + size_t after_set_hash_seed = 0; + for (auto &e: aliasInfo.afterSets()) { + auto symbol_hash = std::hash()(e); + after_set_hash_seed = after_set_hash_seed ^ symbol_hash; + } + + hash = c10::hash_combine(hash, before_set_hash_seed); + hash = c10::hash_combine(hash, after_set_hash_seed); + for (auto &e: aliasInfo.containedTypes()) { + auto contained_type_hash = std::hash()(e); + hash = c10::hash_combine(hash, contained_type_hash); + } + return hash; + } + }; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..e34758ffb70290101a01e078ed47b05248b0deea --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/aten_interned_strings.h @@ -0,0 +1,2309 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from aten_interned_strings.h + +#if defined(TORCH_ASSERT_NO_OPERATORS) || defined(TORCH_ASSERT_ONLY_METHOD_OPERATORS) +#error This change adds a dependency on native_functions.yaml, \ + meaning the file will need to be re-compiled every time an operator \ + is changed or added. Consider if including for \ + the c10::Symbol class would be sufficient, or if your change would be \ + better placed in another file. +#endif + +// ATen symbols correspond exactly to operators defined in ATen. 
Every +// symbol here corresponds exactly to an ATen operation defined in +// native_functions.yaml; attributes are in one-to-one correspondence +// with their ATen name. + +#define FORALL_ATEN_BASE_SYMBOLS(_) \ +_(aten, __and__) \ +_(aten, __iand__) \ +_(aten, __ilshift__) \ +_(aten, __ior__) \ +_(aten, __irshift__) \ +_(aten, __ixor__) \ +_(aten, __lshift__) \ +_(aten, __or__) \ +_(aten, __rshift__) \ +_(aten, __xor__) \ +_(aten, _adaptive_avg_pool2d) \ +_(aten, _adaptive_avg_pool2d_backward) \ +_(aten, _adaptive_avg_pool3d) \ +_(aten, _adaptive_avg_pool3d_backward) \ +_(aten, _add_batch_dim) \ +_(aten, _add_relu) \ +_(aten, _add_relu_) \ +_(aten, _addmm_activation) \ +_(aten, _aminmax) \ +_(aten, _amp_foreach_non_finite_check_and_unscale) \ +_(aten, _amp_foreach_non_finite_check_and_unscale_) \ +_(aten, _amp_update_scale) \ +_(aten, _amp_update_scale_) \ +_(aten, _assert_async) \ +_(aten, _assert_scalar) \ +_(aten, _assert_tensor_metadata) \ +_(aten, _autocast_to_full_precision) \ +_(aten, _autocast_to_reduced_precision) \ +_(aten, _backward) \ +_(aten, _batch_norm_impl_index) \ +_(aten, _batch_norm_impl_index_backward) \ +_(aten, _batch_norm_no_update) \ +_(aten, _batch_norm_with_update) \ +_(aten, _batch_norm_with_update_functional) \ +_(aten, _cast_Byte) \ +_(aten, _cast_Char) \ +_(aten, _cast_Double) \ +_(aten, _cast_Float) \ +_(aten, _cast_Half) \ +_(aten, _cast_Int) \ +_(aten, _cast_Long) \ +_(aten, _cast_Short) \ +_(aten, _cdist_backward) \ +_(aten, _cdist_forward) \ +_(aten, _cholesky_solve_helper) \ +_(aten, _choose_qparams_per_tensor) \ +_(aten, _chunk_cat) \ +_(aten, _coalesce) \ +_(aten, _coalesced) \ +_(aten, _coalesced_) \ +_(aten, _compute_linear_combination) \ +_(aten, _conj) \ +_(aten, _conj_copy) \ +_(aten, _conj_physical) \ +_(aten, _conv_depthwise2d) \ +_(aten, _convert_indices_from_coo_to_csr) \ +_(aten, _convert_indices_from_csr_to_coo) \ +_(aten, _convert_weight_to_int4pack) \ +_(aten, _convert_weight_to_int4pack_for_cpu) \ +_(aten, 
_convolution) \ +_(aten, _convolution_double_backward) \ +_(aten, _convolution_mode) \ +_(aten, _copy_from) \ +_(aten, _copy_from_and_resize) \ +_(aten, _cslt_compress) \ +_(aten, _cslt_sparse_mm) \ +_(aten, _cslt_sparse_mm_search) \ +_(aten, _ctc_loss) \ +_(aten, _ctc_loss_backward) \ +_(aten, _cudnn_attention_backward) \ +_(aten, _cudnn_attention_forward) \ +_(aten, _cudnn_ctc_loss) \ +_(aten, _cudnn_init_dropout_state) \ +_(aten, _cudnn_rnn) \ +_(aten, _cudnn_rnn_backward) \ +_(aten, _cudnn_rnn_flatten_weight) \ +_(aten, _cufft_clear_plan_cache) \ +_(aten, _cufft_get_plan_cache_max_size) \ +_(aten, _cufft_get_plan_cache_size) \ +_(aten, _cufft_set_plan_cache_max_size) \ +_(aten, _cummax_helper) \ +_(aten, _cummin_helper) \ +_(aten, _debug_has_internal_overlap) \ +_(aten, _dimI) \ +_(aten, _dimV) \ +_(aten, _dim_arange) \ +_(aten, _dirichlet_grad) \ +_(aten, _dyn_quant_matmul_4bit) \ +_(aten, _dyn_quant_pack_4bit_weight) \ +_(aten, _efficient_attention_backward) \ +_(aten, _efficient_attention_forward) \ +_(aten, _efficientzerotensor) \ +_(aten, _embedding_bag) \ +_(aten, _embedding_bag_backward) \ +_(aten, _embedding_bag_dense_backward) \ +_(aten, _embedding_bag_forward_only) \ +_(aten, _embedding_bag_per_sample_weights_backward) \ +_(aten, _embedding_bag_sparse_backward) \ +_(aten, _empty_affine_quantized) \ +_(aten, _empty_per_channel_affine_quantized) \ +_(aten, _euclidean_dist) \ +_(aten, _fake_quantize_learnable_per_channel_affine) \ +_(aten, _fake_quantize_learnable_per_channel_affine_backward) \ +_(aten, _fake_quantize_learnable_per_tensor_affine) \ +_(aten, _fake_quantize_learnable_per_tensor_affine_backward) \ +_(aten, _fake_quantize_per_tensor_affine_cachemask_tensor_qparams) \ +_(aten, _fft_c2c) \ +_(aten, _fft_c2r) \ +_(aten, _fft_r2c) \ +_(aten, _fill_mem_eff_dropout_mask) \ +_(aten, _fill_mem_eff_dropout_mask_) \ +_(aten, _flash_attention_backward) \ +_(aten, _flash_attention_forward) \ +_(aten, _foobar) \ +_(aten, _foreach_abs) \ +_(aten, 
_foreach_abs_) \ +_(aten, _foreach_acos) \ +_(aten, _foreach_acos_) \ +_(aten, _foreach_add) \ +_(aten, _foreach_add_) \ +_(aten, _foreach_addcdiv) \ +_(aten, _foreach_addcdiv_) \ +_(aten, _foreach_addcmul) \ +_(aten, _foreach_addcmul_) \ +_(aten, _foreach_asin) \ +_(aten, _foreach_asin_) \ +_(aten, _foreach_atan) \ +_(aten, _foreach_atan_) \ +_(aten, _foreach_ceil) \ +_(aten, _foreach_ceil_) \ +_(aten, _foreach_clamp_max) \ +_(aten, _foreach_clamp_max_) \ +_(aten, _foreach_clamp_min) \ +_(aten, _foreach_clamp_min_) \ +_(aten, _foreach_copy) \ +_(aten, _foreach_copy_) \ +_(aten, _foreach_cos) \ +_(aten, _foreach_cos_) \ +_(aten, _foreach_cosh) \ +_(aten, _foreach_cosh_) \ +_(aten, _foreach_div) \ +_(aten, _foreach_div_) \ +_(aten, _foreach_erf) \ +_(aten, _foreach_erf_) \ +_(aten, _foreach_erfc) \ +_(aten, _foreach_erfc_) \ +_(aten, _foreach_exp) \ +_(aten, _foreach_exp_) \ +_(aten, _foreach_expm1) \ +_(aten, _foreach_expm1_) \ +_(aten, _foreach_floor) \ +_(aten, _foreach_floor_) \ +_(aten, _foreach_frac) \ +_(aten, _foreach_frac_) \ +_(aten, _foreach_lerp) \ +_(aten, _foreach_lerp_) \ +_(aten, _foreach_lgamma) \ +_(aten, _foreach_lgamma_) \ +_(aten, _foreach_log) \ +_(aten, _foreach_log10) \ +_(aten, _foreach_log10_) \ +_(aten, _foreach_log1p) \ +_(aten, _foreach_log1p_) \ +_(aten, _foreach_log2) \ +_(aten, _foreach_log2_) \ +_(aten, _foreach_log_) \ +_(aten, _foreach_max) \ +_(aten, _foreach_maximum) \ +_(aten, _foreach_maximum_) \ +_(aten, _foreach_minimum) \ +_(aten, _foreach_minimum_) \ +_(aten, _foreach_mul) \ +_(aten, _foreach_mul_) \ +_(aten, _foreach_neg) \ +_(aten, _foreach_neg_) \ +_(aten, _foreach_norm) \ +_(aten, _foreach_pow) \ +_(aten, _foreach_pow_) \ +_(aten, _foreach_reciprocal) \ +_(aten, _foreach_reciprocal_) \ +_(aten, _foreach_round) \ +_(aten, _foreach_round_) \ +_(aten, _foreach_rsqrt) \ +_(aten, _foreach_rsqrt_) \ +_(aten, _foreach_sigmoid) \ +_(aten, _foreach_sigmoid_) \ +_(aten, _foreach_sign) \ +_(aten, _foreach_sign_) \ +_(aten, 
_foreach_sin) \ +_(aten, _foreach_sin_) \ +_(aten, _foreach_sinh) \ +_(aten, _foreach_sinh_) \ +_(aten, _foreach_sqrt) \ +_(aten, _foreach_sqrt_) \ +_(aten, _foreach_sub) \ +_(aten, _foreach_sub_) \ +_(aten, _foreach_tan) \ +_(aten, _foreach_tan_) \ +_(aten, _foreach_tanh) \ +_(aten, _foreach_tanh_) \ +_(aten, _foreach_trunc) \ +_(aten, _foreach_trunc_) \ +_(aten, _foreach_zero) \ +_(aten, _foreach_zero_) \ +_(aten, _functional_assert_async) \ +_(aten, _functional_assert_scalar) \ +_(aten, _functional_sym_constrain_range) \ +_(aten, _functional_sym_constrain_range_for_size) \ +_(aten, _fused_adagrad) \ +_(aten, _fused_adagrad_) \ +_(aten, _fused_adam) \ +_(aten, _fused_adam_) \ +_(aten, _fused_adamw) \ +_(aten, _fused_adamw_) \ +_(aten, _fused_dropout) \ +_(aten, _fused_moving_avg_obs_fq_helper) \ +_(aten, _fused_moving_avg_obs_fq_helper_functional) \ +_(aten, _fused_rms_norm) \ +_(aten, _fused_rms_norm_backward) \ +_(aten, _fused_sdp_choice) \ +_(aten, _fused_sgd) \ +_(aten, _fused_sgd_) \ +_(aten, _fw_primal) \ +_(aten, _fw_primal_copy) \ +_(aten, _gather_sparse_backward) \ +_(aten, _grid_sampler_2d_cpu_fallback) \ +_(aten, _grid_sampler_2d_cpu_fallback_backward) \ +_(aten, _grouped_mm) \ +_(aten, _has_compatible_shallow_copy_type) \ +_(aten, _has_same_storage_numel) \ +_(aten, _histogramdd_bin_edges) \ +_(aten, _histogramdd_from_bin_cts) \ +_(aten, _histogramdd_from_bin_tensors) \ +_(aten, _index_put_impl) \ +_(aten, _index_put_impl_) \ +_(aten, _indices) \ +_(aten, _indices_copy) \ +_(aten, _int_mm) \ +_(aten, _is_all_true) \ +_(aten, _is_any_true) \ +_(aten, _is_zerotensor) \ +_(aten, _jagged_to_padded_dense_forward) \ +_(aten, _lazy_clone) \ +_(aten, _linalg_check_errors) \ +_(aten, _linalg_det) \ +_(aten, _linalg_eigh) \ +_(aten, _linalg_eigvals) \ +_(aten, _linalg_slogdet) \ +_(aten, _linalg_solve_ex) \ +_(aten, _linalg_svd) \ +_(aten, _local_scalar_dense) \ +_(aten, _log_softmax) \ +_(aten, _log_softmax_backward_data) \ +_(aten, _logcumsumexp) \ +_(aten, 
_lstm_mps) \ +_(aten, _lu_with_info) \ +_(aten, _make_dep_token) \ +_(aten, _make_dual) \ +_(aten, _make_dual_copy) \ +_(aten, _make_per_channel_quantized_tensor) \ +_(aten, _make_per_tensor_quantized_tensor) \ +_(aten, _masked_scale) \ +_(aten, _masked_softmax) \ +_(aten, _masked_softmax_backward) \ +_(aten, _mixed_dtypes_linear) \ +_(aten, _mkldnn_reshape) \ +_(aten, _mkldnn_transpose) \ +_(aten, _mkldnn_transpose_) \ +_(aten, _mps_convolution) \ +_(aten, _mps_convolution_transpose) \ +_(aten, _native_batch_norm_legit) \ +_(aten, _native_batch_norm_legit_functional) \ +_(aten, _native_batch_norm_legit_no_training) \ +_(aten, _native_multi_head_attention) \ +_(aten, _neg_view) \ +_(aten, _neg_view_copy) \ +_(aten, _nested_compute_contiguous_strides_offsets) \ +_(aten, _nested_from_padded) \ +_(aten, _nested_from_padded_and_nested_example) \ +_(aten, _nested_from_padded_tensor) \ +_(aten, _nested_get_jagged_dummy) \ +_(aten, _nested_get_lengths) \ +_(aten, _nested_get_max_seqlen) \ +_(aten, _nested_get_min_seqlen) \ +_(aten, _nested_get_offsets) \ +_(aten, _nested_get_ragged_idx) \ +_(aten, _nested_get_values) \ +_(aten, _nested_get_values_copy) \ +_(aten, _nested_select_backward) \ +_(aten, _nested_sum_backward) \ +_(aten, _nested_tensor_from_mask) \ +_(aten, _nested_tensor_from_mask_left_aligned) \ +_(aten, _nested_tensor_from_tensor_list) \ +_(aten, _nested_tensor_size) \ +_(aten, _nested_tensor_softmax_with_shape) \ +_(aten, _nested_tensor_storage_offsets) \ +_(aten, _nested_tensor_strides) \ +_(aten, _nested_view_from_buffer) \ +_(aten, _nested_view_from_buffer_copy) \ +_(aten, _nested_view_from_jagged) \ +_(aten, _nested_view_from_jagged_copy) \ +_(aten, _new_zeros_with_same_feature_meta) \ +_(aten, _nnpack_available) \ +_(aten, _nnpack_spatial_convolution) \ +_(aten, _nnz) \ +_(aten, _pack_padded_sequence) \ +_(aten, _pack_padded_sequence_backward) \ +_(aten, _pad_circular) \ +_(aten, _pad_enum) \ +_(aten, _pad_packed_sequence) \ +_(aten, 
_padded_dense_to_jagged_forward) \ +_(aten, _pdist_backward) \ +_(aten, _pdist_forward) \ +_(aten, _pin_memory) \ +_(aten, _prelu_kernel) \ +_(aten, _prelu_kernel_backward) \ +_(aten, _print) \ +_(aten, _propagate_xla_data) \ +_(aten, _remove_batch_dim) \ +_(aten, _reshape_alias) \ +_(aten, _reshape_alias_copy) \ +_(aten, _reshape_copy) \ +_(aten, _reshape_from_tensor) \ +_(aten, _resize_output) \ +_(aten, _resize_output_) \ +_(aten, _rowwise_prune) \ +_(aten, _safe_softmax) \ +_(aten, _sample_dirichlet) \ +_(aten, _saturate_weight_to_fp16) \ +_(aten, _scaled_dot_product_attention_math) \ +_(aten, _scaled_dot_product_attention_math_for_mps) \ +_(aten, _scaled_dot_product_cudnn_attention) \ +_(aten, _scaled_dot_product_cudnn_attention_backward) \ +_(aten, _scaled_dot_product_efficient_attention) \ +_(aten, _scaled_dot_product_efficient_attention_backward) \ +_(aten, _scaled_dot_product_flash_attention) \ +_(aten, _scaled_dot_product_flash_attention_backward) \ +_(aten, _scaled_dot_product_flash_attention_for_cpu) \ +_(aten, _scaled_dot_product_flash_attention_for_cpu_backward) \ +_(aten, _scaled_dot_product_fused_attention_overrideable) \ +_(aten, _scaled_dot_product_fused_attention_overrideable_backward) \ +_(aten, _scaled_grouped_mm) \ +_(aten, _scaled_grouped_mm_v2) \ +_(aten, _scaled_mm) \ +_(aten, _scaled_mm_v2) \ +_(aten, _segment_reduce_backward) \ +_(aten, _shape_as_tensor) \ +_(aten, _slow_conv2d_backward) \ +_(aten, _slow_conv2d_forward) \ +_(aten, _sobol_engine_draw) \ +_(aten, _sobol_engine_ff) \ +_(aten, _sobol_engine_ff_) \ +_(aten, _sobol_engine_initialize_state) \ +_(aten, _sobol_engine_initialize_state_) \ +_(aten, _sobol_engine_scramble) \ +_(aten, _sobol_engine_scramble_) \ +_(aten, _softmax) \ +_(aten, _softmax_backward_data) \ +_(aten, _sparse_addmm) \ +_(aten, _sparse_broadcast_to) \ +_(aten, _sparse_broadcast_to_copy) \ +_(aten, _sparse_bsc_tensor_unsafe) \ +_(aten, _sparse_bsr_tensor_unsafe) \ +_(aten, _sparse_compressed_tensor_unsafe) \ 
+_(aten, _sparse_compressed_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_unsafe) \ +_(aten, _sparse_coo_tensor_with_dims) \ +_(aten, _sparse_coo_tensor_with_dims_and_tensors) \ +_(aten, _sparse_csc_tensor_unsafe) \ +_(aten, _sparse_csr_prod) \ +_(aten, _sparse_csr_sum) \ +_(aten, _sparse_csr_tensor_unsafe) \ +_(aten, _sparse_log_softmax) \ +_(aten, _sparse_log_softmax_backward_data) \ +_(aten, _sparse_mask_projection) \ +_(aten, _sparse_mm) \ +_(aten, _sparse_mm_reduce_impl) \ +_(aten, _sparse_mm_reduce_impl_backward) \ +_(aten, _sparse_semi_structured_addmm) \ +_(aten, _sparse_semi_structured_apply) \ +_(aten, _sparse_semi_structured_apply_dense) \ +_(aten, _sparse_semi_structured_linear) \ +_(aten, _sparse_semi_structured_mm) \ +_(aten, _sparse_semi_structured_tile) \ +_(aten, _sparse_softmax) \ +_(aten, _sparse_softmax_backward_data) \ +_(aten, _sparse_sparse_matmul) \ +_(aten, _sparse_sum) \ +_(aten, _sparse_sum_backward) \ +_(aten, _spdiags) \ +_(aten, _spsolve) \ +_(aten, _stack) \ +_(aten, _standard_gamma) \ +_(aten, _standard_gamma_grad) \ +_(aten, _test_ambiguous_defaults) \ +_(aten, _test_autograd_multiple_dispatch) \ +_(aten, _test_autograd_multiple_dispatch_view) \ +_(aten, _test_autograd_multiple_dispatch_view_copy) \ +_(aten, _test_check_tensor) \ +_(aten, _test_functorch_fallback) \ +_(aten, _test_optional_filled_intlist) \ +_(aten, _test_optional_floatlist) \ +_(aten, _test_optional_intlist) \ +_(aten, _test_parallel_materialize) \ +_(aten, _test_serialization_subcmul) \ +_(aten, _test_string_default) \ +_(aten, _test_warn_in_autograd) \ +_(aten, _thnn_differentiable_gru_cell_backward) \ +_(aten, _thnn_differentiable_lstm_cell_backward) \ +_(aten, _thnn_fused_gru_cell) \ +_(aten, _thnn_fused_gru_cell_backward) \ +_(aten, _thnn_fused_lstm_cell) \ +_(aten, _thnn_fused_lstm_cell_backward) \ +_(aten, _thnn_fused_lstm_cell_backward_impl) \ +_(aten, _to_copy) \ +_(aten, _to_cpu) \ +_(aten, _to_dense) \ +_(aten, _to_sparse) \ +_(aten, _to_sparse_bsc) \ 
+_(aten, _to_sparse_bsr) \ +_(aten, _to_sparse_csc) \ +_(aten, _to_sparse_csr) \ +_(aten, _to_sparse_semi_structured) \ +_(aten, _transform_bias_rescale_qkv) \ +_(aten, _transformer_encoder_layer_fwd) \ +_(aten, _trilinear) \ +_(aten, _triton_multi_head_attention) \ +_(aten, _triton_scaled_dot_attention) \ +_(aten, _unique) \ +_(aten, _unique2) \ +_(aten, _unpack_dual) \ +_(aten, _unsafe_index) \ +_(aten, _unsafe_index_put) \ +_(aten, _unsafe_masked_index) \ +_(aten, _unsafe_masked_index_put_accumulate) \ +_(aten, _unsafe_view) \ +_(aten, _upsample_bicubic2d_aa) \ +_(aten, _upsample_bicubic2d_aa_backward) \ +_(aten, _upsample_bilinear2d_aa) \ +_(aten, _upsample_bilinear2d_aa_backward) \ +_(aten, _upsample_nearest_exact1d) \ +_(aten, _upsample_nearest_exact1d_backward) \ +_(aten, _upsample_nearest_exact2d) \ +_(aten, _upsample_nearest_exact2d_backward) \ +_(aten, _upsample_nearest_exact3d) \ +_(aten, _upsample_nearest_exact3d_backward) \ +_(aten, _use_cudnn_ctc_loss) \ +_(aten, _use_cudnn_rnn_flatten_weight) \ +_(aten, _validate_compressed_sparse_indices) \ +_(aten, _validate_sparse_bsc_tensor_args) \ +_(aten, _validate_sparse_bsr_tensor_args) \ +_(aten, _validate_sparse_compressed_tensor_args) \ +_(aten, _validate_sparse_coo_tensor_args) \ +_(aten, _validate_sparse_csc_tensor_args) \ +_(aten, _validate_sparse_csr_tensor_args) \ +_(aten, _values) \ +_(aten, _values_copy) \ +_(aten, _version) \ +_(aten, _weight_int4pack_mm) \ +_(aten, _weight_int4pack_mm_for_cpu) \ +_(aten, _weight_int4pack_mm_with_scales_and_zeros) \ +_(aten, _weight_int8pack_mm) \ +_(aten, _weight_norm) \ +_(aten, _weight_norm_differentiable_backward) \ +_(aten, _weight_norm_interface) \ +_(aten, _weight_norm_interface_backward) \ +_(aten, _wrapped_linear_prepack) \ +_(aten, _wrapped_quantized_linear_prepacked) \ +_(aten, abs) \ +_(aten, abs_) \ +_(aten, absolute) \ +_(aten, absolute_) \ +_(aten, acos) \ +_(aten, acos_) \ +_(aten, acosh) \ +_(aten, acosh_) \ +_(aten, adaptive_avg_pool1d) \ +_(aten, 
adaptive_avg_pool2d) \ +_(aten, adaptive_avg_pool3d) \ +_(aten, adaptive_avg_pool3d_backward) \ +_(aten, adaptive_max_pool1d) \ +_(aten, adaptive_max_pool2d) \ +_(aten, adaptive_max_pool2d_backward) \ +_(aten, adaptive_max_pool3d) \ +_(aten, adaptive_max_pool3d_backward) \ +_(aten, add) \ +_(aten, add_) \ +_(aten, addbmm) \ +_(aten, addbmm_) \ +_(aten, addcdiv) \ +_(aten, addcdiv_) \ +_(aten, addcmul) \ +_(aten, addcmul_) \ +_(aten, addmm) \ +_(aten, addmm_) \ +_(aten, addmv) \ +_(aten, addmv_) \ +_(aten, addr) \ +_(aten, addr_) \ +_(aten, adjoint) \ +_(aten, affine_grid_generator) \ +_(aten, affine_grid_generator_backward) \ +_(aten, alias) \ +_(aten, alias_copy) \ +_(aten, align_as) \ +_(aten, align_tensors) \ +_(aten, align_to) \ +_(aten, all) \ +_(aten, allclose) \ +_(aten, alpha_dropout) \ +_(aten, alpha_dropout_) \ +_(aten, amax) \ +_(aten, amin) \ +_(aten, aminmax) \ +_(aten, angle) \ +_(aten, any) \ +_(aten, arange) \ +_(aten, arccos) \ +_(aten, arccos_) \ +_(aten, arccosh) \ +_(aten, arccosh_) \ +_(aten, arcsin) \ +_(aten, arcsin_) \ +_(aten, arcsinh) \ +_(aten, arcsinh_) \ +_(aten, arctan) \ +_(aten, arctan2) \ +_(aten, arctan2_) \ +_(aten, arctan_) \ +_(aten, arctanh) \ +_(aten, arctanh_) \ +_(aten, argmax) \ +_(aten, argmin) \ +_(aten, argsort) \ +_(aten, argwhere) \ +_(aten, as_strided) \ +_(aten, as_strided_) \ +_(aten, as_strided_copy) \ +_(aten, as_strided_scatter) \ +_(aten, asin) \ +_(aten, asin_) \ +_(aten, asinh) \ +_(aten, asinh_) \ +_(aten, atan) \ +_(aten, atan2) \ +_(aten, atan2_) \ +_(aten, atan_) \ +_(aten, atanh) \ +_(aten, atanh_) \ +_(aten, atleast_1d) \ +_(aten, atleast_2d) \ +_(aten, atleast_3d) \ +_(aten, avg_pool1d) \ +_(aten, avg_pool2d) \ +_(aten, avg_pool2d_backward) \ +_(aten, avg_pool3d) \ +_(aten, avg_pool3d_backward) \ +_(aten, baddbmm) \ +_(aten, baddbmm_) \ +_(aten, bartlett_window) \ +_(aten, batch_norm) \ +_(aten, batch_norm_backward) \ +_(aten, batch_norm_backward_elemt) \ +_(aten, batch_norm_backward_reduce) \ +_(aten, 
batch_norm_elemt) \ +_(aten, batch_norm_gather_stats) \ +_(aten, batch_norm_gather_stats_with_counts) \ +_(aten, batch_norm_stats) \ +_(aten, batch_norm_update_stats) \ +_(aten, bernoulli) \ +_(aten, bernoulli_) \ +_(aten, bilinear) \ +_(aten, binary_cross_entropy) \ +_(aten, binary_cross_entropy_backward) \ +_(aten, binary_cross_entropy_with_logits) \ +_(aten, bincount) \ +_(aten, binomial) \ +_(aten, bitwise_and) \ +_(aten, bitwise_and_) \ +_(aten, bitwise_left_shift) \ +_(aten, bitwise_left_shift_) \ +_(aten, bitwise_not) \ +_(aten, bitwise_not_) \ +_(aten, bitwise_or) \ +_(aten, bitwise_or_) \ +_(aten, bitwise_right_shift) \ +_(aten, bitwise_right_shift_) \ +_(aten, bitwise_xor) \ +_(aten, bitwise_xor_) \ +_(aten, blackman_window) \ +_(aten, block_diag) \ +_(aten, bmm) \ +_(aten, broadcast_tensors) \ +_(aten, broadcast_to) \ +_(aten, bucketize) \ +_(aten, can_cast) \ +_(aten, cartesian_prod) \ +_(aten, cat) \ +_(aten, cauchy) \ +_(aten, cauchy_) \ +_(aten, ccol_indices) \ +_(aten, ccol_indices_copy) \ +_(aten, cdist) \ +_(aten, ceil) \ +_(aten, ceil_) \ +_(aten, celu) \ +_(aten, celu_) \ +_(aten, chain_matmul) \ +_(aten, chalf) \ +_(aten, channel_shuffle) \ +_(aten, cholesky) \ +_(aten, cholesky_inverse) \ +_(aten, cholesky_solve) \ +_(aten, choose_qparams_optimized) \ +_(aten, chunk) \ +_(aten, clamp) \ +_(aten, clamp_) \ +_(aten, clamp_max) \ +_(aten, clamp_max_) \ +_(aten, clamp_min) \ +_(aten, clamp_min_) \ +_(aten, clip) \ +_(aten, clip_) \ +_(aten, clone) \ +_(aten, coalesce) \ +_(aten, col2im) \ +_(aten, col_indices) \ +_(aten, col_indices_copy) \ +_(aten, column_stack) \ +_(aten, combinations) \ +_(aten, complex) \ +_(aten, concat) \ +_(aten, concatenate) \ +_(aten, conj) \ +_(aten, conj_physical) \ +_(aten, conj_physical_) \ +_(aten, constant_pad_nd) \ +_(aten, contiguous) \ +_(aten, conv1d) \ +_(aten, conv2d) \ +_(aten, conv3d) \ +_(aten, conv_depthwise3d) \ +_(aten, conv_tbc) \ +_(aten, conv_tbc_backward) \ +_(aten, conv_transpose1d) \ +_(aten, 
conv_transpose2d) \ +_(aten, conv_transpose3d) \ +_(aten, convolution) \ +_(aten, convolution_backward) \ +_(aten, convolution_backward_overrideable) \ +_(aten, convolution_overrideable) \ +_(aten, copy) \ +_(aten, copy_) \ +_(aten, copy_sparse_to_sparse) \ +_(aten, copy_sparse_to_sparse_) \ +_(aten, copysign) \ +_(aten, copysign_) \ +_(aten, corrcoef) \ +_(aten, cos) \ +_(aten, cos_) \ +_(aten, cosh) \ +_(aten, cosh_) \ +_(aten, cosine_embedding_loss) \ +_(aten, cosine_similarity) \ +_(aten, count_nonzero) \ +_(aten, cov) \ +_(aten, cross) \ +_(aten, cross_entropy_loss) \ +_(aten, crow_indices) \ +_(aten, crow_indices_copy) \ +_(aten, ctc_loss) \ +_(aten, cudnn_affine_grid_generator) \ +_(aten, cudnn_affine_grid_generator_backward) \ +_(aten, cudnn_batch_norm) \ +_(aten, cudnn_batch_norm_backward) \ +_(aten, cudnn_convolution) \ +_(aten, cudnn_convolution_add_relu) \ +_(aten, cudnn_convolution_relu) \ +_(aten, cudnn_convolution_transpose) \ +_(aten, cudnn_grid_sampler) \ +_(aten, cudnn_grid_sampler_backward) \ +_(aten, cudnn_is_acceptable) \ +_(aten, cummax) \ +_(aten, cummaxmin_backward) \ +_(aten, cummin) \ +_(aten, cumprod) \ +_(aten, cumprod_) \ +_(aten, cumprod_backward) \ +_(aten, cumsum) \ +_(aten, cumsum_) \ +_(aten, cumulative_trapezoid) \ +_(aten, data) \ +_(aten, deg2rad) \ +_(aten, deg2rad_) \ +_(aten, dense_dim) \ +_(aten, dequantize) \ +_(aten, det) \ +_(aten, detach) \ +_(aten, detach_) \ +_(aten, detach_copy) \ +_(aten, diag) \ +_(aten, diag_embed) \ +_(aten, diagflat) \ +_(aten, diagonal) \ +_(aten, diagonal_backward) \ +_(aten, diagonal_copy) \ +_(aten, diagonal_scatter) \ +_(aten, diff) \ +_(aten, digamma) \ +_(aten, digamma_) \ +_(aten, dist) \ +_(aten, div) \ +_(aten, div_) \ +_(aten, divide) \ +_(aten, divide_) \ +_(aten, dot) \ +_(aten, dropout) \ +_(aten, dropout_) \ +_(aten, dsplit) \ +_(aten, dstack) \ +_(aten, einsum) \ +_(aten, elu) \ +_(aten, elu_) \ +_(aten, elu_backward) \ +_(aten, embedding) \ +_(aten, embedding_backward) \ +_(aten, 
embedding_bag) \ +_(aten, embedding_dense_backward) \ +_(aten, embedding_renorm) \ +_(aten, embedding_renorm_) \ +_(aten, embedding_sparse_backward) \ +_(aten, empty) \ +_(aten, empty_like) \ +_(aten, empty_permuted) \ +_(aten, empty_quantized) \ +_(aten, empty_strided) \ +_(aten, eq) \ +_(aten, eq_) \ +_(aten, equal) \ +_(aten, erf) \ +_(aten, erf_) \ +_(aten, erfc) \ +_(aten, erfc_) \ +_(aten, erfinv) \ +_(aten, erfinv_) \ +_(aten, exp) \ +_(aten, exp2) \ +_(aten, exp2_) \ +_(aten, exp_) \ +_(aten, expand) \ +_(aten, expand_as) \ +_(aten, expand_copy) \ +_(aten, expm1) \ +_(aten, expm1_) \ +_(aten, exponential) \ +_(aten, exponential_) \ +_(aten, eye) \ +_(aten, fake_quantize_per_channel_affine) \ +_(aten, fake_quantize_per_channel_affine_cachemask) \ +_(aten, fake_quantize_per_channel_affine_cachemask_backward) \ +_(aten, fake_quantize_per_tensor_affine) \ +_(aten, fake_quantize_per_tensor_affine_cachemask) \ +_(aten, fake_quantize_per_tensor_affine_cachemask_backward) \ +_(aten, fbgemm_linear_fp16_weight) \ +_(aten, fbgemm_linear_fp16_weight_fp32_activation) \ +_(aten, fbgemm_linear_int8_weight) \ +_(aten, fbgemm_linear_int8_weight_fp32_activation) \ +_(aten, fbgemm_linear_quantize_weight) \ +_(aten, fbgemm_pack_gemm_matrix_fp16) \ +_(aten, fbgemm_pack_quantized_matrix) \ +_(aten, feature_alpha_dropout) \ +_(aten, feature_alpha_dropout_) \ +_(aten, feature_dropout) \ +_(aten, feature_dropout_) \ +_(aten, fft_fft) \ +_(aten, fft_fft2) \ +_(aten, fft_fftfreq) \ +_(aten, fft_fftn) \ +_(aten, fft_fftshift) \ +_(aten, fft_hfft) \ +_(aten, fft_hfft2) \ +_(aten, fft_hfftn) \ +_(aten, fft_ifft) \ +_(aten, fft_ifft2) \ +_(aten, fft_ifftn) \ +_(aten, fft_ifftshift) \ +_(aten, fft_ihfft) \ +_(aten, fft_ihfft2) \ +_(aten, fft_ihfftn) \ +_(aten, fft_irfft) \ +_(aten, fft_irfft2) \ +_(aten, fft_irfftn) \ +_(aten, fft_rfft) \ +_(aten, fft_rfft2) \ +_(aten, fft_rfftfreq) \ +_(aten, fft_rfftn) \ +_(aten, fill) \ +_(aten, fill_) \ +_(aten, fill_diagonal) \ +_(aten, 
fill_diagonal_) \ +_(aten, fix) \ +_(aten, fix_) \ +_(aten, flatten) \ +_(aten, flatten_dense_tensors) \ +_(aten, flip) \ +_(aten, fliplr) \ +_(aten, flipud) \ +_(aten, float_power) \ +_(aten, float_power_) \ +_(aten, floor) \ +_(aten, floor_) \ +_(aten, floor_divide) \ +_(aten, floor_divide_) \ +_(aten, fmax) \ +_(aten, fmin) \ +_(aten, fmod) \ +_(aten, fmod_) \ +_(aten, frac) \ +_(aten, frac_) \ +_(aten, fractional_max_pool2d) \ +_(aten, fractional_max_pool2d_backward) \ +_(aten, fractional_max_pool3d) \ +_(aten, fractional_max_pool3d_backward) \ +_(aten, frexp) \ +_(aten, frobenius_norm) \ +_(aten, from_file) \ +_(aten, full) \ +_(aten, full_like) \ +_(aten, fused_moving_avg_obs_fake_quant) \ +_(aten, gather) \ +_(aten, gather_backward) \ +_(aten, gcd) \ +_(aten, gcd_) \ +_(aten, ge) \ +_(aten, ge_) \ +_(aten, gelu) \ +_(aten, gelu_) \ +_(aten, gelu_backward) \ +_(aten, geometric) \ +_(aten, geometric_) \ +_(aten, geqrf) \ +_(aten, ger) \ +_(aten, glu) \ +_(aten, glu_backward) \ +_(aten, glu_backward_jvp) \ +_(aten, glu_jvp) \ +_(aten, gradient) \ +_(aten, greater) \ +_(aten, greater_) \ +_(aten, greater_equal) \ +_(aten, greater_equal_) \ +_(aten, grid_sampler) \ +_(aten, grid_sampler_2d) \ +_(aten, grid_sampler_2d_backward) \ +_(aten, grid_sampler_3d) \ +_(aten, grid_sampler_3d_backward) \ +_(aten, group_norm) \ +_(aten, gru) \ +_(aten, gru_cell) \ +_(aten, gt) \ +_(aten, gt_) \ +_(aten, hamming_window) \ +_(aten, hann_window) \ +_(aten, hardshrink) \ +_(aten, hardshrink_backward) \ +_(aten, hardsigmoid) \ +_(aten, hardsigmoid_) \ +_(aten, hardsigmoid_backward) \ +_(aten, hardswish) \ +_(aten, hardswish_) \ +_(aten, hardswish_backward) \ +_(aten, hardtanh) \ +_(aten, hardtanh_) \ +_(aten, hardtanh_backward) \ +_(aten, hash_tensor) \ +_(aten, heaviside) \ +_(aten, heaviside_) \ +_(aten, hinge_embedding_loss) \ +_(aten, histc) \ +_(aten, histogram) \ +_(aten, histogramdd) \ +_(aten, hsplit) \ +_(aten, hspmm) \ +_(aten, hstack) \ +_(aten, huber_loss) \ +_(aten, 
huber_loss_backward) \ +_(aten, hypot) \ +_(aten, hypot_) \ +_(aten, i0) \ +_(aten, i0_) \ +_(aten, igamma) \ +_(aten, igamma_) \ +_(aten, igammac) \ +_(aten, igammac_) \ +_(aten, im2col) \ +_(aten, imag) \ +_(aten, index) \ +_(aten, index_add) \ +_(aten, index_add_) \ +_(aten, index_copy) \ +_(aten, index_copy_) \ +_(aten, index_fill) \ +_(aten, index_fill_) \ +_(aten, index_put) \ +_(aten, index_put_) \ +_(aten, index_reduce) \ +_(aten, index_reduce_) \ +_(aten, index_select) \ +_(aten, index_select_backward) \ +_(aten, indices) \ +_(aten, indices_copy) \ +_(aten, infinitely_differentiable_gelu_backward) \ +_(aten, inner) \ +_(aten, instance_norm) \ +_(aten, int_repr) \ +_(aten, inverse) \ +_(aten, is_coalesced) \ +_(aten, is_complex) \ +_(aten, is_conj) \ +_(aten, is_distributed) \ +_(aten, is_floating_point) \ +_(aten, is_inference) \ +_(aten, is_leaf) \ +_(aten, is_neg) \ +_(aten, is_nonzero) \ +_(aten, is_pinned) \ +_(aten, is_same_size) \ +_(aten, is_set_to) \ +_(aten, is_signed) \ +_(aten, is_vulkan_available) \ +_(aten, isclose) \ +_(aten, isfinite) \ +_(aten, isin) \ +_(aten, isinf) \ +_(aten, isnan) \ +_(aten, isneginf) \ +_(aten, isposinf) \ +_(aten, isreal) \ +_(aten, istft) \ +_(aten, item) \ +_(aten, kaiser_window) \ +_(aten, kl_div) \ +_(aten, kron) \ +_(aten, kthvalue) \ +_(aten, l1_loss) \ +_(aten, layer_norm) \ +_(aten, lcm) \ +_(aten, lcm_) \ +_(aten, ldexp) \ +_(aten, ldexp_) \ +_(aten, le) \ +_(aten, le_) \ +_(aten, leaky_relu) \ +_(aten, leaky_relu_) \ +_(aten, leaky_relu_backward) \ +_(aten, lerp) \ +_(aten, lerp_) \ +_(aten, less) \ +_(aten, less_) \ +_(aten, less_equal) \ +_(aten, less_equal_) \ +_(aten, lgamma) \ +_(aten, lgamma_) \ +_(aten, lift) \ +_(aten, lift_fresh) \ +_(aten, lift_fresh_copy) \ +_(aten, linalg_cholesky) \ +_(aten, linalg_cholesky_ex) \ +_(aten, linalg_cond) \ +_(aten, linalg_cross) \ +_(aten, linalg_det) \ +_(aten, linalg_diagonal) \ +_(aten, linalg_eig) \ +_(aten, linalg_eigh) \ +_(aten, linalg_eigvals) \ +_(aten, 
linalg_eigvalsh) \ +_(aten, linalg_householder_product) \ +_(aten, linalg_inv) \ +_(aten, linalg_inv_ex) \ +_(aten, linalg_ldl_factor) \ +_(aten, linalg_ldl_factor_ex) \ +_(aten, linalg_ldl_solve) \ +_(aten, linalg_lstsq) \ +_(aten, linalg_lu) \ +_(aten, linalg_lu_factor) \ +_(aten, linalg_lu_factor_ex) \ +_(aten, linalg_lu_solve) \ +_(aten, linalg_matmul) \ +_(aten, linalg_matrix_exp) \ +_(aten, linalg_matrix_norm) \ +_(aten, linalg_matrix_power) \ +_(aten, linalg_matrix_rank) \ +_(aten, linalg_multi_dot) \ +_(aten, linalg_norm) \ +_(aten, linalg_pinv) \ +_(aten, linalg_qr) \ +_(aten, linalg_slogdet) \ +_(aten, linalg_solve) \ +_(aten, linalg_solve_ex) \ +_(aten, linalg_solve_triangular) \ +_(aten, linalg_svd) \ +_(aten, linalg_svdvals) \ +_(aten, linalg_tensorinv) \ +_(aten, linalg_tensorsolve) \ +_(aten, linalg_vander) \ +_(aten, linalg_vecdot) \ +_(aten, linalg_vector_norm) \ +_(aten, linear) \ +_(aten, linear_backward) \ +_(aten, linspace) \ +_(aten, log) \ +_(aten, log10) \ +_(aten, log10_) \ +_(aten, log1p) \ +_(aten, log1p_) \ +_(aten, log2) \ +_(aten, log2_) \ +_(aten, log_) \ +_(aten, log_normal) \ +_(aten, log_normal_) \ +_(aten, log_sigmoid) \ +_(aten, log_sigmoid_backward) \ +_(aten, log_sigmoid_forward) \ +_(aten, log_softmax) \ +_(aten, logaddexp) \ +_(aten, logaddexp2) \ +_(aten, logcumsumexp) \ +_(aten, logdet) \ +_(aten, logical_and) \ +_(aten, logical_and_) \ +_(aten, logical_not) \ +_(aten, logical_not_) \ +_(aten, logical_or) \ +_(aten, logical_or_) \ +_(aten, logical_xor) \ +_(aten, logical_xor_) \ +_(aten, logit) \ +_(aten, logit_) \ +_(aten, logit_backward) \ +_(aten, logspace) \ +_(aten, logsumexp) \ +_(aten, lshift) \ +_(aten, lstm) \ +_(aten, lstm_cell) \ +_(aten, lstm_mps_backward) \ +_(aten, lt) \ +_(aten, lt_) \ +_(aten, lu_solve) \ +_(aten, lu_unpack) \ +_(aten, mH) \ +_(aten, mT) \ +_(aten, margin_ranking_loss) \ +_(aten, masked_fill) \ +_(aten, masked_fill_) \ +_(aten, masked_scatter) \ +_(aten, masked_scatter_) \ +_(aten, 
masked_scatter_backward) \ +_(aten, masked_select) \ +_(aten, masked_select_backward) \ +_(aten, matmul) \ +_(aten, matmul_backward) \ +_(aten, matrix_H) \ +_(aten, matrix_exp) \ +_(aten, matrix_exp_backward) \ +_(aten, matrix_power) \ +_(aten, max) \ +_(aten, max_pool1d) \ +_(aten, max_pool1d_with_indices) \ +_(aten, max_pool2d) \ +_(aten, max_pool2d_backward) \ +_(aten, max_pool2d_with_indices) \ +_(aten, max_pool2d_with_indices_backward) \ +_(aten, max_pool3d) \ +_(aten, max_pool3d_with_indices) \ +_(aten, max_pool3d_with_indices_backward) \ +_(aten, max_unpool2d) \ +_(aten, max_unpool3d) \ +_(aten, maximum) \ +_(aten, mean) \ +_(aten, median) \ +_(aten, meshgrid) \ +_(aten, min) \ +_(aten, minimum) \ +_(aten, miopen_batch_norm) \ +_(aten, miopen_batch_norm_backward) \ +_(aten, miopen_convolution) \ +_(aten, miopen_convolution_add_relu) \ +_(aten, miopen_convolution_relu) \ +_(aten, miopen_convolution_transpose) \ +_(aten, miopen_depthwise_convolution) \ +_(aten, miopen_rnn) \ +_(aten, miopen_rnn_backward) \ +_(aten, mish) \ +_(aten, mish_) \ +_(aten, mish_backward) \ +_(aten, mkldnn_adaptive_avg_pool2d) \ +_(aten, mkldnn_adaptive_avg_pool2d_backward) \ +_(aten, mkldnn_convolution) \ +_(aten, mkldnn_linear) \ +_(aten, mkldnn_linear_backward) \ +_(aten, mkldnn_linear_backward_input) \ +_(aten, mkldnn_linear_backward_weights) \ +_(aten, mkldnn_max_pool2d) \ +_(aten, mkldnn_max_pool2d_backward) \ +_(aten, mkldnn_max_pool3d) \ +_(aten, mkldnn_max_pool3d_backward) \ +_(aten, mkldnn_reorder_conv2d_weight) \ +_(aten, mkldnn_reorder_conv3d_weight) \ +_(aten, mkldnn_rnn_layer) \ +_(aten, mkldnn_rnn_layer_backward) \ +_(aten, mm) \ +_(aten, mode) \ +_(aten, moveaxis) \ +_(aten, movedim) \ +_(aten, mps_convolution_backward) \ +_(aten, mps_convolution_transpose_backward) \ +_(aten, mse_loss) \ +_(aten, mse_loss_backward) \ +_(aten, msort) \ +_(aten, mul) \ +_(aten, mul_) \ +_(aten, multi_margin_loss) \ +_(aten, multi_margin_loss_backward) \ +_(aten, multilabel_margin_loss) 
\ +_(aten, multilabel_margin_loss_backward) \ +_(aten, multilabel_margin_loss_forward) \ +_(aten, multinomial) \ +_(aten, multiply) \ +_(aten, multiply_) \ +_(aten, mv) \ +_(aten, mvlgamma) \ +_(aten, mvlgamma_) \ +_(aten, nan_to_num) \ +_(aten, nan_to_num_) \ +_(aten, nanmean) \ +_(aten, nanmedian) \ +_(aten, nanquantile) \ +_(aten, nansum) \ +_(aten, narrow) \ +_(aten, narrow_copy) \ +_(aten, native_batch_norm) \ +_(aten, native_batch_norm_backward) \ +_(aten, native_channel_shuffle) \ +_(aten, native_dropout) \ +_(aten, native_dropout_backward) \ +_(aten, native_group_norm) \ +_(aten, native_group_norm_backward) \ +_(aten, native_layer_norm) \ +_(aten, native_layer_norm_backward) \ +_(aten, native_norm) \ +_(aten, ne) \ +_(aten, ne_) \ +_(aten, neg) \ +_(aten, neg_) \ +_(aten, negative) \ +_(aten, negative_) \ +_(aten, nested_to_padded_tensor) \ +_(aten, new_empty) \ +_(aten, new_empty_strided) \ +_(aten, new_full) \ +_(aten, new_ones) \ +_(aten, new_zeros) \ +_(aten, nextafter) \ +_(aten, nextafter_) \ +_(aten, nll_loss) \ +_(aten, nll_loss2d) \ +_(aten, nll_loss2d_backward) \ +_(aten, nll_loss2d_forward) \ +_(aten, nll_loss_backward) \ +_(aten, nll_loss_forward) \ +_(aten, nll_loss_nd) \ +_(aten, nonzero) \ +_(aten, nonzero_numpy) \ +_(aten, nonzero_static) \ +_(aten, norm) \ +_(aten, norm_except_dim) \ +_(aten, normal) \ +_(aten, normal_) \ +_(aten, normal_functional) \ +_(aten, not_equal) \ +_(aten, not_equal_) \ +_(aten, nuclear_norm) \ +_(aten, numpy_T) \ +_(aten, one_hot) \ +_(aten, ones) \ +_(aten, ones_like) \ +_(aten, orgqr) \ +_(aten, ormqr) \ +_(aten, outer) \ +_(aten, output_nr) \ +_(aten, pad) \ +_(aten, pad_sequence) \ +_(aten, pairwise_distance) \ +_(aten, pdist) \ +_(aten, permute) \ +_(aten, permute_copy) \ +_(aten, pin_memory) \ +_(aten, pinverse) \ +_(aten, pixel_shuffle) \ +_(aten, pixel_unshuffle) \ +_(aten, poisson) \ +_(aten, poisson_nll_loss) \ +_(aten, polar) \ +_(aten, polygamma) \ +_(aten, polygamma_) \ +_(aten, positive) \ +_(aten, 
pow) \ +_(aten, pow_) \ +_(aten, prelu) \ +_(aten, prod) \ +_(aten, promote_types) \ +_(aten, put) \ +_(aten, put_) \ +_(aten, q_per_channel_axis) \ +_(aten, q_per_channel_scales) \ +_(aten, q_per_channel_zero_points) \ +_(aten, q_scale) \ +_(aten, q_zero_point) \ +_(aten, qr) \ +_(aten, qscheme) \ +_(aten, quantile) \ +_(aten, quantize_per_channel) \ +_(aten, quantize_per_tensor) \ +_(aten, quantize_per_tensor_dynamic) \ +_(aten, quantized_batch_norm) \ +_(aten, quantized_gru_cell) \ +_(aten, quantized_lstm_cell) \ +_(aten, quantized_max_pool1d) \ +_(aten, quantized_max_pool2d) \ +_(aten, quantized_max_pool3d) \ +_(aten, quantized_rnn_relu_cell) \ +_(aten, quantized_rnn_tanh_cell) \ +_(aten, rad2deg) \ +_(aten, rad2deg_) \ +_(aten, rand) \ +_(aten, rand_like) \ +_(aten, randint) \ +_(aten, randint_like) \ +_(aten, randn) \ +_(aten, randn_like) \ +_(aten, random) \ +_(aten, random_) \ +_(aten, randperm) \ +_(aten, range) \ +_(aten, ravel) \ +_(aten, real) \ +_(aten, reciprocal) \ +_(aten, reciprocal_) \ +_(aten, record_stream) \ +_(aten, refine_names) \ +_(aten, reflection_pad1d) \ +_(aten, reflection_pad1d_backward) \ +_(aten, reflection_pad2d) \ +_(aten, reflection_pad2d_backward) \ +_(aten, reflection_pad3d) \ +_(aten, reflection_pad3d_backward) \ +_(aten, relu) \ +_(aten, relu6) \ +_(aten, relu6_) \ +_(aten, relu_) \ +_(aten, remainder) \ +_(aten, remainder_) \ +_(aten, rename) \ +_(aten, rename_) \ +_(aten, renorm) \ +_(aten, renorm_) \ +_(aten, repeat) \ +_(aten, repeat_interleave) \ +_(aten, replication_pad1d) \ +_(aten, replication_pad1d_backward) \ +_(aten, replication_pad2d) \ +_(aten, replication_pad2d_backward) \ +_(aten, replication_pad3d) \ +_(aten, replication_pad3d_backward) \ +_(aten, requires_grad) \ +_(aten, requires_grad_) \ +_(aten, reshape) \ +_(aten, reshape_as) \ +_(aten, resize) \ +_(aten, resize_) \ +_(aten, resize_as) \ +_(aten, resize_as_) \ +_(aten, resize_as_sparse) \ +_(aten, resize_as_sparse_) \ +_(aten, resolve_conj) \ +_(aten, 
resolve_neg) \ +_(aten, result_type) \ +_(aten, retain_grad) \ +_(aten, retains_grad) \ +_(aten, rms_norm) \ +_(aten, rnn_relu) \ +_(aten, rnn_relu_cell) \ +_(aten, rnn_tanh) \ +_(aten, rnn_tanh_cell) \ +_(aten, roll) \ +_(aten, rot90) \ +_(aten, round) \ +_(aten, round_) \ +_(aten, row_indices) \ +_(aten, row_indices_copy) \ +_(aten, row_stack) \ +_(aten, rrelu) \ +_(aten, rrelu_) \ +_(aten, rrelu_with_noise) \ +_(aten, rrelu_with_noise_) \ +_(aten, rrelu_with_noise_backward) \ +_(aten, rrelu_with_noise_functional) \ +_(aten, rshift) \ +_(aten, rsqrt) \ +_(aten, rsqrt_) \ +_(aten, rsub) \ +_(aten, scalar_tensor) \ +_(aten, scaled_dot_product_attention) \ +_(aten, scatter) \ +_(aten, scatter_) \ +_(aten, scatter_add) \ +_(aten, scatter_add_) \ +_(aten, scatter_reduce) \ +_(aten, scatter_reduce_) \ +_(aten, searchsorted) \ +_(aten, segment_reduce) \ +_(aten, select) \ +_(aten, select_backward) \ +_(aten, select_copy) \ +_(aten, select_scatter) \ +_(aten, selu) \ +_(aten, selu_) \ +_(aten, set) \ +_(aten, set_) \ +_(aten, set_data) \ +_(aten, sgn) \ +_(aten, sgn_) \ +_(aten, sigmoid) \ +_(aten, sigmoid_) \ +_(aten, sigmoid_backward) \ +_(aten, sign) \ +_(aten, sign_) \ +_(aten, signbit) \ +_(aten, silu) \ +_(aten, silu_) \ +_(aten, silu_backward) \ +_(aten, sin) \ +_(aten, sin_) \ +_(aten, sinc) \ +_(aten, sinc_) \ +_(aten, sinh) \ +_(aten, sinh_) \ +_(aten, size) \ +_(aten, slice) \ +_(aten, slice_backward) \ +_(aten, slice_copy) \ +_(aten, slice_inverse) \ +_(aten, slice_scatter) \ +_(aten, slogdet) \ +_(aten, slow_conv3d) \ +_(aten, slow_conv3d_forward) \ +_(aten, slow_conv_dilated2d) \ +_(aten, slow_conv_dilated3d) \ +_(aten, slow_conv_transpose2d) \ +_(aten, slow_conv_transpose3d) \ +_(aten, smm) \ +_(aten, smooth_l1_loss) \ +_(aten, smooth_l1_loss_backward) \ +_(aten, soft_margin_loss) \ +_(aten, soft_margin_loss_backward) \ +_(aten, softmax) \ +_(aten, softplus) \ +_(aten, softplus_backward) \ +_(aten, softshrink) \ +_(aten, softshrink_backward) \ +_(aten, 
sort) \ +_(aten, sparse_bsc_tensor) \ +_(aten, sparse_bsr_tensor) \ +_(aten, sparse_compressed_tensor) \ +_(aten, sparse_coo_tensor) \ +_(aten, sparse_csc_tensor) \ +_(aten, sparse_csr_tensor) \ +_(aten, sparse_dim) \ +_(aten, sparse_mask) \ +_(aten, sparse_resize) \ +_(aten, sparse_resize_) \ +_(aten, sparse_resize_and_clear) \ +_(aten, sparse_resize_and_clear_) \ +_(aten, sparse_sampled_addmm) \ +_(aten, special_airy_ai) \ +_(aten, special_bessel_j0) \ +_(aten, special_bessel_j1) \ +_(aten, special_bessel_y0) \ +_(aten, special_bessel_y1) \ +_(aten, special_chebyshev_polynomial_t) \ +_(aten, special_chebyshev_polynomial_u) \ +_(aten, special_chebyshev_polynomial_v) \ +_(aten, special_chebyshev_polynomial_w) \ +_(aten, special_digamma) \ +_(aten, special_entr) \ +_(aten, special_erf) \ +_(aten, special_erfc) \ +_(aten, special_erfcx) \ +_(aten, special_erfinv) \ +_(aten, special_exp2) \ +_(aten, special_expit) \ +_(aten, special_expm1) \ +_(aten, special_gammainc) \ +_(aten, special_gammaincc) \ +_(aten, special_gammaln) \ +_(aten, special_hermite_polynomial_h) \ +_(aten, special_hermite_polynomial_he) \ +_(aten, special_i0) \ +_(aten, special_i0e) \ +_(aten, special_i1) \ +_(aten, special_i1e) \ +_(aten, special_laguerre_polynomial_l) \ +_(aten, special_legendre_polynomial_p) \ +_(aten, special_log1p) \ +_(aten, special_log_ndtr) \ +_(aten, special_log_softmax) \ +_(aten, special_logit) \ +_(aten, special_logsumexp) \ +_(aten, special_modified_bessel_i0) \ +_(aten, special_modified_bessel_i1) \ +_(aten, special_modified_bessel_k0) \ +_(aten, special_modified_bessel_k1) \ +_(aten, special_multigammaln) \ +_(aten, special_ndtr) \ +_(aten, special_ndtri) \ +_(aten, special_polygamma) \ +_(aten, special_psi) \ +_(aten, special_round) \ +_(aten, special_scaled_modified_bessel_k0) \ +_(aten, special_scaled_modified_bessel_k1) \ +_(aten, special_shifted_chebyshev_polynomial_t) \ +_(aten, special_shifted_chebyshev_polynomial_u) \ +_(aten, 
special_shifted_chebyshev_polynomial_v) \ +_(aten, special_shifted_chebyshev_polynomial_w) \ +_(aten, special_sinc) \ +_(aten, special_softmax) \ +_(aten, special_spherical_bessel_j0) \ +_(aten, special_xlog1py) \ +_(aten, special_xlogy) \ +_(aten, special_zeta) \ +_(aten, split) \ +_(aten, split_copy) \ +_(aten, split_with_sizes) \ +_(aten, split_with_sizes_copy) \ +_(aten, sqrt) \ +_(aten, sqrt_) \ +_(aten, square) \ +_(aten, square_) \ +_(aten, squeeze) \ +_(aten, squeeze_) \ +_(aten, squeeze_copy) \ +_(aten, sspaddmm) \ +_(aten, stack) \ +_(aten, std) \ +_(aten, std_mean) \ +_(aten, stft) \ +_(aten, stride) \ +_(aten, sub) \ +_(aten, sub_) \ +_(aten, subtract) \ +_(aten, subtract_) \ +_(aten, sum) \ +_(aten, sum_to_size) \ +_(aten, svd) \ +_(aten, swapaxes) \ +_(aten, swapaxes_) \ +_(aten, swapdims) \ +_(aten, swapdims_) \ +_(aten, sym_constrain_range) \ +_(aten, sym_constrain_range_for_size) \ +_(aten, sym_is_contiguous) \ +_(aten, sym_numel) \ +_(aten, sym_size) \ +_(aten, sym_storage_offset) \ +_(aten, sym_stride) \ +_(aten, t) \ +_(aten, t_) \ +_(aten, t_copy) \ +_(aten, take) \ +_(aten, take_along_dim) \ +_(aten, tan) \ +_(aten, tan_) \ +_(aten, tanh) \ +_(aten, tanh_) \ +_(aten, tanh_backward) \ +_(aten, tensor_split) \ +_(aten, tensordot) \ +_(aten, thnn_conv2d) \ +_(aten, threshold) \ +_(aten, threshold_) \ +_(aten, threshold_backward) \ +_(aten, tile) \ +_(aten, to) \ +_(aten, to_dense) \ +_(aten, to_dense_backward) \ +_(aten, to_mkldnn) \ +_(aten, to_mkldnn_backward) \ +_(aten, to_padded_tensor) \ +_(aten, to_sparse) \ +_(aten, to_sparse_bsc) \ +_(aten, to_sparse_bsr) \ +_(aten, to_sparse_csc) \ +_(aten, to_sparse_csr) \ +_(aten, topk) \ +_(aten, trace) \ +_(aten, trace_backward) \ +_(aten, transpose) \ +_(aten, transpose_) \ +_(aten, transpose_copy) \ +_(aten, trapezoid) \ +_(aten, trapz) \ +_(aten, triangular_solve) \ +_(aten, tril) \ +_(aten, tril_) \ +_(aten, tril_indices) \ +_(aten, triplet_margin_loss) \ +_(aten, triu) \ +_(aten, triu_) \ 
+_(aten, triu_indices) \ +_(aten, true_divide) \ +_(aten, true_divide_) \ +_(aten, trunc) \ +_(aten, trunc_) \ +_(aten, type_as) \ +_(aten, unbind) \ +_(aten, unbind_copy) \ +_(aten, unflatten) \ +_(aten, unflatten_dense_tensors) \ +_(aten, unfold) \ +_(aten, unfold_backward) \ +_(aten, unfold_copy) \ +_(aten, uniform) \ +_(aten, uniform_) \ +_(aten, unique_consecutive) \ +_(aten, unique_dim) \ +_(aten, unique_dim_consecutive) \ +_(aten, unsafe_chunk) \ +_(aten, unsafe_split) \ +_(aten, unsafe_split_with_sizes) \ +_(aten, unsqueeze) \ +_(aten, unsqueeze_) \ +_(aten, unsqueeze_copy) \ +_(aten, upsample_bicubic2d) \ +_(aten, upsample_bicubic2d_backward) \ +_(aten, upsample_bilinear2d) \ +_(aten, upsample_bilinear2d_backward) \ +_(aten, upsample_linear1d) \ +_(aten, upsample_linear1d_backward) \ +_(aten, upsample_nearest1d) \ +_(aten, upsample_nearest1d_backward) \ +_(aten, upsample_nearest2d) \ +_(aten, upsample_nearest2d_backward) \ +_(aten, upsample_nearest3d) \ +_(aten, upsample_nearest3d_backward) \ +_(aten, upsample_trilinear3d) \ +_(aten, upsample_trilinear3d_backward) \ +_(aten, value_selecting_reduction_backward) \ +_(aten, values) \ +_(aten, values_copy) \ +_(aten, vander) \ +_(aten, var) \ +_(aten, var_mean) \ +_(aten, vdot) \ +_(aten, view) \ +_(aten, view_as) \ +_(aten, view_as_complex) \ +_(aten, view_as_complex_copy) \ +_(aten, view_as_real) \ +_(aten, view_as_real_copy) \ +_(aten, view_copy) \ +_(aten, vsplit) \ +_(aten, vstack) \ +_(aten, where) \ +_(aten, xlogy) \ +_(aten, xlogy_) \ +_(aten, zero) \ +_(aten, zero_) \ +_(aten, zeros) \ +_(aten, zeros_like) + +#define FORALL_ATTR_BASE_SYMBOLS(_) \ +_(attr, A) \ +_(attr, B) \ +_(attr, C) \ +_(attr, H) \ +_(attr, HxW) \ +_(attr, K) \ +_(attr, L) \ +_(attr, LD) \ +_(attr, LU) \ +_(attr, LU_data) \ +_(attr, LU_pivots) \ +_(attr, M) \ +_(attr, N) \ +_(attr, P) \ +_(attr, Q) \ +_(attr, R) \ +_(attr, S) \ +_(attr, U) \ +_(attr, UPLO) \ +_(attr, V) \ +_(attr, Vh) \ +_(attr, W) \ +_(attr, X) \ +_(attr, a) \ 
+_(attr, abs) \ +_(attr, accumulate) \ +_(attr, accumulate_matches) \ +_(attr, activation) \ +_(attr, addends) \ +_(attr, adjoint) \ +_(attr, alg_id) \ +_(attr, algorithm) \ +_(attr, alibi_slopes) \ +_(attr, align_corners) \ +_(attr, align_to_window) \ +_(attr, allow_tf32) \ +_(attr, alpha) \ +_(attr, amsgrad) \ +_(attr, anchor) \ +_(attr, angle) \ +_(attr, any) \ +_(attr, api_name) \ +_(attr, append) \ +_(attr, approximate) \ +_(attr, arg1) \ +_(attr, arg2) \ +_(attr, arg3) \ +_(attr, arg_out) \ +_(attr, assert_msg) \ +_(attr, assume_unique) \ +_(attr, atol) \ +_(attr, attn_bias) \ +_(attr, attn_mask) \ +_(attr, average_attn_weights) \ +_(attr, averaging_const) \ +_(attr, aweights) \ +_(attr, axis) \ +_(attr, axis0) \ +_(attr, axis1) \ +_(attr, b) \ +_(attr, b_hh) \ +_(attr, b_ih) \ +_(attr, bag_size) \ +_(attr, base) \ +_(attr, batch1) \ +_(attr, batch2) \ +_(attr, batch_dim) \ +_(attr, batch_first) \ +_(attr, batch_size) \ +_(attr, batch_sizes) \ +_(attr, benchmark) \ +_(attr, beta) \ +_(attr, beta1) \ +_(attr, beta2) \ +_(attr, bias) \ +_(attr, bias_defined) \ +_(attr, bias_g) \ +_(attr, bias_requires_grad) \ +_(attr, bias_sizes) \ +_(attr, bidirectional) \ +_(attr, bin_edges) \ +_(attr, bins) \ +_(attr, bit_width) \ +_(attr, blank) \ +_(attr, block_size) \ +_(attr, blocksize) \ +_(attr, boundaries) \ +_(attr, buffer) \ +_(attr, ccol_indices) \ +_(attr, cdim) \ +_(attr, cdist) \ +_(attr, ceil_mode) \ +_(attr, cell_state_fwd) \ +_(attr, center) \ +_(attr, ch_axis) \ +_(attr, check_errors) \ +_(attr, check_pinning) \ +_(attr, chunks) \ +_(attr, coalesced) \ +_(attr, coefficients) \ +_(attr, col) \ +_(attr, col_indices) \ +_(attr, col_offsets) \ +_(attr, col_offsets_hh) \ +_(attr, col_offsets_ih) \ +_(attr, compressed_A) \ +_(attr, compressed_idx) \ +_(attr, compressed_indices) \ +_(attr, compressed_indices_dtype) \ +_(attr, compute_log_sumexp) \ +_(attr, compute_mode) \ +_(attr, compute_uv) \ +_(attr, compute_v) \ +_(attr, condition) \ +_(attr, contraction_dim) \ 
+_(attr, copy) \ +_(attr, correction) \ +_(attr, count) \ +_(attr, count_include_pad) \ +_(attr, counts) \ +_(attr, cpu_dtype) \ +_(attr, cpu_enabled) \ +_(attr, cpu_nested_shape_example) \ +_(attr, create_graph) \ +_(attr, crow_indices) \ +_(attr, cu_seqlens_k) \ +_(attr, cu_seqlens_q) \ +_(attr, cuda_dtype) \ +_(attr, cuda_enabled) \ +_(attr, cudnn_enable) \ +_(attr, cudnn_enabled) \ +_(attr, cum_seq_k) \ +_(attr, cum_seq_q) \ +_(attr, custom_mask_type) \ +_(attr, cx) \ +_(attr, cx_) \ +_(attr, cx_tmp) \ +_(attr, cy) \ +_(attr, cy_) \ +_(attr, d) \ +_(attr, dampening) \ +_(attr, data) \ +_(attr, decimals) \ +_(attr, delta) \ +_(attr, dense) \ +_(attr, dense_B) \ +_(attr, dense_dim) \ +_(attr, density) \ +_(attr, dep_token) \ +_(attr, descending) \ +_(attr, destination) \ +_(attr, deterministic) \ +_(attr, device) \ +_(attr, device_index) \ +_(attr, dgrad_glu) \ +_(attr, diagonal) \ +_(attr, diagonals) \ +_(attr, dilation) \ +_(attr, dim) \ +_(attr, dim0) \ +_(attr, dim1) \ +_(attr, dim2) \ +_(attr, dimension) \ +_(attr, dims) \ +_(attr, dims_other) \ +_(attr, dims_self) \ +_(attr, divisor_override) \ +_(attr, downscale_factor) \ +_(attr, driver) \ +_(attr, dropout) \ +_(attr, dropout_mask) \ +_(attr, dropout_p) \ +_(attr, dropout_seed) \ +_(attr, dropout_state) \ +_(attr, dst) \ +_(attr, dtype) \ +_(attr, dual) \ +_(attr, dummy) \ +_(attr, dx) \ +_(attr, edge_order) \ +_(attr, eigenvalues) \ +_(attr, eigenvectors) \ +_(attr, eigvals) \ +_(attr, eigvecs) \ +_(attr, element) \ +_(attr, elements) \ +_(attr, ellipsis_idx) \ +_(attr, embed_dim) \ +_(attr, enable_gqa) \ +_(attr, end) \ +_(attr, end_dim) \ +_(attr, eps) \ +_(attr, epsilon) \ +_(attr, equal_nan) \ +_(attr, equation) \ +_(attr, exp_avg_sqs) \ +_(attr, exp_avgs) \ +_(attr, expand1) \ +_(attr, expand2) \ +_(attr, expand3) \ +_(attr, exponent) \ +_(attr, exponential_average_factor) \ +_(attr, fake_quant_enabled) \ +_(attr, fake_quant_on) \ +_(attr, ffn_bias_1) \ +_(attr, ffn_bias_2) \ +_(attr, ffn_weight_1) 
\ +_(attr, ffn_weight_2) \ +_(attr, filename) \ +_(attr, fill) \ +_(attr, fill_value) \ +_(attr, flat) \ +_(attr, forward) \ +_(attr, found_inf) \ +_(attr, from) \ +_(attr, from_) \ +_(attr, full) \ +_(attr, full_matrices) \ +_(attr, fuse_transform_0213) \ +_(attr, fweights) \ +_(attr, g) \ +_(attr, gO) \ +_(attr, generator) \ +_(attr, ggI) \ +_(attr, ggW) \ +_(attr, ggb) \ +_(attr, glu) \ +_(attr, grad) \ +_(attr, grad_bias) \ +_(attr, grad_cy) \ +_(attr, grad_factor) \ +_(attr, grad_glu) \ +_(attr, grad_hy) \ +_(attr, grad_in) \ +_(attr, grad_input) \ +_(attr, grad_input_mask) \ +_(attr, grad_out) \ +_(attr, grad_out_) \ +_(attr, grad_output) \ +_(attr, grad_scale) \ +_(attr, grad_w) \ +_(attr, grad_weight) \ +_(attr, grad_x) \ +_(attr, grad_y) \ +_(attr, gradient) \ +_(attr, grads) \ +_(attr, grid) \ +_(attr, group) \ +_(attr, groups) \ +_(attr, growth_interval) \ +_(attr, growth_tracker) \ +_(attr, half_to_float) \ +_(attr, has_bias) \ +_(attr, has_biases) \ +_(attr, hermitian) \ +_(attr, hidden_bias) \ +_(attr, hidden_gates) \ +_(attr, hidden_size) \ +_(attr, high) \ +_(attr, hist) \ +_(attr, hop_length) \ +_(attr, hx) \ +_(attr, hx_) \ +_(attr, hy_) \ +_(attr, i1) \ +_(attr, i2) \ +_(attr, i3) \ +_(attr, ignore_index) \ +_(attr, imag) \ +_(attr, impl_index) \ +_(attr, implicit) \ +_(attr, in_features) \ +_(attr, include_last_offset) \ +_(attr, include_self) \ +_(attr, increasing) \ +_(attr, ind) \ +_(attr, index) \ +_(attr, index_dtype) \ +_(attr, indexing) \ +_(attr, indices) \ +_(attr, info) \ +_(attr, initial) \ +_(attr, innerKTiles) \ +_(attr, inp) \ +_(attr, input) \ +_(attr, input1) \ +_(attr, input2) \ +_(attr, input3) \ +_(attr, input_bias) \ +_(attr, input_dtype) \ +_(attr, input_g) \ +_(attr, input_gates) \ +_(attr, input_lengths) \ +_(attr, input_scale) \ +_(attr, input_size) \ +_(attr, input_sizes) \ +_(attr, input_zero_point) \ +_(attr, inputs) \ +_(attr, interpolation) \ +_(attr, interpolation_mode) \ +_(attr, inv_scale) \ +_(attr, inverse) \ 
+_(attr, invert) \ +_(attr, invstd) \ +_(attr, is_causal) \ +_(attr, is_coalesced) \ +_(attr, is_crow) \ +_(attr, is_first_step) \ +_(attr, is_matrix) \ +_(attr, is_result) \ +_(attr, is_target) \ +_(attr, k) \ +_(attr, keepdim) \ +_(attr, kernel_size) \ +_(attr, key) \ +_(attr, label_smoothing) \ +_(attr, lambd) \ +_(attr, largest) \ +_(attr, last_dim_size) \ +_(attr, layersOutputs) \ +_(attr, layout) \ +_(attr, left) \ +_(attr, length) \ +_(attr, lengths) \ +_(attr, level) \ +_(attr, like) \ +_(attr, list) \ +_(attr, log_alpha) \ +_(attr, log_input) \ +_(attr, log_probs) \ +_(attr, log_target) \ +_(attr, logabsdet) \ +_(attr, logsumexp) \ +_(attr, low) \ +_(attr, lower) \ +_(attr, lr) \ +_(attr, lr_decay) \ +_(attr, ltm) \ +_(attr, m) \ +_(attr, mantissa) \ +_(attr, margin) \ +_(attr, mask) \ +_(attr, mask_check) \ +_(attr, mask_type) \ +_(attr, masked_grad) \ +_(attr, mat) \ +_(attr, mat1) \ +_(attr, mat1_meta) \ +_(attr, mat2) \ +_(attr, matrices) \ +_(attr, max) \ +_(attr, max_exp_avg_sqs) \ +_(attr, max_k) \ +_(attr, max_lengths) \ +_(attr, max_norm) \ +_(attr, max_q) \ +_(attr, max_seqlen) \ +_(attr, max_seqlen_k) \ +_(attr, max_seqlen_q) \ +_(attr, max_size) \ +_(attr, max_val) \ +_(attr, max_values) \ +_(attr, maximize) \ +_(attr, maximum_indices) \ +_(attr, maxnorm) \ +_(attr, mean) \ +_(attr, median) \ +_(attr, memory_format) \ +_(attr, meta) \ +_(attr, min) \ +_(attr, min_indices) \ +_(attr, min_seqlen) \ +_(attr, min_val) \ +_(attr, minlength) \ +_(attr, mode) \ +_(attr, momentum) \ +_(attr, momentum_buffer_list) \ +_(attr, n) \ +_(attr, n_bins) \ +_(attr, n_fft) \ +_(attr, names) \ +_(attr, nan) \ +_(attr, need_weights) \ +_(attr, neg_log_likelihood) \ +_(attr, negative) \ +_(attr, negative_slope) \ +_(attr, neginf) \ +_(attr, nested_size) \ +_(attr, nested_strides) \ +_(attr, nesterov) \ +_(attr, new_data) \ +_(attr, nnz) \ +_(attr, noise) \ +_(attr, non_blocking) \ +_(attr, norm) \ +_(attr, norm_bias_1) \ +_(attr, norm_bias_2) \ +_(attr, norm_first) 
\ +_(attr, norm_type) \ +_(attr, norm_weight_1) \ +_(attr, norm_weight_2) \ +_(attr, normalization) \ +_(attr, normalized) \ +_(attr, normalized_shape) \ +_(attr, nt_example) \ +_(attr, num_chunks) \ +_(attr, num_classes) \ +_(attr, num_generated) \ +_(attr, num_groups) \ +_(attr, num_head) \ +_(attr, num_heads) \ +_(attr, num_layers) \ +_(attr, num_parallel) \ +_(attr, num_samples) \ +_(attr, num_splits_key) \ +_(attr, num_weights) \ +_(attr, numel) \ +_(attr, observer_on) \ +_(attr, offs) \ +_(attr, offset) \ +_(attr, offset2bag) \ +_(attr, offsets) \ +_(attr, onesided) \ +_(attr, ord) \ +_(attr, order) \ +_(attr, other) \ +_(attr, out) \ +_(attr, out0) \ +_(attr, out1) \ +_(attr, out2) \ +_(attr, out3) \ +_(attr, out4) \ +_(attr, out5) \ +_(attr, out6) \ +_(attr, out_channel) \ +_(attr, out_dim) \ +_(attr, out_dtype) \ +_(attr, out_features) \ +_(attr, out_int32) \ +_(attr, outdim) \ +_(attr, output) \ +_(attr, output_mask) \ +_(attr, output_padding) \ +_(attr, output_scale) \ +_(attr, output_size) \ +_(attr, output_zero_point) \ +_(attr, p) \ +_(attr, packed) \ +_(attr, packed_hh) \ +_(attr, packed_ih) \ +_(attr, packed_weight) \ +_(attr, packed_weights) \ +_(attr, pad) \ +_(attr, pad_mode) \ +_(attr, padded) \ +_(attr, padding) \ +_(attr, padding_idx) \ +_(attr, padding_mode) \ +_(attr, padding_side) \ +_(attr, padding_value) \ +_(attr, params) \ +_(attr, path) \ +_(attr, pdist) \ +_(attr, per_row_fake_quant) \ +_(attr, per_sample_weights) \ +_(attr, periodic) \ +_(attr, philox_offset) \ +_(attr, philox_seed) \ +_(attr, physical_layout) \ +_(attr, pin_memory) \ +_(attr, pivot) \ +_(attr, pivots) \ +_(attr, plain_idx) \ +_(attr, plain_indices) \ +_(attr, pos_weight) \ +_(attr, posinf) \ +_(attr, positive) \ +_(attr, pow) \ +_(attr, prepend) \ +_(attr, primal) \ +_(attr, prob) \ +_(attr, proj_bias) \ +_(attr, proj_size) \ +_(attr, proj_weight) \ +_(attr, q) \ +_(attr, qGroupSize) \ +_(attr, qScale) \ +_(attr, qScaleAndZeros) \ +_(attr, qZeros) \ +_(attr, qkv) \ 
+_(attr, qkv_bias) \ +_(attr, qkv_weight) \ +_(attr, qtensor) \ +_(attr, quant_max) \ +_(attr, quant_min) \ +_(attr, quasi) \ +_(attr, query) \ +_(attr, r) \ +_(attr, ragged_idx) \ +_(attr, random_samples) \ +_(attr, range) \ +_(attr, rank) \ +_(attr, ratio) \ +_(attr, rcond) \ +_(attr, real) \ +_(attr, recipe_a) \ +_(attr, recipe_b) \ +_(attr, reduce) \ +_(attr, reduce_range) \ +_(attr, reduction) \ +_(attr, repeats) \ +_(attr, replacement) \ +_(attr, requires_grad) \ +_(attr, reserve) \ +_(attr, reserveSpace) \ +_(attr, reservedSpace) \ +_(attr, residuals) \ +_(attr, result) \ +_(attr, retain_graph) \ +_(attr, return_complex) \ +_(attr, return_counts) \ +_(attr, return_debug_mask) \ +_(attr, return_inverse) \ +_(attr, reverse) \ +_(attr, right) \ +_(attr, rng_state) \ +_(attr, rounding_mode) \ +_(attr, row) \ +_(attr, row_indices) \ +_(attr, rstd) \ +_(attr, rtol) \ +_(attr, running_max) \ +_(attr, running_mean) \ +_(attr, running_min) \ +_(attr, running_var) \ +_(attr, s) \ +_(attr, save_invstd) \ +_(attr, save_mean) \ +_(attr, save_var) \ +_(attr, save_var_transform) \ +_(attr, saved_g) \ +_(attr, saved_norms) \ +_(attr, saved_v) \ +_(attr, scalar) \ +_(attr, scalar1) \ +_(attr, scalar2) \ +_(attr, scalars) \ +_(attr, scale) \ +_(attr, scale_a) \ +_(attr, scale_b) \ +_(attr, scale_backoff_factor) \ +_(attr, scale_factors) \ +_(attr, scale_grad_by_freq) \ +_(attr, scale_growth_factor) \ +_(attr, scale_hh) \ +_(attr, scale_ih) \ +_(attr, scale_result) \ +_(attr, scales) \ +_(attr, scales_d) \ +_(attr, scales_h) \ +_(attr, scales_w) \ +_(attr, scales_zeros) \ +_(attr, sections) \ +_(attr, seed) \ +_(attr, self) \ +_(attr, self_is_result) \ +_(attr, self_num_batch_dims) \ +_(attr, self_or_result) \ +_(attr, self_sizes) \ +_(attr, seqlen_k) \ +_(attr, sequences) \ +_(attr, seqused_k) \ +_(attr, shape) \ +_(attr, shared) \ +_(attr, shared_storage_dqdkdv) \ +_(attr, shifts) \ +_(attr, side) \ +_(attr, sigma) \ +_(attr, sign) \ +_(attr, singular_values) \ +_(attr, 
size) \ +_(attr, sizes) \ +_(attr, skip_first) \ +_(attr, sobolstate) \ +_(attr, solution) \ +_(attr, some) \ +_(attr, sorted) \ +_(attr, sorted_sequence) \ +_(attr, sorter) \ +_(attr, source) \ +_(attr, spacing) \ +_(attr, sparse) \ +_(attr, sparse_dim) \ +_(attr, sparse_grad) \ +_(attr, split_k) \ +_(attr, split_k_mode) \ +_(attr, split_size) \ +_(attr, split_sizes) \ +_(attr, src) \ +_(attr, stable) \ +_(attr, start) \ +_(attr, start_dim) \ +_(attr, state_steps) \ +_(attr, state_sums) \ +_(attr, std) \ +_(attr, step) \ +_(attr, steps) \ +_(attr, storage_offset) \ +_(attr, stride) \ +_(attr, sum_S) \ +_(attr, sum_dy) \ +_(attr, sum_dy_xmu) \ +_(attr, sumdim) \ +_(attr, swap) \ +_(attr, swizzle_a) \ +_(attr, swizzle_b) \ +_(attr, symmetric_quant) \ +_(attr, t) \ +_(attr, tangent) \ +_(attr, target) \ +_(attr, target_lengths) \ +_(attr, targets) \ +_(attr, tau) \ +_(attr, tensor) \ +_(attr, tensor1) \ +_(attr, tensor2) \ +_(attr, tensor_indices_or_sections) \ +_(attr, tensors) \ +_(attr, tensors1) \ +_(attr, test_element) \ +_(attr, test_elements) \ +_(attr, the_template) \ +_(attr, theta) \ +_(attr, thread_masks) \ +_(attr, threshold) \ +_(attr, to) \ +_(attr, tol) \ +_(attr, total) \ +_(attr, total_L) \ +_(attr, total_length) \ +_(attr, total_weight) \ +_(attr, train) \ +_(attr, training) \ +_(attr, transpose) \ +_(attr, transpose_result) \ +_(attr, transposed) \ +_(attr, type1) \ +_(attr, type2) \ +_(attr, unbiased) \ +_(attr, unitriangular) \ +_(attr, unpack_data) \ +_(attr, unpack_pivots) \ +_(attr, unroll_dim) \ +_(attr, unsafe) \ +_(attr, unused) \ +_(attr, update) \ +_(attr, upper) \ +_(attr, upscale_factor) \ +_(attr, use_cutlass) \ +_(attr, use_fast_accum) \ +_(attr, use_gelu) \ +_(attr, use_input_stats) \ +_(attr, v) \ +_(attr, value) \ +_(attr, values) \ +_(attr, var) \ +_(attr, vec) \ +_(attr, vec1) \ +_(attr, vec2) \ +_(attr, w_hh) \ +_(attr, w_ih) \ +_(attr, weight) \ +_(attr, weight0) \ +_(attr, weight1) \ +_(attr, weight2) \ +_(attr, weight3) \ 
+_(attr, weight4) \ +_(attr, weight_arr) \ +_(attr, weight_buf) \ +_(attr, weight_decay) \ +_(attr, weight_g) \ +_(attr, weight_scale) \ +_(attr, weight_stride0) \ +_(attr, weight_zero_point) \ +_(attr, weights) \ +_(attr, win_length) \ +_(attr, window) \ +_(attr, window_length) \ +_(attr, window_size) \ +_(attr, window_size_left) \ +_(attr, window_size_right) \ +_(attr, with_replacement) \ +_(attr, workspace) \ +_(attr, wrap) \ +_(attr, x) \ +_(attr, x1) \ +_(attr, x2) \ +_(attr, y) \ +_(attr, z) \ +_(attr, z_state) \ +_(attr, zero_infinity) \ +_(attr, zero_point) \ +_(attr, zero_point_hh) \ +_(attr, zero_point_ih) \ +_(attr, zero_points) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/blob.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/blob.h new file mode 100644 index 0000000000000000000000000000000000000000..f2e5e419a26260fdc2effdb8a2e93707f1ebc49a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/blob.h @@ -0,0 +1,209 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace caffe2 { + +class Tensor; + +/** + * @brief Blob is a general container that hosts a typed pointer. + * + * A Blob hosts a pointer as well as its type, and takes charge of deleting it + * properly when the blob is deallocated or re-allocated with a new type. A blob + * could contain anything, although the most common case is to contain a Tensor. + */ +class TORCH_API Blob final : public c10::intrusive_ptr_target { + public: + /** + * Initializes an empty Blob. 
+ */ + Blob() noexcept = default; + ~Blob() override { + Reset(); + } + + Blob(Blob&& other) noexcept : Blob() { + swap(other); + } + + Blob& operator=(Blob&& other) noexcept { + Blob(std::move(other)).swap(*this); + return *this; + } + + /** + * Checks if the content stored in the blob is of type T. + */ + template + bool IsType() const noexcept { + return meta_.Match(); + } + + /** + * Returns the meta info of the blob. + */ + const TypeMeta meta() const noexcept { + return meta_; + } + + /** + * Returns a printable typename of the blob. + */ + std::string_view TypeName() const noexcept { + return meta_.name(); + } + + /** + * @brief Gets the const reference of the stored object. The code checks if + * the stored object is of the desired type. + */ + // TODO(jerryzh): add a Get(c10::DeviceType) function? + template + const T& Get() const { + TORCH_INTERNAL_ASSERT( + IsType(), + "wrong type for the Blob instance. Blob contains ", + meta_.name(), + " while caller expects ", + TypeMeta::TypeName()); + // TODO: after we add Get(c10::DeviceType) + // and changed all the callsites, we can add + // a static assert here to enforce T != Tensor + return *static_cast(pointer_); + } + + const void* GetRaw() const noexcept { + return pointer_; + } + void* GetRaw() noexcept { + return pointer_; + } + + /** + * @brief Gets a mutable pointer to the stored object. + * + * If the current object is not of the right type, a new object is created + * and the old object is freed. Note that type T should have a default + * constructor. Otherwise, create the object yourself first, and use + * Reset(). + */ + template + T* GetMutable() { + static_assert( + std::is_default_constructible_v, + "GetMutable can't be called with non-default-constructible types. 
" + "Try using specialized methods"); + if (IsType()) { + return static_cast(pointer_); + } else { + // TODO Re-enable logging + // VLOG(1) << "Create new mutable object " << TypeMeta::TypeName(); + return Reset(new T()); + } + } + + template + T* GetMutableOrNull() { + if (IsType()) { + return static_cast(pointer_); + } else { + return nullptr; + } + } + + /** + * Sets the underlying object to the allocated one. The Blob then takes over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * This is used when the underlying class T does not have a default ctor, or + * complex initializations needs to be done outside the blob. + */ + template + T* Reset(T* allocated) { + free_(); + meta_ = TypeMeta::Make(); + pointer_ = static_cast(allocated); + has_ownership_ = true; + return allocated; + } + + /** + * Sets the underlying object to the allocated one, but does not take over + * the ownership of the passed in pointer. If there is already an object in + * the Blob, the old object is freed. + * + * Unlike Reset, this does not take over the ownership of the pointer and the + * caller is responsible for making sure that the lifetime of the allocated + * blob outlasts the lifetime of any access to this blob, until another Reset + * call is made or the blob is destructed. + */ + template + std::remove_const_t* ShareExternal( + std::remove_const_t* allocated) { + return static_cast(ShareExternal( + static_cast(allocated), + TypeMeta::Make>())); + } + + void* ShareExternal(void* allocated, const TypeMeta meta) { + free_(); + meta_ = meta; + pointer_ = allocated; + has_ownership_ = false; + return allocated; + } + + /** + * Resets the Blob to an empty one. + */ + void Reset() { + free_(); + pointer_ = nullptr; + meta_ = TypeMeta(); + has_ownership_ = false; + } + + /** + * @brief Swaps the underlying storage of two blobs. 
+ */ + void swap(Blob& rhs) noexcept { + using std::swap; + swap(meta_, rhs.meta_); + swap(pointer_, rhs.pointer_); + swap(has_ownership_, rhs.has_ownership_); + } + + private: + void free_() { + if (has_ownership_ && pointer_ != nullptr) { + (*meta_.deleteFn())(pointer_); + } + } + + TypeMeta meta_; + void* pointer_{nullptr}; + bool has_ownership_{false}; + + C10_DISABLE_COPY_AND_ASSIGN(Blob); +}; + +inline void swap(Blob& lhs, Blob& rhs) noexcept { + lhs.swap(rhs); +} + +inline std::ostream& operator<<(std::ostream& out, const Blob& v) { + return out << "Blob[" << v.TypeName() << ']'; +} + +} // namespace caffe2 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h new file mode 100644 index 0000000000000000000000000000000000000000..2660a2f88cd6eadb124d31c7e45c3ac84560d130 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/builtin_function.h @@ -0,0 +1,95 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct BuiltinOpFunction : public Function { + BuiltinOpFunction( + c10::QualifiedName qualname, + c10::FunctionSchema schema, + std::function callable, + std::string doc_string = "") + : name_(std::move(qualname)), + callable_(std::move(callable)), + schema_(std::move(schema)), + doc_string_(std::move(doc_string)) { + TORCH_INTERNAL_ASSERT(schema_.returns().size() == 1); + } + + std::string_view doc_string() const override { + return doc_string_; + } + + void run(Stack& stack) override { + callable_(stack); + } + + c10::intrusive_ptr 
runAsync( + Stack& stack, + TaskLauncher /* not used */) override { + run(stack); + auto res = c10::make_intrusive(stack.front().type()); + res->markCompleted(std::move(stack.front())); + return res; + } + + const c10::QualifiedName& qualname() const override { + return name_; + } + + // if this isn't yet defined, run its method_creator function + void ensure_defined() override { + // nop + } + + const c10::FunctionSchema& getSchema() const override { + return schema_; + } + + size_t num_inputs() const override { + return schema_.arguments().size(); + } + + Function& setSchema(c10::FunctionSchema schema) override { + schema_ = std::move(schema); + return *this; + } + + bool call( + Stack& stack, + std::optional /*unused*/, + c10::function_ref /*unused*/) override { + run(stack); + return false; + } + + bool call(Stack& stack, c10::function_ref /*unused*/) + override { + run(stack); + return false; + } + + ~BuiltinOpFunction() override = default; + + private: + c10::QualifiedName name_; + + std::function callable_; + + c10::FunctionSchema schema_; + + std::string doc_string_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h new file mode 100644 index 0000000000000000000000000000000000000000..484afef253a21957ffcf503bfb0c8f95f27519b0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/class_type.h @@ -0,0 +1,446 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + + +namespace torch::jit { +struct CompilationUnit; +struct Function; +} // namespace torch::jit + + +namespace c10 { + +struct FunctionSchema; + +// This enumerator represents the 'kind' of an attribute - a buffer, a parameter, or neither. +// This state is mutually exclusive. Buffers and Parameters can only appear on modules. +enum class AttributeKind { + BUFFER, + PARAMETER, + REGULAR_ATTRIBUTE +}; + +// This structure represents all notional booking entities in a class attribute: name, kind (see: AttributeKind), and type (see: TypePtr). +// Note: This structure does not represent the value of the attribute. +struct TORCH_API ClassAttribute { + public: + ClassAttribute(AttributeKind kind, + TypePtr attributeType, + std::string attributeName) : + kind_(kind), + attributeType_(std::move(attributeType)), + attributeName_(std::move(attributeName)) {} + + AttributeKind getKind() const { + return kind_; + } + + const TypePtr& getType() const { + return attributeType_; + } + + const std::string& getName() const { + return attributeName_; + } + + private: + AttributeKind kind_; + TypePtr attributeType_; + std::string attributeName_; +}; + +/** + * User Defined Types + */ + +struct ClassType; +using ClassTypePtr = std::shared_ptr; +using ::torch::jit::CompilationUnit; + +// This represents a class in TorchScript. 
+struct TORCH_API ClassType : public NamedType { + // This represents an attribute of a class; a name associated with an attribute, and a + // getter and (optional) setter for that attribute. + struct Property { + std::string name; + torch::jit::Function* getter; + torch::jit::Function* setter; + }; + + // Create a class type with name `name` and its methods stored in `cu`. + static ClassTypePtr create( + std::optional qualifiedName, + std::weak_ptr cu, + bool is_module = false, + std::string doc_string = "", + std::vector unresolved_class_attributes = {}); + + bool equals(const Type& rhs) const override { + if (this == &rhs) { + return true; + } + if (auto user_rhs = rhs.castRaw()) { + const auto& lhs_name = name(); + const auto& rhs_name = user_rhs->name(); + return lhs_name.has_value() && lhs_name == rhs_name && + this->compilation_unit() == user_rhs->compilation_unit(); + } + return false; + } + + std::string str() const override { + return annotation_str(); + } + + std::string repr_str() const override { + std::stringstream ss; + ss << str() + << " (of Python compilation unit at: " << compilation_unit().get() << ')'; + return ss.str(); + } + + const std::vector& methods() const; + + TypePtr findAttribute(const std::string& name) const { + size_t pos = 0; + for (const auto& attr : attributes_) { + if (name == attr.getName()) { + break; + } + ++pos; + } + + if (pos >= attributes_.size()) { + return nullptr; + } + return attributes_[pos].getType(); + } + + const TypePtr& getAttribute(const std::string& name) const { + auto slot = findAttributeSlot(name); + TORCH_CHECK( + slot, + repr_str(), + " does not have an attribute with name '", + name, + "'"); + return attributes_[*slot].getType(); + } + + size_t numAttributes() const { + return attributes_.size(); + } + + const TypePtr& getAttribute(size_t slot) const { + AT_ASSERT(slot < attributes_.size()); + return attributes_.at(slot).getType(); + } + + const std::string getAttributeName(size_t slot) const { + 
AT_ASSERT(slot < attributes_.size()); + return attributes_[slot].getName(); + } + + void checkNotExist(const std::string& name, const std::string& what) const; + + // Attributes are stored in a specific slot at runtime for efficiency. + // When emitting instructions we specify the slot so that attribute access is + // a constant lookup + std::optional findAttributeSlot(const std::string& name) const { + size_t slot = 0; + for (const auto& attr : attributes_) { + if (name == attr.getName()) { + return slot; + } + slot++; + } + return std::nullopt; + } + size_t getAttributeSlot(const std::string& name) const { + if (auto r = findAttributeSlot(name)) { + return *r; + } + TORCH_CHECK( + false, + repr_str(), + " does not have an attribute with name '", + name, + "'"); + } + + bool hasAttribute(const std::string& name) const { + return std::find_if( + attributes_.cbegin(), + attributes_.cend(), + [&](const ClassAttribute& attr) { return attr.getName() == name; }) != + attributes_.cend(); + } + + bool isUnresolvedClassAttribute(const std::string& name) const; + + at::ArrayRef containedTypes() const override { + return attributeTypes_; + } + + size_t addAttribute( + const std::string& name, + TypePtr type, + bool is_parameter = false, + bool is_buffer = false); + + // [Internal Only] Remove attribute from the ClassType, + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. + void unsafeRemoveAttribute(const std::string& name); + + // [Internal Only] Change the type of an attribute of the ClassType, + // The caller is responsible to make sure the modification is safe: + // it is unsafe to maintain uses of the old type of the attribute, + // and any code that works on the attribute is now invalid. + // Only newly created code is valid again. 
+ void unsafeChangeAttributeType(const std::string& name, const TypePtr& new_ty); + + // Add attribute \p NAME if it doesn't exist or verify that it has a + // compatible type otherwise. + size_t addOrCheckAttribute( + const std::string& name, + TypePtr ty, + bool is_parameter = false, + bool is_buffer = false) { + auto slot_idx = findAttributeSlot(name); + if (!slot_idx) { + return addAttribute(name, std::move(ty), is_parameter, is_buffer); + } + + TORCH_CHECK( + is_parameter == this->is_parameter(*slot_idx), + "Parameter field mismatch for the field '", + name, + "'"); + const TypePtr& atype = getAttribute(*slot_idx); + TORCH_CHECK( + ty->isSubtypeOf(*atype), + ty->repr_str(), + " is not compatible with the type ", + atype->repr_str(), + " for the field '", + name, + "'"); + return *slot_idx; + } + + // Get the property with the given \p name, if it exists on the class. + std::optional getProperty(const std::string& name); + // Add a property named \p name with \p getter and \p setter as its getter and setter. + void addProperty(const std::string& name, torch::jit::Function* getter, torch::jit::Function* setter); + // Get a list of all properties. 
+ const std::vector& properties() const { + return properties_; + } + + bool hasConstant(const std::string& name) const { + return std::find_if( + constantNames_.cbegin(), + constantNames_.cend(), + [&](const std::string& constant) { return constant == name; }) != + constantNames_.cend(); + } + + size_t addConstant(const std::string& name, const IValue& value); + + std::optional findConstantSlot(const std::string& name) const; + + size_t getConstantSlot(const std::string& name) const { + if (auto r = findConstantSlot(name)) { + return *r; + } + TORCH_CHECK( + false, + repr_str(), + " does not have constant field with the name '", + name, + "'"); + } + + const std::string& getConstantName(size_t slot) const; + + const std::string& doc_string() const { + return doc_string_; + } + + IValue getConstant(const std::string& name) const; + + IValue getConstant(size_t slot) const; + + std::optional findConstant(const std::string& name) const; + + size_t numConstants() const; + + at::ArrayRef constantNames() const { + return constantNames_; + } + + at::ArrayRef constantValues() const; + + // [Internal Only] Remove constant from the ClassType + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. 
+ void unsafeRemoveConstant(const std::string& name); + + TypePtr createWithContained(std::vector contained_types) const override { + auto ptr = ClassType::create(name(), compilation_unit_, is_module()); + AT_ASSERT(numAttributes() == contained_types.size()); + for(size_t i = 0; i < attributes_.size(); ++i) { + AT_ASSERT(attributes_[i].getType()->isSubtypeOf(*contained_types[i])); + ptr->addAttribute(attributes_[i].getName(), std::move(contained_types[i])); + } + // Copy methods over + for (const auto& method : methods()) { + ptr->addMethod(method); + } + return ptr; + } + + bool is_module() const override { + return isModule_; + } + + const std::vector& getAttributes() const { + return attributes_; + } + + bool is_parameter(size_t slot) const { + TORCH_INTERNAL_ASSERT( + is_module(), "asking for parameterSlots of non-Module"); + return attributes_.at(slot).getKind() == AttributeKind::PARAMETER; + } + + bool is_buffer(size_t slot) const { + TORCH_INTERNAL_ASSERT( + is_module(), "asking for bufferWrittenSlots of non-Module"); + return attributes_.at(slot).getKind() == AttributeKind::BUFFER; + } + + void addForwardPreHook(torch::jit::Function* pre_hook_ptr); + void addForwardHook(torch::jit::Function* hook_ptr); + torch::jit::Function* findForwardPreHook(const std::string& name) const; + torch::jit::Function* findForwardHook(const std::string& name) const; + const std::vector& getForwardHooks() const; + const std::vector& getForwardPreHooks() const; + + void checkForwardPreHookSchema( + size_t pre_hook_idx, + const FunctionSchema& pre_hook_schema) const; + void checkForwardHookSchema( + size_t hook_idx, + const FunctionSchema& hook_schema) const; + + void addMethod(torch::jit::Function* method); + torch::jit::Function* findMethod(const std::string& name) const; + torch::jit::Function& getMethod(const std::string& name) const; + torch::jit::Function* findHook(const std::string& name) const; + torch::jit::Function& getHook(const std::string& name) const; + bool 
hasMethod(const std::string& name) const; + + torch::jit::Function* findStaticMethod(const std::string& name) const; + void addStaticMethod(torch::jit::Function* method); + + // [Internal Only] Remove method from the ClassType + // caller is responsible to make sure the modification is safe: + // it is unsafe to having existing allocations + // of this object around anymore, and any code that works on + // the attribute is now invalid. Only newly created code is + // valid again. + // Note this method is intended for freezing only. + void unsafeRemoveMethod(const std::string& name); + + std::shared_ptr compilation_unit(); + + std::shared_ptr compilation_unit() const; + + // generate a refined version of this class. + // It has the same name but the slot Types are subtypes of + // the original slots. It is only valid to refine a class type in a context + // where it is know that there are not assignments to the objects slots + // that would invalidate the refinement. + // These variants are not registered in the global class table. + ClassTypePtr refine(at::ArrayRef refined_slots) const; + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + static const TypeKind Kind = TypeKind::ClassType; + + private: + ClassType( + std::optional name, + std::weak_ptr cu, + bool is_module = false, + std::string doc_string = "", + std::vector unresolved_class_attributes = {}); + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + + void addAttribute(ClassAttribute classAttribute); + std::string getForwardPreHookErrorMessage(size_t pre_hook_idx) const; + std::string getForwardHookErrorMessage(size_t hook_idx) const; + + // Mapping of attribute names -> their type. 
+ // NOTE: this does not contain methods, which are stored in the module + // TODO: once modules support arbitrary ivalue attributes, we don't need this + // anymore. + // TODO: This is better represented as an OrderedDict, but alas it is not yet + // available from c10 + + // Mapping of constant names -> their value. + std::vector constantNames_; + std::vector constantValues_; + // Holds method attributes + std::weak_ptr compilation_unit_; + + // Holds all attributes, attribute details are found on ClassAttribute + std::vector attributes_; + // Construct mirroring attributes_, only around due to the fact that `containedTypes()` method returns an ArrayRef. + // Never fill this without using the appropriate provideNewClassAttribute method + std::vector attributeTypes_; + + // List of methods associated with this class. + std::vector methods_; + std::vector staticmethods_; + + // List of hooks to be run before/after forward. + std::vector forward_hooks_; + std::vector forward_pre_hooks_; + + // List of properties exposed by this class. + std::vector properties_; + + bool isModule_ = false; + + // Doc string of class. + std::string doc_string_; + + // For error reporting accesses to class level attributes. + std::vector unresolved_class_attributes_; +}; + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h new file mode 100644 index 0000000000000000000000000000000000000000..2811256fb352bb7bf2cfe3fac74c5db69e932319 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/custom_class.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace c10 { + +struct ClassType; +using ClassTypePtr = std::shared_ptr; + +TORCH_API c10::ClassTypePtr getCustomClassTypeImpl(const std::type_index &tindex); + +template +const c10::ClassTypePtr& getCustomClassType() { + // Classes are never unregistered from getCustomClassTypeMap and the + // hash lookup can be a hot path, so just cache. + // For the same reason, it's fine If this ends up getting duplicated across + // DSO boundaries for whatever reason. + static c10::ClassTypePtr cache = getCustomClassTypeImpl( + std::type_index(typeid(T))); + return cache; +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h new file mode 100644 index 0000000000000000000000000000000000000000..9d44d9ae8df7de333932d10e5921953e78491418 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/dynamic_type.h @@ -0,0 +1,252 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace c10 { + +using DynamicTypeBits = std::uint32_t; +#define DYNAMIC_TYPE_BIT(x) (1u << x) + +constexpr DynamicTypeBits kDynamicCovariantTypeBit = DYNAMIC_TYPE_BIT(31); +constexpr DynamicTypeBits kDynamicAnyTypeBit = DYNAMIC_TYPE_BIT(30); + +constexpr DynamicTypeBits kDynamicNoneTypeBit = DYNAMIC_TYPE_BIT(1); +constexpr DynamicTypeBits kDynamicIntTypeBit = DYNAMIC_TYPE_BIT(3); +constexpr DynamicTypeBits kDynamicFloatTypeBit = DYNAMIC_TYPE_BIT(4); +constexpr DynamicTypeBits kDynamicComplexTypeBit = DYNAMIC_TYPE_BIT(5); +constexpr DynamicTypeBits kDynamicListTypeBit = DYNAMIC_TYPE_BIT(7); +constexpr DynamicTypeBits kDynamicTupleTypeBit = DYNAMIC_TYPE_BIT(8); +constexpr DynamicTypeBits kDynamicClassTypeBit = DYNAMIC_TYPE_BIT(10); + +#define FORALL_DYNAMIC_TYPES(_) \ + _(Tensor, DYNAMIC_TYPE_BIT(0), 1) \ + _(None, kDynamicNoneTypeBit, 1) \ + _(Bool, DYNAMIC_TYPE_BIT(2), 1) \ + _(Int, kDynamicIntTypeBit, 1) \ + _(Float, kDynamicFloatTypeBit, 1) \ + _(Complex, kDynamicComplexTypeBit, 1) \ + _(Number, \ + (kDynamicIntTypeBit | kDynamicFloatTypeBit | kDynamicComplexTypeBit), \ + 1) \ + _(String, DYNAMIC_TYPE_BIT(6), 1) \ + _(List, kDynamicListTypeBit, 0) \ + _(Tuple, (kDynamicTupleTypeBit | kDynamicCovariantTypeBit), 0) \ + _(Dict, DYNAMIC_TYPE_BIT(9), 0) \ + _(Class, kDynamicClassTypeBit, 0) \ + _(Optional, \ + 
(DYNAMIC_TYPE_BIT(11) | kDynamicNoneTypeBit | kDynamicCovariantTypeBit), \ + 0) \ + _(AnyList, (kDynamicListTypeBit | kDynamicAnyTypeBit), 1) \ + _(AnyTuple, \ + (kDynamicTupleTypeBit | kDynamicCovariantTypeBit | kDynamicAnyTypeBit), \ + 1) \ + _(DeviceObj, DYNAMIC_TYPE_BIT(12), 1) \ + _(StreamObj, DYNAMIC_TYPE_BIT(13), 1) \ + _(Capsule, DYNAMIC_TYPE_BIT(14), 1) \ + _(Generator, DYNAMIC_TYPE_BIT(15), 1) \ + _(Storage, DYNAMIC_TYPE_BIT(16), 1) \ + _(Var, DYNAMIC_TYPE_BIT(17), 0) \ + _(AnyClass, (kDynamicClassTypeBit | kDynamicAnyTypeBit), 1) \ + _(QScheme, DYNAMIC_TYPE_BIT(18), 1) \ + _(Quantizer, DYNAMIC_TYPE_BIT(19), 1) \ + _(AnyEnum, DYNAMIC_TYPE_BIT(20), 1) \ + _(RRef, DYNAMIC_TYPE_BIT(21), 0) \ + _(Future, DYNAMIC_TYPE_BIT(22), 0) \ + _(Await, DYNAMIC_TYPE_BIT(23), 0) \ + _(Any, 0xffffffff, 1) + +#define FORALL_DYNAMIC_TYPES_FAKE(_) \ + _(ScalarType, kDynamicIntTypeBit, 1) \ + _(Layout, kDynamicIntTypeBit, 1) \ + _(SymInt, kDynamicIntTypeBit, 1) \ + _(SymBool, kDynamicIntTypeBit, 1) \ + _(MemoryFormat, kDynamicIntTypeBit, 1) + +#define FORWARD_DECL_TYPE(NAME, _, __) struct NAME ## Type; + FORALL_DYNAMIC_TYPES(FORWARD_DECL_TYPE) + FORALL_DYNAMIC_TYPES_FAKE(FORWARD_DECL_TYPE) +#undef FORWARD_DECL_TYPE + +class DynamicType; +using DynamicTypePtr = std::shared_ptr; + +/** + * DynamicType is designed as a low dependency type system for TorchScript. The + * existing JIT types are used for both compilation and runtime, which makes + * sense for server contexts because we often compile and run the model in + * the same process, however this doesn't hold for mobile devices where we + * always compiles a model ahead of time, therefore there will be dependencies + * which are not needed, but built with mobile runtime causing binary size + * bloat, by design. Every basic type like Int, Bool or String will bring their + * vtable, typeinfo, constructor, destructor and even more data from their + * specializations for STL types to the binary causing a long tail bloat. 
+ * + * The core problem is about the complexity to implement and maintain a single + * type system for both analysis and execution purposes. Although they should + * have the exactly same semantics, in practice implement a unified abstraction + * adds conceptual and representational overhead for both sides of the world. + * + * To address the issues, DynamicType implements a minimal subset of JIT types + * and uses a generic algorithm to test all subtyping relations. To achieve + * this, we assign each dynamic type a single integer tag to represent its + * semantics. More specifically, a dynamic type is defined as a set of "control + * bits" and "data bits", where control bits describe the special behavior when + * testing a type and data bits map to identity of each nominal type. We use bit + * operations to perform all the tests. + * + * For example, a "covariant bit" is a control bit used to describe if a type + * is covariant, right now the most used one is tuple type, and in addition to + * the control bit, tuple type's data bit is the 8th bit from the LSB. Control + * bits start from MSB and data bits start from LSB. + * + * If two types are equal, then they are subtype of each other, also if the bits + * from one type tag is subset of the other tag, it automatically becomes a + * subtype of the other. This simplifies the subtyping logic a lot, and over the + * long term it is possible to adopt this scheme on the server side as well. + * Special cases can be added but they generally should not take too much code + * size. + * + * DynamicType may or may not inherit from c10::Type because it's not the core + * requirement of DynamicType to interface with existing JIT types, but we might + * want to inherit from c10::Type to reduce the migration cost. + */ +class DynamicType : public SharedType { + using ClassTypePtr = std::shared_ptr; + + /** + * A implementation detail to support NamedTuple. 
+ */ + struct LabeledDynamicType { + std::optional label; + DynamicTypePtr ty; + explicit LabeledDynamicType(DynamicTypePtr t) : ty(std::move(t)) {} + + bool equals(const LabeledDynamicType& other) const; + bool isSubtypeOf(const LabeledDynamicType& other) const; + }; + + public: + // TODO Change Ptr to DynamicTypePtr when all migrations are done. + using Ptr = TypePtr; + using ElementType = DynamicType; + ~DynamicType() override; + + struct Arguments { + Arguments() = default; + Arguments(c10::ArrayRef /*args*/); + Arguments(const std::vector& /*names*/, c10::ArrayRef /*args*/); + std::vector elems; + }; + + enum class Tag : DynamicTypeBits { +#define DYNAMIC_TYPE_ITEM(NAME, VAL, _) NAME = VAL, + FORALL_DYNAMIC_TYPES(DYNAMIC_TYPE_ITEM) + FORALL_DYNAMIC_TYPES_FAKE(DYNAMIC_TYPE_ITEM) +#undef DYNAMIC_TYPE_ITEM + }; + + bool equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + std::string str() const override; + static const TypeKind Kind = TypeKind::DynamicType; + static TORCH_API DynamicTypePtr create(Type& ty); + + explicit DynamicType(Tag /*tag*/, Arguments /*arguments*/); + explicit DynamicType(Tag /*tag*/, std::string_view /*name*/, Arguments /*arguments*/); + + DynamicType(DynamicType&& other) = delete; + DynamicType(const DynamicType&) = delete; + DynamicType& operator=(const DynamicType&) = delete; + DynamicType& operator=(DynamicType&&) = delete; + + TypePtr containedType(size_t /*i*/) const override; + size_t containedTypeSize() const override; + Tag tag() const { + return tag_; + } + const std::optional& name() const { + return name_; + } + const Arguments& arguments() const { + return arguments_; + } + TORCH_API TypeKind dynamicKind() const; + + // Should be used only on the server side to restore static type information. 
+#ifndef C10_MOBILE + TORCH_API +#endif + TypePtr fallback() const; + + private: + bool symmetric() const override { + return false; + } + friend struct Type; + // NOTE: Here we are using SingletonOrSharedTypePtr to mean + // "original-type-because-it-was-actually-a-DynamicType or shared". + static SingletonOrSharedTypePtr create(const Type& ty); + DynamicType(const Type& other); + bool equals(const DynamicType& other) const; + + template + bool compareArguments(const DynamicType& other, const F& f) const { + if (arguments_.elems.size() != other.arguments_.elems.size()) { + return false; + } + for (size_t i = 0; i < arguments_.elems.size(); i++) { + if (!f(arguments_.elems[i], other.arguments_.elems[i])) { + return false; + } + } + return true; + } + + Tag tag_; + std::optional name_; + union { + Arguments arguments_; + ClassTypePtr class_; + }; +}; + +template +struct DynamicTypeTrait { + C10_NOINLINE static auto tagValue() { + TORCH_CHECK(false); + return DynamicType::Tag::Any; + } +}; + +namespace detail { +C10_NOINLINE DynamicTypePtr makeBaseType(DynamicType::Tag tag); +} + +#define DYNAMIC_TYPE_TAG_VALUE(NAME, _, IS_BASE_TYPE) \ + template <> \ + struct TORCH_API DynamicTypeTrait { \ + C10_ERASE static auto tagValue() { \ + return DynamicType::Tag::NAME; \ + } \ + static constexpr bool isBaseType = IS_BASE_TYPE; \ + template \ + static std::enable_if_t getBaseType() { \ + static auto type = detail::makeBaseType(tagValue()); \ + return type; \ + } \ + }; // namespace c10 +FORALL_DYNAMIC_TYPES(DYNAMIC_TYPE_TAG_VALUE) +FORALL_DYNAMIC_TYPES_FAKE(DYNAMIC_TYPE_TAG_VALUE) +#undef DYNAMIC_TYPE_TAG_VALUE + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h new file mode 100644 index 0000000000000000000000000000000000000000..a5f38f49d2638be21cd7e99cfcec402577afc65f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_tag.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from enum_tag.h + +namespace at { + // Enum of valid tags obtained from the entries in tags.yaml + enum class Tag { + core, + cudagraph_unsafe, + data_dependent_output, + dynamic_output_shape, + flexible_layout, + generated, + inplace_view, + maybe_aliasing_or_mutating, + needs_contiguous_strides, + needs_exact_strides, + needs_fixed_stride_order, + nondeterministic_bitwise, + nondeterministic_seeded, + pointwise, + pt2_compliant_tag, + reduction, + view_copy + }; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h new file mode 100644 index 0000000000000000000000000000000000000000..3c67209920ef6135e8f6ff0b54260e8c8631aaef --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/enum_type.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace c10 { + +struct EnumType; +using EnumTypePtr = std::shared_ptr; +using EnumNameValue = std::pair; +struct TORCH_API EnumType : public NamedType { + friend struct Type; + static const TypeKind Kind = TypeKind::EnumType; + + static EnumTypePtr create( + const c10::QualifiedName& qualified_class_name, + TypePtr value, + std::vector enum_names_values, + std::weak_ptr<::torch::jit::CompilationUnit> cu) { + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (value->kind()) { + case TypeKind::IntType: + case TypeKind::FloatType: + case TypeKind::StringType: + return EnumTypePtr(new EnumType( + qualified_class_name, + std::move(value), + std::move(enum_names_values), + std::move(cu))); + default: + TORCH_CHECK( + false, + "Cannot create Enum with value type '", + value->str(), + "', only int, float and string are supported"); + } + C10_DIAGNOSTIC_POP() + } + + std::string str() const override { + return "Enum<" + annotation_str() + ">"; + } + + std::string repr_str() const override { + return str(); + } + + const TypePtr& getValueType() const { + return value_type_; + } + + bool equals(const Type& rhs) const override { + if (auto* enum_rhs = rhs.castRaw()) { + return name().has_value() && name() == enum_rhs->name() && + *getValueType() == *(enum_rhs->getValueType()) && + this->compilation_unit() == enum_rhs->compilation_unit(); 
+ } + return false; + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::shared_ptr compilation_unit() + const { + auto cu = cu_.lock(); + return cu; + } + + const QualifiedName& qualifiedClassName() const { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name().value(); + } + + at::ArrayRef containedTypes() const override { + return value_type_; + } + + const at::ArrayRef enumNamesValues() const { + return enum_names_values_; + } + + private: + EnumType( + c10::QualifiedName qualified_class_name, + TypePtr value_type, + std::vector enum_names_values, + std::weak_ptr cu) + : NamedType(TypeKind::EnumType, std::move(qualified_class_name)), + value_type_(std::move(value_type)), + enum_names_values_(std::move(enum_names_values)), + cu_(std::move(cu)) {} + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return qualifiedClassName().qualifiedName(); + } + + TypePtr value_type_; + std::vector enum_names_values_; + std::weak_ptr<::torch::jit::CompilationUnit> cu_; +}; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function.h new file mode 100644 index 0000000000000000000000000000000000000000..255bb8715de7e9fbf7cafad7dcc9b7ec22f75c51 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { +struct FunctionSchema; +} + +namespace at { +TORCH_API void launch(std::function func); +} + +namespace torch::jit { + +struct Graph; +struct Code; + +namespace mobile { +struct Code; +} + +using Stack = std::vector; +using Kwargs = std::unordered_map; +struct RecursiveMethodCallError : public std::exception {}; +using TaskLauncher = std::function)>; + +TORCH_API void preoptimizeGraph( + std::shared_ptr& graph, + bool disable_autocast = false); + +// A Function is a pure Graph with no implicit `self` object bound. +// It contains schema information and the executor that manages the +// execution of the function. Method is a wrapper around an +// underlying Function that also provides a `self` object. 
+struct TORCH_API Function { + Function() = default; + Function(const Function&) = default; + Function& operator=(const Function&) = default; + Function(Function&&) noexcept = default; + Function& operator=(Function&&) noexcept = default; + virtual std::string_view doc_string() const { + static constexpr std::string_view no_doc_string; + return no_doc_string; + } + + virtual bool isGraphFunction() const { + return false; + } + + virtual void run(Stack& stack) = 0; + + virtual c10::intrusive_ptr runAsync( + Stack& /*stack*/, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + [[maybe_unused]] TaskLauncher taskLauncher = at::launch) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return {}; + } + + at::IValue operator()(Stack stack, const Kwargs& kwargs = Kwargs()) { + getSchema().checkAndNormalizeInputs(stack, kwargs); + run(stack); + return stack.front(); + } + + virtual const c10::QualifiedName& qualname() const = 0; + + const std::string& name() const { + return qualname().name(); + } + + // if this isn't yet defined, run its method_creator function + virtual void ensure_defined() = 0; + + virtual const c10::FunctionSchema& getSchema() const = 0; + + virtual size_t num_inputs() const = 0; + + virtual Function& setSchema(c10::FunctionSchema schema) = 0; + + // call() defines how different interpreter implementations interacts with + // Function objects. Basically interpreters need to provide a callback to + // communicate to Functions what to do if provided a Code object. + // Alternatively we could design the signature to return an optional Code + // object, but that requires special handling the null case in interpreter + // and the fallback behavior is not well defined by interpreter but rather + // Function themselves, so a callback approach is more reasonable than + // returning values. + // If call() returns true, then callback completes successfully, otherwise + // call() returns false. 
+ + // Overload for server interpreter, a bailout size is needed for graph + // executor. + virtual bool call( + Stack& /*unused*/, + std::optional /*unused*/, + c10::function_ref /*unused*/) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return false; + } + + // Overload for mobile interpreter. + virtual bool call(Stack& /*unused*/, c10::function_ref /*unused*/) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(false); + return false; + } + + virtual ~Function() = default; +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h new file mode 100644 index 0000000000000000000000000000000000000000..a9f8e0238cdf0aea822e26c2dcb8c39284326305 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema.h @@ -0,0 +1,695 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// schema as used in the compiler for resolving function calls and reporting +// errors. These objects should be constructed from C10 schema once those +// are available. 
+ +struct Argument; +struct FunctionSchema; + +using AliasTypeSet = std::vector; + +bool operator==(const Argument& lhs, const Argument& rhs); + +struct TORCH_API Argument { + Argument( + std::string name = "", + const TypePtr& type = nullptr, + std::optional N = std::nullopt, + std::optional default_value = std::nullopt, + bool kwarg_only = false, + std::optional alias_info = std::nullopt) + : Argument(std::move(name), type, type, N, std::move(default_value), kwarg_only, std::move(alias_info)) {} + + Argument( + std::string name, + TypePtr fake_type, + TypePtr real_type, + std::optional N = std::nullopt, + std::optional default_value = std::nullopt, + bool kwarg_only = false, + std::optional alias_info = std::nullopt) + : name_(std::move(name)), + type_(fake_type ? std::move(fake_type) : TensorType::get()), + real_type_(real_type ? std::move(real_type) : type_), + N_(N), + default_value_(std::move(default_value)), + alias_info_(alias_info ? std::make_unique(std::move(*alias_info)) : nullptr), + kwarg_only_(kwarg_only) { + // this is an softly-enforced invariant for out arguments. + bool is_alias = alias_info_ != nullptr && alias_info_->isWrite(); + is_out_ = kwarg_only_ && is_alias; + } + + Argument(Argument&& rhs) noexcept = default; + + Argument(const Argument& rhs) + : name_(rhs.name_), + type_(rhs.type_), + real_type_(rhs.real_type_), + N_(rhs.N_), + default_value_(rhs.default_value_), + alias_info_(rhs.alias_info_ ? std::make_unique(*rhs.alias_info_) : nullptr), + kwarg_only_(rhs.kwarg_only_), + is_out_(rhs.is_out_) {} + + Argument& operator=(Argument&& rhs) = default; + + Argument& operator=(const Argument& rhs) { + if (this != &rhs) { + name_ = rhs.name_; + type_ = rhs.type_; + real_type_ = rhs.real_type_; + N_ = rhs.N_; + default_value_ = rhs.default_value_; + alias_info_ = rhs.alias_info_ ? 
std::make_unique(*rhs.alias_info_) : nullptr; + kwarg_only_ = rhs.kwarg_only_; + is_out_ = rhs.is_out_; + } + return *this; + } + ~Argument() = default; + + const std::string& name() const { + return name_; + } + const TypePtr& type() const { + return type_; + } + // if type() is non-null, this is guaranteed to be non-null (if no real + // type was provided, this takes on type()'s value) + const TypePtr& real_type() const { + return real_type_; + } + const std::optional& N() const { + return N_; + } + const std::optional& default_value() const { + return default_value_; + } + bool kwarg_only() const { + return kwarg_only_; + } + + bool is_out() const { + return is_out_; + } + + [[nodiscard]] const AliasInfo* alias_info() const { + return alias_info_.get(); + } + + bool is_inferred_type() const { + bool is_inferred_type = false; + TORCH_INTERNAL_ASSERT(type_); + if (auto pt = type_->cast()) { + if (pt->isInferredType()) { + is_inferred_type = true; + } + } + return is_inferred_type; + } + + std::string formatTypeMismatchMsg(const std::string& actual_type) const { + std::string inferred_type_hint; + if (is_inferred_type()) { + inferred_type_hint = c10::str( + "Inferred '", + name(), + "' to be of type 'Tensor' ", + "because it was not annotated with an explicit type.\n"); + } + return c10::str( + "Expected a value of type '", + type()->repr_str(), + "' for argument '", + name(), + "' but instead found type '", + actual_type, + "'.\n", + inferred_type_hint); + } + + Argument cloneWithType(const TypePtr& new_type) const { + return Argument( + name_, + new_type, + N_, + default_value_, + kwarg_only_, + alias_info_ ? std::optional(*alias_info_) : std::nullopt); + } + + // this function checks whether this Argument is backward compatible with + // the old one. 
we consider the following cases are backward compatible: + // 1) two arguments are equal + // 2) this arg's type should be subtype of old + // 3) this arg must provide the same default value if old arg has one, + bool isBackwardCompatibleWith( + const Argument& old, + std::ostream* why_not=nullptr) const; + + // this function checks whether this Argument is forward compatible with + // the old one. we consider the following cases are forward compatible: + // 1) two arguments are equal + // 2) this arg's type should be subtype of old + // 3) this arg must provide the same default value if old arg has one, + bool isForwardCompatibleWith( + const Argument& old, + std::ostream* why_not = nullptr) const; + + private: + std::string name_; + TypePtr type_; + TypePtr real_type_; // this is ScalarType, not int, e.g. + // for list types, an optional statically known length for the list + // e.g. for int[3]: type = ListType::ofInts(), N = 3 + // If present, this will allow scalars to be broadcast to this length to + // become a list. + std::optional N_; + + std::optional default_value_; + // AliasInfo is huge, so let's only allocate memory for it if + // necessary (which it isn't during schema parsing on startup, to + // give a pertinent example). + std::unique_ptr alias_info_; + // is this only specifiable as a keyword argument? 
+ bool kwarg_only_; + // marks if the argument is out variant of the schema + bool is_out_; +}; + +inline bool operator==(const Argument& lhs, const Argument& rhs) { + return lhs.name() == rhs.name() + && *lhs.type() == *rhs.type() + && lhs.N() == rhs.N() + && lhs.default_value() == rhs.default_value() + && lhs.kwarg_only() == rhs.kwarg_only() + && (lhs.alias_info() == rhs.alias_info() + || (lhs.alias_info() != nullptr && rhs.alias_info() != nullptr + && *lhs.alias_info() == *rhs.alias_info())); +} + +inline bool operator!=(const Argument& lhs, const Argument& rhs) { + return !(lhs == rhs); +} + +enum struct TORCH_API SchemaArgType { input, output }; + +/** + * struct SchemaArgument + * + * Structure used to represent arguments or returns for a schema. + */ +struct TORCH_API SchemaArgument { + SchemaArgType type; + size_t index; + SchemaArgument(SchemaArgType tpe, size_t idx) : type(tpe), index(idx) {} + bool operator==(const SchemaArgument& rhs) const { + return type == rhs.type && index == rhs.index; + } +}; + +bool operator==(const FunctionSchema& lhs, const FunctionSchema& rhs); + +struct TORCH_API FunctionSchema { + FunctionSchema( + std::string name, + std::string overload_name, + std::vector arguments, + std::vector returns, + bool is_vararg = false, + bool is_varret = false) + : name_({std::move(name), std::move(overload_name)}), + arguments_(std::move(arguments)), + returns_(std::move(returns)), + is_vararg_(is_vararg), + is_varret_(is_varret) { + checkSchema(); + } + + FunctionSchema( + Symbol name, + std::string overload_name, + std::vector arguments, + std::vector returns, + bool is_vararg = false, + bool is_varret = false) + : FunctionSchema( + name.toQualString(), + std::move(overload_name), + std::move(arguments), + std::move(returns), + is_vararg, + is_varret) { + checkSchema(); + } + + // Checks whether this schema is backward compatible with the old one. 
+ // The following conditions must be true: + // [Function structure] The new schema's name, overload-name, varargs, and + // return arity are the same. + // [Output Narrowing] The new schema's output type must be the same class + // or inherit from the old schema's output type. + // [Argument count] The new schema must have at least as many arguments as + // the old schema (considering the list of positional and kwargs). + // [Arg Compatibility] Every argument in the old schema has a corresponding + // argument in the new schema that: + // * is at the same position. + // * has the same name. + // * is either positional, or kwarg and the old argument was kwarg. + // * has the same type, or the old argument's type inherits from the + // new argument's type. + // [Default Values] Every new argument must have a default value. + // E.g. + // OK f_new(a, b, c=1) => f_old(a, b) + // NOK f_new(a, c=1, *, b) => f_old(a, *, b) + // OK f_new(a, b, *, c) => f_old(a, *, b, c) + // NOK f_new(a, *, b, c) -> f_old(a, b, *, c) + // NOK f_new(a, *, c, b) => f_old(a, *, b, c) + // OK f_new(a, *, b, c, d=1) => f_old(a, *, b, c) + bool isBackwardCompatibleWith( + const FunctionSchema& old, + std::ostream* why_not = nullptr) const; + + // Checks whether this schema is forward compatible with the old one. + // The following conditions must be true: + // [Function structure] The new schema's name, overload-name, varargs, and + // return arity are the same. + // [Output Narrowing] The new schema's output type must be the same class + // or inherit from the old schema's output type. + // [Arg Compatibility] Every argument in the old schema has a corresponding + // argument in the new schema that: + // * is at the same position. + // * has the same name. + // * is either positional, or kwarg and the old argument was kwarg. + // * has the same type, or the old argument's type inherits from the + // new argument's type. + // [Default Values] Every new argument must have a default value. 
+ // Each default value type should NOT be a container type. + // [Positioning] All defaults arguments MUST go after either old + // default arguments or the end of positional arguments + // and right BEFORE all out arguments + bool isForwardCompatibleWith( + const FunctionSchema& old, + std::ostringstream& why_not) const; + + private: + OperatorName name_; + std::vector arguments_; + std::vector returns_; + // if true then this schema takes an arbitrary number of additional arguments + // after the argument specified in arguments + // currently this is used primarily to represent 'primitive' operators whose + // arguments are not checked by schema + bool is_vararg_; + bool is_varret_; + + // if no alias information is directly specified, what kind of "default" + // alias information should we infer? + // NB: due to alias analysis kind merging, this may be nullopt. Eventually + // this should always be set no matter what + std::optional alias_kind_; + + template + void checkArg(const IValue& value, const Argument& argument, std::optional pos) const; + + void checkSchema() const { + bool seen_default_arg = false; + for (const auto& arg : arguments()) { + if (arg.default_value()) { + seen_default_arg = true; + } else { + // we have historically serialized broadcasting lists wo/default values, + // so to not break BC allow lists here + if (arg.type()->kind() == ListType::Kind) { + continue; + } + TORCH_INTERNAL_ASSERT( + !seen_default_arg || arg.kwarg_only(), + "Non-default positional argument follows default argument. 
Parameter ", + arg.name(), + " in ", + *this); + } + } + } + + public: + + void dump() const; + + const OperatorName& operator_name() const { + return name_; + } + const std::string& name() const { + return name_.name; + } + const std::string& overload_name() const { + return name_.overload_name; + } + const std::vector& arguments() const { + return arguments_; + } + const std::vector& returns() const { + return returns_; + } + bool is_vararg() const { + return is_vararg_; + } + bool is_varret() const { + return is_varret_; + } + bool is_aliasing(const c10::SchemaArgument &argument) const { + TORCH_INTERNAL_ASSERT( + argument.index < getCorrectList(argument.type).size(), + "Invalid index for schema."); + const AliasInfo* aliasInfo = getCorrectList(argument.type)[argument.index].alias_info(); + return aliasInfo; + } + bool is_mutable() const { + return std::any_of( + arguments_.cbegin(), arguments_.cend(), [](const Argument& arg) { + const AliasInfo* aliasInfo = arg.alias_info(); + return aliasInfo && aliasInfo->isWrite(); + }); + } + bool is_mutable(const c10::SchemaArgument &argument) const { + TORCH_INTERNAL_ASSERT( + argument.index < getCorrectList(argument.type).size(), + "Invalid index for schema."); + const AliasInfo* aliasInfo = getCorrectList(argument.type)[argument.index].alias_info(); + return aliasInfo && aliasInfo->isWrite(); + } + bool is_mutable(std::string_view name) const { + std::optional index = argumentIndexWithName(name); + TORCH_INTERNAL_ASSERT( + index.has_value(), "Schema has no argument named ", name); + + return is_mutable({c10::SchemaArgType::input, static_cast(*index)}); + } + + // Returns whether lhs and rhs may alias directly. + // This does not account for cases where lhs or rhs are a container that + // may contain elements that alias the other argument. + // FunctionSchema::may_contain_alias will include that functionality. 
+ bool may_alias(const SchemaArgument& lhs, const SchemaArgument& rhs) const; + + // Returns whether lhs and rhs may alias directly or whether lhs/rhs are a container + // that may contain elements that alias the other argument. + // bidirectional = false only returns whether lhs may contain an alias of rhs + // while bidirectional = true returns both directions. + bool may_contain_alias(const SchemaArgument& lhs, const SchemaArgument& rhs, bool bidirectional = true) const; + + // Returns whether the two AliasTypeSets contain any similarities + // ie: whether the two type sets can alias. + bool canAliasTypeSetsAlias(const std::optional &lhs, const std::optional &rhs) const; + + // Recursively Finds all contained types within the AliasTypeSet. + std::optional getAliasTypeSetContainedTypes(const std::optional &aliasTypeSet) const; + + // Similar to mapTypeToAliasTypeSet defined in alias_analysis.cpp. + // Used to map types to a type such that all types that can alias will be mapped to the same type. + // For example, calling this method on 'Optional[List[int]]' is the same as calling this method + // on 'List[int]'. 
+ std::optional mapTypeToAliasTypeSet(const TypePtr& type) const; + + // Returns either arguments() or returns() depending on the SchemaArgType + // output => returns(), input => arguments() + const std::vector& getCorrectList(SchemaArgType type) const; + + std::optional argumentIndexWithName(std::string_view name) const { + for (const auto i : c10::irange(arguments().size())) { + if(name == arguments()[i].name()) + return i; + } + return std::nullopt; + } + FunctionSchema cloneWithName(std::string name, std::string overload_name) const { + return FunctionSchema( + std::move(name), + std::move(overload_name), + arguments(), + returns(), + is_vararg(), + is_varret() + ); + } + FunctionSchema cloneWithArguments(std::vector new_arguments) const { + return FunctionSchema( + name(), + overload_name(), + std::move(new_arguments), + returns(), + is_vararg(), + is_varret()); + } + FunctionSchema cloneWithReturns(std::vector new_returns) const { + return FunctionSchema( + name(), + overload_name(), + arguments(), + std::move(new_returns), + is_vararg(), + is_varret()); + } + + std::string formatTypeMismatchMsg( + const Argument& expected, + const std::string& actual_type, + std::optional position = std::nullopt, + std::optional value = std::nullopt) const; + + FunctionSchema cloneWithRemappedTypes( + const std::function type_map) const; + + FunctionSchema cloneWithRealTypes(bool with_symint=true) const; + + // Check that inputs have the correct types and appends any missing default + // values. 
+ template + void checkAndNormalizeInputs( + std::vector& inputs, + const std::unordered_map& kwargs = + std::unordered_map{}) const; + + std::string findErrorInKwargs(const std::vector& kwargs) const; + + bool hasAnyAliasInfo() const { + for (const auto& arg : arguments_) { + if (arg.alias_info() != nullptr) { + return true; + } + } + for (const auto& ret : returns_) { + if (ret.alias_info() != nullptr) { + return true; + } + } + return false; + } + + + // TODO remove the mutation here + bool isDefaultAliasAnalysisKind() const { + return !alias_kind_; + } + AliasAnalysisKind aliasAnalysis() const { + return alias_kind_.value_or(AliasAnalysisKind::CONSERVATIVE); + } + void setAliasAnalysis(AliasAnalysisKind v) { + alias_kind_ = v; + } + + std::optional getNamespace() const { + return name_.getNamespace(); + } + + // Returns true if we successfully set the namespace (as there + // was none set, and false otherwise) + bool setNamespaceIfNotSet(const char* ns) { + return name_.setNamespaceIfNotSet(ns); + } + + // can a function with this schema be substituted for a function of rhs's + // schema and have the program typecheck? + // as_method - if true, treat this schema as a method and ignore + // the first argument, which will be the object in both cases + bool isSubtypeOf(const FunctionSchema& rhs, bool as_method, std::ostream* why_not=nullptr) const; +}; + +inline bool operator==(const FunctionSchema& lhs, const FunctionSchema& rhs) { + return lhs.name() == rhs.name() + && lhs.overload_name() == rhs.overload_name() + && lhs.arguments() == rhs.arguments() + && lhs.returns() == rhs.returns() + && lhs.is_vararg() == rhs.is_vararg() + && lhs.is_varret() == rhs.is_varret(); +} + +inline bool operator!=(const FunctionSchema& lhs, const FunctionSchema& rhs) { + return !(lhs == rhs); +} + +// print out Argument, which is compatible with FunctionSchema parser +// full format: Type(alias)? 
name=default_value +inline std::ostream& operator<<(std::ostream& out, const Argument& arg) { + + // for adjusting the ? position. + // in schema, we have Tensor?(a!) input, and t(a!)?. + // however, t?(a!) doesn't work with schema parser. + // so we always use Type(alias)? format + // real_type versus fake_type: in order to be compatible with FunctionSchema + // parser, printing an argument with either MemoryFormat or Layout type should + // give us the original schema string, hence printing out real_type. + auto type = arg.real_type(); + bool is_opt = type->kind() == OptionalType::Kind; + auto unopt_type = is_opt ? type->castRaw()->getElementType() : type; + + if (unopt_type->kind() == ListType::Kind) { + // sized lists get size N from arg, not type + auto list = unopt_type->cast(); + out << list->getElementType()->str(); + if (arg.alias_info() && !arg.alias_info()->containedTypes().empty()){ + out << arg.alias_info()->containedTypes()[0]; + } + std::string N; + if (arg.N()) { + N = std::to_string(*arg.N()); + } + out << '[' << N << ']'; + } else { + out << unopt_type->str(); + } + + // print alias info if it has beforeSets. + if (arg.alias_info() && !arg.alias_info()->beforeSets().empty()) { + out << *arg.alias_info(); + } + + if (is_opt) { + out << '?'; + } + + if (!arg.name().empty()) { + out << ' ' << arg.name(); + } + + if (arg.default_value()) { + out << '='; + if ((type->kind() == c10::TypeKind::StringType || + unopt_type->kind() == c10::TypeKind::StringType) && + arg.default_value().value().isString()) { + printQuotedString(out, arg.default_value().value().toStringRef()); + } else if (type->kind() == TypeKind::ListType && type->castRaw()->getElementType()->kind() == c10::TypeKind::IntType) { + // We want to faithfully replicate JIT schema. 
+ // in native_functions.yaml defaults for int arrays with a single value always look like + // int[2] stride=1 + // instead of + // int[2] stride=[1, 1] + auto default_val = arg.default_value().value().toIntList(); + if (default_val.size() > 1) { + auto all_defaults_the_same = true; + for (const auto i : c10::irange(1, default_val.size())) { + if (default_val[0] != default_val[i]) all_defaults_the_same = false; + } + if (all_defaults_the_same) { + out << default_val[0]; + } else { + out << arg.default_value().value(); + } + } else { + out << arg.default_value().value(); + } + } else { + out << arg.default_value().value(); + } + } + + return out; +} + +TORCH_API std::ostream& operator<<(std::ostream& out, const FunctionSchema& schema); + +inline std::string toString(const FunctionSchema& schema) { + std::ostringstream str; + str << schema; + return str.str(); +} + +} // namespace c10 + +namespace std { +template<> + struct hash { + size_t operator()(const c10::SchemaArgument& arg) const + { + return c10::hash_combine(std::hash()(arg.index), std::hash()(static_cast(arg.type))); + } + }; +template<> + struct hash { + size_t operator()(const c10::Argument& arg) const + { + auto hash = std::hash{}(arg.name()); + auto type_hash = std::hash{}(arg.type()); + auto kwarg_only_hash = std::hash{}(arg.kwarg_only()); + hash = c10::hash_combine(hash, type_hash); + hash = c10::hash_combine(hash, kwarg_only_hash); + // hashing optional fields if they exist + if (arg.default_value().has_value()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + auto default_value_hash = c10::hash{}(*arg.default_value()); + hash = c10::hash_combine(hash, default_value_hash); + } + if (arg.N().has_value()) { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + auto N_hash = std::hash{}(*arg.N()); + hash = c10::hash_combine(hash, N_hash); + } + if (arg.alias_info()) { + auto alias_info_hash = std::hash{}(*arg.alias_info()); + hash = c10::hash_combine(hash, alias_info_hash); + } + return 
hash; + } + }; +template<> + struct hash { + size_t operator()(const c10::FunctionSchema& schema) const + { + auto hash = std::hash{}(schema.operator_name()); + auto args_hash = c10::hash>{}(schema.arguments()); + auto returns_hash = c10::hash>{}(schema.returns()); + auto is_vararg_hash = std::hash{}(schema.is_vararg()); + auto is_varret_hash = std::hash{}(schema.is_varret()); + hash = c10::hash_combine(hash, args_hash); + hash = c10::hash_combine(hash, returns_hash); + hash = c10::hash_combine(hash, is_vararg_hash); + hash = c10::hash_combine(hash, is_varret_hash); + return hash; + } + }; +} // namespace std + + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..0e7bd33a69878dc610bafe19017c7c36c1139b84 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/function_schema_inl.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace c10 { + +template +inline void FunctionSchema::checkArg( + const IValue& value, + const Argument& argument, + std::optional pos) const { + if (value.isTensor() && argument.type() == TensorType::get()) { + // Fast-path for the common case + return; + } + if (value.isGenericDict() && value.toGenericDict().empty()) { + return; + } + if (!value.type()->isSubtypeOf(*argument.type())) { + TORCH_CHECK( + false, + formatTypeMismatchMsg( + argument, value.type()->repr_str(), pos)); + } +} + +template +inline void FunctionSchema::checkAndNormalizeInputs( + std::vector& 
inputs, + const std::unordered_map& kwargs) const { + // Do we have more inputs than the schema accepts? + TORCH_CHECK( + inputs.size() <= arguments().size(), + "Expected at most ", + arguments().size(), + " argument(s) for operator '", + name(), + "', but received ", + inputs.size(), + " argument(s). Declaration: ", + *this); + + size_t consumed_kwargs = 0; + for (const auto pos : c10::irange(arguments().size())) { + const auto& argument = arguments()[pos]; + if (pos < inputs.size()) { + checkArg(inputs[pos], argument, pos); + continue; + } + auto it = kwargs.find(argument.name()); + if (it != kwargs.end()) { + checkArg(it->second, argument, std::nullopt); + inputs.push_back(it->second); + consumed_kwargs++; + continue; + } + if (argument.default_value()) { + inputs.push_back(*argument.default_value()); + continue; + } + TORCH_CHECK(false, + name(), + "() is missing value for argument '", + argument.name(), + "'. Declaration: ", + *this); + } + if (consumed_kwargs != kwargs.size()) { + std::vector names; + names.reserve(kwargs.size()); + for(const auto& k : kwargs) { + names.emplace_back(k.first); + } + TORCH_CHECK(false, findErrorInKwargs(names)); + } +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/functional.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/functional.h new file mode 100644 index 0000000000000000000000000000000000000000..c5bf676563f2704d91659ed57b4d757331432279 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/functional.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { + +// The passed in function must take T by value (T), or by +// const reference (const T&); taking T by non-const reference +// will result in an error like: +// +// error: no type named 'type' in 'class std::invoke_result' +// +// No explicit template parameters are required. + +// Overload for explicit function and ArrayRef +template +inline auto fmap(const T& inputs, const F& fn) -> std::vector { + std::vector r; + r.reserve(inputs.size()); + for(const auto & input : inputs) + r.push_back(fn(input)); + return r; +} + +// C++ forbids taking an address of a constructor, so here's a workaround... +// Overload for constructor (R) application +template +inline std::vector fmap(const T& inputs) { + std::vector r; + r.reserve(inputs.size()); + for(auto & input : inputs) + r.push_back(R(input)); + return r; +} + +template +inline std::vector filter(at::ArrayRef inputs, const F& fn) { + std::vector r; + r.reserve(inputs.size()); + for(auto & input : inputs) { + if (fn(input)) { + r.push_back(input); + } + } + return r; +} + +template +inline std::vector filter(const std::vector& inputs, const F& fn) { + return filter(static_cast>(inputs), fn); +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h new file mode 100644 index 0000000000000000000000000000000000000000..15b2c523a2f07e92eab466d3193577817752ed4b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/grad_mode.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at { + using GradMode = c10::GradMode; + using AutoGradMode = c10::AutoGradMode; + using NoGradGuard = c10::NoGradGuard; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..ffe7388fe4a57d082b91906c0e629b3531be8784 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings.h @@ -0,0 +1,360 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace c10 { + +#define FORALL_NS_SYMBOLS(_) \ + _(namespaces, prim) \ + _(namespaces, prims) \ + _(namespaces, nvprims) \ + _(namespaces, aten) \ + _(namespaces, cuda) \ + _(namespaces, onnx) \ + _(namespaces, attr) \ + _(namespaces, scope) \ + _(namespaces, user) \ + _(namespaces, _caffe2) \ + _(namespaces, dimname) \ + _(namespaces, namespaces) \ + _(prim, Assign) \ + _(prim, BroadcastingChunk) \ + _(prim, BroadcastSizes) \ + _(prim, ReductionSizes) \ 
+ _(prim, Constant) \ + _(prim, ChunkSizes) \ + _(prim, ConstantMKLDNNTensor) \ + _(prim, BroadcastMKLDNNTensors) \ + _(prim, MKLDNNGroup) \ + _(prim, MKLDNNHardSwish) \ + _(prim, MKLDNNHardSigmoid) \ + _(prim, MKLDNNHardTanh) \ + _(prim, MKLDNNClamp) \ + _(prim, StaticRuntimeCopyOuts) \ + _(prim, Drop) \ + _(prim, Eval) \ + _(prim, Expand) /* onnx */ \ + _(prim, FusionGroup) \ + _(prim, CudaFusionGroup) \ + _(prim, CudaFusionGuard) \ + _(prim, oneDNNFusionGroup) \ + _(prim, oneDNNFusionGuard) \ + _(prim, FunctionalGraph) \ + _(prim, add_optional) \ + _(prim, view_copy) \ + _(prim, permute_copy) \ + _(prim, reshape_copy) \ + _(prim, squeeze_copy) \ + _(prim, t_copy) \ + _(prim, transpose_copy) \ + _(prim, unsqueeze_copy) \ + _(prim, flatten_copy) \ + _(prim, expand_copy) \ + _(prim, expand_as_copy) \ + _(prim, DifferentiableGraph) \ + _(prim, TensorExprGroup) \ + _(prim, TensorExprDynamicGroup) \ + _(prim, StaticSubgraph) \ + _(prim, If) \ + _(prim, Jump) /* debug */ \ + _(prim, JumpNZ) /* debug */ \ + _(prim, JumpZ) /* debug */ \ + _(prim, Load) \ + _(prim, Loop) \ + _(prim, Param) \ + _(prim, PackPadded) /* onnx */ \ + _(prim, PadPacked) /* onnx */ \ + _(prim, Placeholder) /* debug */ \ + _(prim, Print) \ + _(prim, EmptyListLiteral) \ + _(prim, LegacyTypedConstructor) \ + _(prim, PythonOp) \ + _(prim, IgnoredPythonOp) \ + _(prim, Reverse) \ + _(prim, Return) \ + _(prim, ReturnStmt) \ + _(prim, BreakStmt) \ + _(prim, ContinueStmt) \ + _(prim, ComprehensionScope) \ + _(prim, Store) \ + _(prim, AutogradZero) \ + _(prim, AutogradAnyNonZero) \ + _(prim, AutogradAllNonZero) \ + _(prim, AutogradAllZero) \ + _(prim, Starred) \ + _(prim, TupleConstruct) \ + _(prim, TupleUnpack) \ + _(prim, TupleIndex) \ + _(prim, TupleSlice) \ + _(prim, ListConstruct) \ + _(prim, ListUnpack) \ + _(prim, DictConstruct) \ + _(prim, ModuleContainerIndex) \ + _(prim, EnumName) \ + _(prim, EnumValue) \ + _(prim, StringIndex) \ + _(prim, NumToTensor) \ + _(prim, Uninitialized) \ + _(prim, 
VarConcat) \ + _(prim, VarStack) \ + _(prim, With) \ + _(prim, Enter) \ + _(prim, Exit) \ + _(prim, IfThenElse) \ + _(aten, Bool) \ + _(aten, Int) \ + _(aten, FloatImplicit) \ + _(aten, ComplexImplicit) \ + _(aten, IntImplicit) \ + _(aten, ScalarImplicit) \ + _(aten, Float) \ + _(aten, Complex) \ + _(aten, str) \ + _(aten, Delete) \ + _(prim, device) \ + _(prim, dtype) \ + _(prim, layout) \ + _(prim, id) \ + _(prim, requires_grad) \ + _(prim, MakeTestTensor) /* test */ \ + _(prim, AutogradAdd) \ + _(prim, GradOf) \ + _(aten, grad) \ + _(aten, backward) \ + _(prim, Guard) \ + _(prim, BailOut) \ + _(prim, TypeCheck) \ + _(prim, RequiresGradCheck) \ + _(prim, FallbackGraph) \ + _(prim, FusedConcat) \ + _(prim, ConstantChunk) \ + _(prim, MMTreeReduce) \ + _(prim, MMBatchSide) \ + _(prim, list) \ + _(prim, dict) \ + _(prim, min) \ + _(prim, max) \ + _(prim, abs) \ + _(aten, divmod) \ + _(prim, zip) \ + _(prim, enumerate) \ + _(prim, range) \ + _(prim, rangelist) \ + _(prim, isinstance) \ + _(prim, tolist) \ + _(prim, unchecked_cast) \ + _(aten, _grad_sum_to_size) \ + _(aten, _size_if_not_equal) \ + _(aten, _ncf_unsqueeze) \ + _(aten, warn) \ + _(aten, sorted) \ + _(aten, floordiv) \ + _(aten, __range_length) \ + _(aten, __derive_index) \ + _(aten, __round_to_zero_floordiv) \ + _(aten, is_scripting) \ + _(aten, _unwrap_optional) \ + _(prim, fork) \ + _(prim, awaitable) \ + _(prim, forkClosure) \ + _(prim, awaitableClosure) \ + _(prim, awaitable_nowait) \ + _(prim, awaitable_wait) \ + _(prim, RaiseException) \ + _(prim, Closure) \ + _(prim, CreateObject) \ + _(prim, SetAttr) \ + _(prim, GetAttr) \ + _(prim, HasAttr) \ + _(prim, profile) \ + _(prim, profile_ivalue) \ + _(prim, AddStatValue) \ + _(prim, TimePoint) \ + _(prim, CallFunction) \ + _(prim, CallMethod) \ + _(prim, LoopContinuation) \ + _(prim, annotate) \ + _(prim, TracedModuleForward) \ + _(prim, TracedFork) \ + _(prim, TracedAttr) \ + _(prim, rpc_async) \ + _(prim, rpc_sync) \ + _(prim, rpc_remote) \ + _(prim, 
is_cuda) \ + _(aten, append) \ + _(aten, as_tensor) \ + _(aten, adaptive_avg_pool2d_backward) \ + _(aten, dim) \ + _(aten, format) \ + _(aten, percentFormat) \ + _(aten, __not__) \ + _(aten, __is__) \ + _(aten, __isnot__) \ + _(aten, _ger) \ + _(aten, __getitem__) \ + _(aten, _set_item) \ + _(aten, manual_seed) \ + _(aten, device) \ + _(aten, hash) \ + _(aten, len) \ + _(aten, list) \ + _(aten, dict) \ + _(aten, wait) \ + _(aten, save) \ + _(aten, keys) \ + _(aten, ord) \ + _(aten, chr) \ + _(aten, hex) \ + _(aten, oct) \ + _(aten, clear) \ + _(aten, setdefault) \ + _(aten, bin) \ + _(aten, pop) \ + _(aten, insert) \ + _(aten, tensor) \ + _(prim, unchecked_unwrap_optional) \ + _(aten, __contains__) \ + _(prim, BailoutTemplate) \ + _(prim, grad) \ + _(cuda, _set_device) \ + _(cuda, set_stream) \ + _(cuda, _current_device) \ + _(cuda, synchronize) \ + _(aten, has_torch_function) \ + _(aten, is_autocast_enabled) \ + _(aten, is_autocast_cpu_enabled) \ + _(aten, is_autocast_xla_enabled) \ + _(aten, get_autocast_dtype) \ + _(aten, is_autocast_mps_enabled) \ + FORALL_ATEN_BASE_SYMBOLS(_) \ + _(onnx, Add) \ + _(onnx, Concat) \ + _(onnx, Constant) \ + _(onnx, ConstantFill) \ + _(onnx, Div) \ + _(onnx, GRU) \ + _(onnx, Gather) \ + _(onnx, Gemm) \ + _(onnx, LSTM) \ + _(onnx, MatMul) \ + _(onnx, Min) \ + _(onnx, Max) \ + _(onnx, Mul) \ + _(onnx, Pow) \ + _(onnx, RNN) \ + _(onnx, Shape) \ + _(onnx, Size) \ + _(onnx, Slice) \ + _(onnx, Softmax) \ + _(onnx, Squeeze) \ + _(onnx, Sub) \ + _(onnx, Transpose) \ + _(onnx, Unsqueeze) \ + _(onnx, Loop) \ + _(onnx, If) \ + _(onnx, Reshape) \ + _(onnx, Expand) \ + _(onnx, Equal) \ + _(onnx, Greater) \ + _(onnx, GreaterOrEqual) \ + _(onnx, Less) \ + _(onnx, LessOrEqual) \ + _(onnx, Not) \ + _(aten, ATen) \ + _(onnx, Split) \ + _(onnx, ConstantOfShape) \ + _(onnx, Cast) \ + _(onnx, Mod) \ + _(onnx, Sqrt) \ + _(onnx, SplitToSequence) \ + _(onnx, SequenceAt) \ + _(onnx, SequenceConstruct) \ + _(onnx, SequenceEmpty) \ + _(onnx, SequenceInsert) 
\ + _(onnx, SequenceErase) \ + _(onnx, ConcatFromSequence) \ + _(onnx, Identity) \ + _(onnx, SoftmaxCrossEntropyLoss) \ + _(onnx, NegativeLogLikelihoodLoss) \ + _(onnx, LogSoftmax) \ + _(onnx, ReduceL1) \ + _(onnx, ReduceL2) \ + _(onnx, Conv) \ + _(onnx, BatchNormalization) \ + _(onnx, ReduceMean) \ + _(onnx, ReduceProd) \ + _(onnx, Relu) \ + _(onnx, Neg) \ + _(onnx, NonZero) \ + _(onnx, Range) \ + _(onnx, Tile) \ + _(onnx, Where) \ + _(onnx, Optional) \ + _(onnx, OptionalGetElement) \ + _(onnx, OptionalHasElement) \ + FORALL_ATTR_BASE_SYMBOLS(_) \ + _(attr, Subgraph) \ + _(attr, ReverseSubgraph) \ + _(attr, f_real_outputs) \ + _(attr, df_input_vjps) \ + _(attr, df_input_captured_inputs) \ + _(attr, df_input_captured_outputs) \ + _(attr, df_output_vjps) \ + _(attr, axes) \ + _(attr, symbolic_shape_inputs) \ + _(attr, allow_stack_outputs) \ + _(attr, striding_inputs_desc) \ + _(attr, striding_outputs_desc) \ + _(attr, broadcast) \ + _(attr, direction) \ + _(attr, ends) \ + _(attr, inplace) \ + _(attr, input_as_shape) \ + _(attr, is_zero) \ + _(attr, num_none) \ + _(attr, num_present) \ + _(attr, perm) \ + _(attr, starts) \ + _(attr, profiled_type) \ + _(attr, transA) \ + _(attr, transB) \ + _(attr, name) \ + _(attr, module) \ + _(attr, beg) \ + _(attr, idx) \ + _(attr, split) \ + _(attr, slot) \ + _(attr, kinds) \ + _(attr, types) \ + _(attr, scope) \ + _(attr, keepdims) \ + _(attr, cache_id) \ + _(attr, new_axis) \ + _(attr, warn_id) \ + _(attr, output_layouts) \ + _(attr, allowzero) \ + _(attr, seen_none) \ + _(attr, overload_name) \ + _(attr, node_stack_idx) + +enum class _keys : unique_t { + #define DEFINE_KEY(ns, s) ns##_##s, + FORALL_NS_SYMBOLS(DEFINE_KEY) + #undef DEFINE_KEY + num_symbols +}; + +#define DEFINE_SYMBOL(ns, s) \ + namespace ns { constexpr Symbol s(static_cast(_keys::ns##_##s)); } +FORALL_NS_SYMBOLS(DEFINE_SYMBOL) +#undef DEFINE_SYMBOL + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or 
TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h new file mode 100644 index 0000000000000000000000000000000000000000..ee490997967e4ae139de41a89ee511bbb95e0d04 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/interned_strings_class.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include + +namespace c10 { + +struct TORCH_API InternedStrings { + InternedStrings(); + Symbol symbol(const std::string& s); + std::pair string(Symbol sym); + Symbol ns(Symbol sym); + + private: + // prereq - holding mutex_ + Symbol _symbol(const std::string& s); + std::pair customString(Symbol sym); + std::unordered_map string_to_sym_; + + struct SymbolInfo { + Symbol ns; + std::string qual_name; + std::string unqual_name; + }; + std::vector sym_to_info_; + + std::mutex mutex_; +}; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h new file mode 100644 index 0000000000000000000000000000000000000000..1c0d3221f4b1a7afff964ab6e37472bb8131fd2e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue.h @@ -0,0 +1,1642 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-default") + +namespace torch { +class TORCH_API CustomClassHolder : public c10::intrusive_ptr_target {}; +namespace jit { +using ::torch::CustomClassHolder; +struct Function; +struct CompilationUnit; +struct Module; +} // namespace jit +} // namespace torch +namespace c10 { +template +class Dict; +template +class List; +template +class IListRef; +struct IValue; +struct ClassType; +struct Type; +class RRefInterface; + +struct ClassType; +using ClassTypePtr = std::shared_ptr; + +TORCH_API bool _fastEqualsForContainer(const IValue& lhs, const IValue& rhs); + +TORCH_API torch::jit::Function* checkObjectSortSchema( + const c10::ClassTypePtr& t, + std::stringstream& why_not); + +// A comparator that checks ordering of two IValues of same type. 
+typedef std::function IValueComparator; + +TORCH_API IValueComparator getLessThanComparator(const IValue& v); +TORCH_API IValueComparator getGreaterThanComparator(const IValue& v); + +namespace ivalue { +struct Tuple; +struct Future; +struct Await; +struct ConstantString; +struct GenericDict; +struct Object; +struct PyObjectHolder; +struct EnumHolder; +// We need a ComplexHolder because currently the payloads in the Union +// only take 64 bits. Since ComplexDouble takes up 128 bits, and is too big +// to fit in the IValue directly, we indirect complex numbers through an +// intrusive pointer to ComplexHolder (which contains a c10::complex). +struct ComplexHolder : c10::intrusive_ptr_target { + public: + template + ComplexHolder(c10::complex c) { + val = convert>(c); + } + ComplexHolder() = default; + c10::complex val; +}; + +// Similar to ComplexHolder, for StreamData3 +struct StreamData3Holder : c10::intrusive_ptr_target { + public: + StreamData3Holder(struct c10::StreamData3 d) : val(d) {} + StreamData3Holder() = delete; + struct c10::StreamData3 val; +}; + +} // namespace ivalue + +// This is an owning wrapper for a std::optional> +// that can be implicitly converted to a (non-owning) std::optional>. +// Its purpose is to be used in generated code to keep the vector alive +// either until the end of a statement (as a temporary), or as a saved arg +// in autograd. +template +struct OptionalArray { + std::optional> list; + + OptionalArray() = default; + OptionalArray(std::vector val) : list(std::move(val)) {} + + // Used when saving an argument for the backwards pass. + OptionalArray& operator=(std::optional> ref) { + if (ref) { + list = std::vector(ref->begin(), ref->end()); + } else { + list = std::nullopt; + } + return *this; + } + + // Used when saving an argument for the backwards pass. 
+ OptionalArray& operator=(c10::OptionalArrayRef ref) { + if (ref) { + list = std::vector(ref->begin(), ref->end()); + } else { + list = std::nullopt; + } + return *this; + } + + operator std::optional>() { + if (!list) { + return std::nullopt; + } + return *list; + } + + operator c10::OptionalArrayRef() { + if (!list) { + return std::nullopt; + } + return *list; + } +}; + +// Capsule is an internal implementation detail of custom C++ classes. We +// define it as an owning wrapper for +// c10::intrusive_ptr This wrapper is here to serve as +// an abstraction of the type erased custom class object pointer. It also allow +// pybind11 to treat this as a standalone class to register as a separate type +// caster, instead of a custom pointer holder which the pointer holder type +// caster try to "unwrap" it automatically. +struct Capsule { + c10::intrusive_ptr obj_ptr; + explicit Capsule(c10::intrusive_ptr ptr) + : obj_ptr(std::move(ptr)) {} +}; + +// IValue is the generic tagged union used by the interpreter to hold +// all value types. +// It is a 16-byte object with an 8-byte payload and an 8-byte tag. +// The tag is currently 4 bytes to determine the type, and 1 byte +// to mark whether that type is a subtype of c10::intrusive_ptr_target and needs +// retain/release calls. + +#define TORCH_FORALL_TAGS(_) \ + _(None) \ + _(Tensor) \ + _(Storage) \ + _(Double) \ + _(ComplexDouble) \ + _(Int) \ + _(UInt) \ + _(SymInt) \ + _(SymFloat) \ + _(SymBool) \ + _(Bool) \ + _(Tuple) \ + _(String) \ + _(Blob) \ + _(GenericList) \ + _(GenericDict) \ + _(Future) \ + _(Await) \ + _(Device) \ + _(Stream) \ + _(Object) \ + _(PyObject) \ + _(Uninitialized) \ + _(Capsule) \ + _(RRef) \ + _(Quantizer) \ + _(Generator) \ + _(Enum) + +// [doxygen private] +// These methods are not actually private but we don't want to document them, so +// they are marked `@private`, which hides them on the doxygen documentation for +// this page. 
+ +/// IValue (Interpreter Value) is a tagged union over the types +/// supported by the TorchScript interpreter. IValues contain their +/// values as an `IValue::Payload`, which holds primitive types +/// (`int64_t`, `bool`, `double`, `Device`) and `Tensor` as values, +/// and all other types as a `c10::intrusive_ptr`. In order to +/// optimize performance of the destructor and related operations by +/// making the `Tensor` and `c10::intrusive_ptr` paths generate the +/// same code, we represent a null `c10::intrusive_ptr` as +/// `UndefinedTensorImpl::singleton()`, *not* `nullptr`. +/// +/// IValues are used as inputs to and outputs from the TorchScript interpreter. +/// To retrieve the value contained within an IValue, use the `.toX()` methods, +/// where `X` is the type you are trying to get. Note that neither the `.toX()` +/// methods nor the templated `.to` functions do any kind of casting, they +/// only unwrap the contained value. For example: +/// +/// \rst +/// .. code-block:: cpp +/// +/// // Make the IValue +/// torch::IValue my_ivalue(26); +/// std::cout << my_ivalue << "\n"; +/// +/// // Unwrap the IValue +/// int64_t my_int = my_ivalue.toInt(); +/// std::cout << my_int << "\n"; +/// +/// // This will throw an error! 
+/// // `my_ivalue` is tagged as an int and cannot be used as another type +/// torch::Tensor my_tensor = my_ivalue.toTensor(); +/// \endrst +struct TORCH_API IValue final { + IValue(const IValue& rhs) : IValue(rhs.payload, rhs.tag) { + if (isIntrusivePtr() && + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr); + } + } + + IValue(IValue&& rhs) noexcept : tag(rhs.tag) { + moveFrom(std::move(rhs)); + } + + /// @private [doxygen private] + ~IValue() { + destroy(); + } + + C10_ALWAYS_INLINE IValue& operator=(IValue&& rhs) & noexcept { + if (&rhs == this) { + return *this; + } + + destroy(); + moveFrom(std::move(rhs)); + return *this; + } + + IValue& operator=(IValue const& rhs) & { + *this = IValue(rhs); + return *this; + } + + void dump() const; + + /** + * Equality comparison. The semantics are the same as Python's `==`: + * 1. Numerical types are compared by value. + * 2. Tensors compute element-wise equality, returning a BoolTensor (see: + * `torch.eq()`) + * 3. Strings are compared by value. + * 4. Sequence types (list, tuple) are compared lexicographically by + * comparing their elements. Different sequence types never compare equal. + * 5. Mappings (dict) must have equal (key, value) pairs. + * 6. If not listed above, the default behavior for is to test identity + * equality (e.g. pointer equality). + * + * Why does this return an IValue instead of a bool? Because in PyTorch, + * `tensor1 == tensor2` returns a `BoolTensor`, not a bool. + * + * NOTE: we (like Python) assume that identity equality implies value equality + * for efficiency. + * TODO: need to support customizing equality + */ + IValue equals(const IValue& rhs) const; + /** + * This implements the same semantics as `bool(lhs == rhs)` in Python. which + * is the same as `equals()` except for Tensor types. 
+ */ + TORCH_API friend bool operator==(const IValue& lhs, const IValue& rhs); + TORCH_API friend bool operator!=(const IValue& lhs, const IValue& rhs); + + /** + * Identity comparison. Checks if `this` is the same object as `rhs`. The + * semantics are the same as Python's `is` operator. + * + * NOTE: Like in Python, this operation is poorly defined for primitive types + * like numbers and strings. Prefer to use `==` unless you really want to + * check identity equality. + */ + bool is(const IValue& rhs) const; + + /** + * Hashing for IValues. Returns an IValue-boxed int. + * + * Some notes: + * - Like eager, Tensors are hashed by looking at the pointer. This is not + * strictly correct because two value-equal tensors with different tensor + * pointers will hash differently, but we choose to reproduce the eager + * semantics. + * - Hashing is not defined on all built-in IValue types (e.g. list and + * dict), following Python. Calling `hash()` on these types will throw. + */ + IValue hash() const { + return (int64_t)IValue::hash(*this); + } + // This is defined because `c10::hash` dispatches to a function of this + // signature. See the member function `hash()`. + static size_t hash(const IValue& iv); + + /** + * @private [doxygen private] + * [container equality] + * This is an equality implementation that assumes objects with the same + * identity equal themselves, for efficiency reasons. We primarily have this + * for consistency, because Python does the same thing. 
This actually + * provokes user-visible changes in behavior due to quirks in torch: + * [tensor1] == [tensor1] -> True (because container equality will first + * compare identity) [tensor1] == [tensor1_copy] -> RuntimeError: + * Boolean value of Tensor with more than one value is ambiguous + */ + TORCH_API friend bool _fastEqualsForContainer( + const IValue& lhs, + const IValue& rhs); + + private: + static bool isAliasOf(const at::Tensor& a, const at::Tensor& b) { + if (a.is_sparse()) { + return isAliasOf(a._values(), b) || isAliasOf(a._indices(), b); + } + if (b.is_sparse()) { + return isAliasOf(a, b._values()) || isAliasOf(a, b._indices()); + } + if (a.is_sparse_csr()) { + return isAliasOf(a.values(), b) || isAliasOf(a.crow_indices(), b) || + isAliasOf(a.col_indices(), b); + } + if (b.is_sparse_csr()) { + return isAliasOf(a, b.values()) || isAliasOf(a, b.crow_indices()) || + isAliasOf(a, b.col_indices()); + } + + // Opaque tensors such as the ones constructed by the MKL-DNN backend + // don't have storage so we just compare their TensorImpls. + // TODO: Find way to expose alias info for opaque tensors. 
+ if (!a.has_storage() || !b.has_storage()) { + return a.unsafeGetTensorImpl() == b.unsafeGetTensorImpl(); + } + + return a.is_alias_of(b); + } + + template + bool isListOf() const; + + public: + /// @private [doxygen private] + bool isAliasOf(const IValue& rhs) const { + if (this->tag != rhs.tag) { + // Trivially don't alias if the type is different + return false; + } + + // Tensors should be compared based on internal storage + if (this->isTensor()) { + return isAliasOf(this->toTensor(), rhs.toTensor()); + } + + if (!isIntrusivePtr()) { + // Primitive types don't alias anything + return false; + } + + AT_ASSERT(rhs.isIntrusivePtr()); + + // Other types can be compared by their ptr value + return this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr; + } + + /// @private [doxygen private] + size_t use_count() const noexcept { + if (isTensor()) { + return payload.as_tensor.use_count(); + } + + if (!isIntrusivePtrLegacyBehavior()) { + return 1; + } + + if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) { + return 0; + } + return c10::raw::intrusive_ptr::use_count(payload.u.as_intrusive_ptr); + } + + /// @private [doxygen private] + void swap(IValue& rhs) noexcept { + if (isTensor() && rhs.isTensor()) { + std::swap(payload.as_tensor, rhs.payload.as_tensor); + } else if (isTensor()) { + at::Tensor t = std::move(payload.as_tensor); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. 
+ // + // payload.as_tensor.~Tensor(); + payload.u = rhs.payload.u; + new (&rhs.payload.as_tensor) at::Tensor(std::move(t)); + } else if (rhs.isTensor()) { + rhs.swap(*this); + return; + } else { + std::swap(payload.u, rhs.payload.u); + } + std::swap(tag, rhs.tag); + } + + // Accessors for subtypes are arranged together below + // While some of these accessors could be generated through templates, + // we prefer to write them manually for clarity + + IValue(at::TensorBase t) : tag(Tag::Tensor) { + new (&payload.as_tensor) at::Tensor(std::move(t)); + } + bool isTensor() const { + return Tag::Tensor == tag; + } + + private: + // Outlined error path so that toTensor() can be inlined. + [[noreturn]] void reportToTensorTypeError() const; + + public: + at::Tensor toTensor() &&; + at::Tensor& toTensor() &; + const at::Tensor& toTensor() const&; + at::TensorImpl* unsafeToTensorImpl() const { + TORCH_INTERNAL_ASSERT(isTensor()); + return payload.as_tensor.unsafeGetTensorImpl(); + } + + IValue(at::Storage s) : tag(Tag::Storage) { + payload.u.as_intrusive_ptr = + null_to_undefined_tensor(s.unsafeReleaseStorageImpl()); + } + bool isStorage() const { + return Tag::Storage == tag; + } + c10::Storage toStorage() &&; + c10::Storage toStorage() const&; + + const IValue& toIValue() const { + return *this; + } + IValue& toIValue() { + return *this; + } + + /// @private [doxygen private] + IValue(intrusive_ptr blob) : tag(Tag::Blob) { + // TODO (after Tensor merge) If we pass in a Blob holding a Tensor, extract + // and store it as a Tensor instead. + payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release()); + } + + /// @private [doxygen private] + bool isBlob() const { + return Tag::Blob == tag; + } + + /// @private [doxygen private] + c10::intrusive_ptr toBlob() &&; + + /// @private [doxygen private] + c10::intrusive_ptr toBlob() const&; + + // Capsule. No new callsites of these APIs should + // be introduced. 
+ static inline IValue make_capsule( + intrusive_ptr blob); + bool isCapsule() const { + return Tag::Capsule == tag; + } + c10::intrusive_ptr toCapsule() &&; + c10::intrusive_ptr toCapsule() const&; + + // Custom C++ classes + template < + typename T, + std::enable_if_t, int> = 0> + IValue(intrusive_ptr custom_class); + bool isCustomClass() const; + template + c10::intrusive_ptr toCustomClass() &&; + template + c10::intrusive_ptr toCustomClass() const&; + + // Tuple + IValue(c10::intrusive_ptr v); + + template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> + IValue(const std::tuple& t); + template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> + IValue(std::tuple&& t); + bool isTuple() const { + return Tag::Tuple == tag; + } + c10::intrusive_ptr toTuple() &&; + c10::intrusive_ptr toTuple() const&; + [[nodiscard]] ivalue::Tuple& toTupleRef() const; + + // Double + IValue(double d) : tag(Tag::Double) { + payload.u.as_double = d; + } + bool isDouble() const { + return Tag::Double == tag; + } + double toDouble() const { + if (isDouble()) { + return payload.u.as_double; + } else if (isSymFloat()) { + return toSymFloat().guard_float(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected double"); + } + } + + // ComplexDouble + template + IValue(c10::complex c); + bool isComplexDouble() const { + return Tag::ComplexDouble == tag; + } + c10::complex toComplexDouble() const; + + // Future + IValue(c10::intrusive_ptr v); + bool isFuture() const { + return Tag::Future == tag; + } + c10::intrusive_ptr toFuture() &&; + c10::intrusive_ptr toFuture() const&; + + IValue(c10::intrusive_ptr v); + bool isAwait() const { + return Tag::Await == tag; + } + c10::intrusive_ptr toAwait() &&; + c10::intrusive_ptr toAwait() const&; + + // RRef + IValue(c10::intrusive_ptr v); + bool 
isRRef() const { + return Tag::RRef == tag; + } + c10::intrusive_ptr toRRef() &&; + c10::intrusive_ptr toRRef() const&; + + // Quantizer + IValue(c10::intrusive_ptr v); + bool isQuantizer() const { + return Tag::Quantizer == tag; + } + c10::intrusive_ptr toQuantizer() &&; + c10::intrusive_ptr toQuantizer() const&; + + // Int + IValue(int64_t i) : tag(Tag::Int) { + payload.u.as_int = i; + } + + IValue(const c10::SymInt& i) { + if (auto mi = i.maybe_as_int()) { + tag = Tag::Int; + payload.u.as_int = *mi; + } else { + tag = Tag::SymInt; + payload.u.as_intrusive_ptr = i.toSymNode().release(); + } + } + + bool isSymInt() const { + return Tag::SymInt == tag; + } + + c10::SymInt toSymInt() &&; + c10::SymInt toSymInt() const&; + + IValue(const c10::SymFloat& i) { + if (i.is_symbolic()) { + tag = Tag::SymFloat; + payload.u.as_intrusive_ptr = i.toSymNodeImpl().release(); + } else { + tag = Tag::Double; + payload.u.as_double = i.as_float_unchecked(); + } + } + + bool isSymFloat() const { + return Tag::SymFloat == tag; + } + + c10::SymFloat toSymFloat() &&; + c10::SymFloat toSymFloat() const&; + + IValue(const c10::SymBool& i) { + if (auto mi = i.maybe_as_bool()) { + tag = Tag::Bool; +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ + payload.u.as_int = *mi; +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ + /* due to byteorder if value assigned as_int, as_bool actually is not set correctly */ + payload.u.as_bool = *mi; +#else +#error Unexpected or undefined __BYTE_ORDER__ +#endif + } else { + tag = Tag::SymBool; + payload.u.as_intrusive_ptr = i.toSymNodeImpl().release(); + } + } + + bool isSymBool() const { + return Tag::SymBool == tag; + } + + c10::SymBool toSymBool() &&; + c10::SymBool toSymBool() const&; + + // allow you to pass literals (3, 4) without ambiguity + IValue(int32_t i) : IValue(static_cast(i)) {} + + bool isInt() const { + return Tag::Int == tag; + } + + int64_t toInt() const { + if (isInt()) { + return payload.u.as_int; + } else if (isSymInt()) { + return 
toSymInt().guard_int(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected int"); + } + } + + // Unsigned + IValue(uint64_t u) : tag( u <= std::numeric_limits::max() ? Tag::Int : Tag::UInt) { + payload.u.as_uint = u; + } + + + // See Note [Meaning of HAS_u] + // IValue type model closely follows that of c10::Scalar + // Where all integers are upcast to 64-bit representation, and `as_int` is used as default + // representation unless value could not be represented as signed int + bool isUnsigned() const { + return Tag::UInt == tag || (Tag::Int == tag && payload.u.as_int >= 0); + } + + uint64_t toUInt() const { + if (isUnsigned()) { + return payload.u.as_uint; + } else { + TORCH_INTERNAL_ASSERT(0, "expected unsigned int"); + } + } + + + // Bool + IValue(bool b) : tag(Tag::Bool) { +#if defined(__clang__) && defined(__x86_64__) + // Initializing entire payload stops valgrind's from reporting + // "jump or move depends on uninitialised value" in IValue copy constructor + // See https://github.com/pytorch/pytorch/issues/37117 + payload.u.as_int = b; +#else + payload.u.as_bool = b; +#endif + } + bool isBool() const { + return Tag::Bool == tag; + } + bool toBool() const { + if (isBool()) { + return payload.u.as_bool; + } else if (isSymBool()) { + return toSymBool().guard_bool(__FILE__, __LINE__); + } else { + TORCH_INTERNAL_ASSERT(0, "expected bool"); + } + } + + // IntList + bool isIntList() const; + bool isSymIntList() const; + c10::List toIntList() &&; + c10::List toIntList() const&; + std::vector toIntVector() const; + c10::List toSymIntList() &&; + c10::List toSymIntList() const&; + std::vector toSymIntVector() const; + at::DimVector toDimVector() const; + + // ConstantString + IValue(c10::intrusive_ptr v); + IValue(std::string v); + IValue(const char* v) : IValue(std::string(v)) {} + IValue(std::string_view v) : IValue(std::string(v)){} + bool isString() const { + return Tag::String == tag; + } + c10::intrusive_ptr toString() &&; + c10::intrusive_ptr 
toString() const&; + const std::string& toStringRef() const; + std::optional> toOptionalStringRef() + const; + std::string_view toStringView() const; + + // DoubleList + bool isDoubleList() const; + c10::List toDoubleList() &&; + c10::List toDoubleList() const&; + std::vector toDoubleVector() const; + + // ComplexDoubleList + bool isComplexDoubleList() const; + c10::List> toComplexDoubleList() &&; + c10::List> toComplexDoubleList() const&; + std::vector> toComplexDoubleVector() const; + + // BoolList + bool isBoolList() const; + c10::List toBoolList() &&; + c10::List toBoolList() const&; + + // TensorList + bool isTensorList() const; + c10::List toTensorList() &&; + c10::List toTensorList() const&; + std::vector toTensorVector() const; + + // OptionalTensorList + bool isOptionalTensorList() const; + c10::List> toOptionalTensorList() &&; + c10::List> toOptionalTensorList() const&; + std::vector> toOptionalTensorVector() const; + + // GenericList + IValue(c10::List v); + bool isList() const { + return Tag::GenericList == tag; + } + c10::List toList() &&; + c10::List toList() const&; + c10::ArrayRef toListRef() const; + + // Some template constructors of IValue calls another constructor recursively. + // This SFINAEs the called constructor exists. + template + using enable_if_ivalue_constructible = + std::enable_if_t, std::nullptr_t>; + + // The rule for lists is more complicated; the generic constructor is only + // acceptable if your element isn't SymInt. If you do have a SymInt element, + // then you must also, at construction time, check if you can decay the list + // into an int list (this is MANDATORY, as at a use site we may expect + // toIntList to work even if at the call site you had a SymIntArrayRef + // argument). In practice, only SymIntArrayRef is used this way, so we + // didn't bother making it work for the other constructors, we just make sure + // they're not selectable. 
+ template + using enable_if_list_is_ivalue_constructible = std::enable_if_t< + std::is_constructible_v && !std::is_same_v, + std::nullptr_t>; + + template = nullptr> + IValue(c10::List&& v); + template = nullptr> + IValue(const c10::List& v); + template = nullptr> + IValue(at::ArrayRef v); + template = nullptr> + IValue(const std::vector& v); + template = nullptr> + IValue(std::vector&& v); + template + IValue(std::array v); + + // Manual constructors for lists of symints, which decay to int list if + // possible. To avoid ambiguous overload situations, we template them + // to prevent implicit conversions + template + using enable_if_symint = + std::enable_if_t, std::nullptr_t>; + + template = nullptr> + IValue(at::ArrayRef v); + template = nullptr> + IValue(at::OptionalArrayRef v); + template = nullptr> + IValue(const std::vector& v); + template = nullptr> + IValue(std::vector&& v); + + template + using enable_if_ilist_is_ivalue_constructible = std::enable_if_t< + std::is_constructible_v && + std::is_constructible_v::boxed_type> && + !std::is_same_v, + std::nullptr_t>; + + template = nullptr> + IValue(c10::IListRef v); + + // GenericDict + IValue(c10::Dict v); + bool isGenericDict() const { + return Tag::GenericDict == tag; + } + c10::Dict toGenericDict() &&; + c10::Dict toGenericDict() const&; + + template + IValue(c10::Dict v); + + template + /// \cond + /// DOXYGEN_CANNOT_HANDLE_CONSTRUCTORS_WITH_MACROS_SO_EXCLUDE_THIS_LINE_FROM_DOXYGEN + C10_DEPRECATED_MESSAGE( + "IValues based on std::unordered_map are slow and deprecated. 
Please use c10::Dict instead.") + /// \endcond + IValue(std::unordered_map v); + + template = nullptr> + IValue(std::optional v); + template = nullptr> + IValue(c10::OptionalArrayRef v); + IValue(std::nullopt_t /*unused*/); + + // ClassType + IValue(c10::intrusive_ptr v); + bool isObject() const { + return tag == Tag::Object; + } + c10::intrusive_ptr toObject() &&; + c10::intrusive_ptr toObject() const&; + ivalue::Object& toObjectRef() const; + + torch::jit::Module toModule() const; + bool isModule() const; + + // PyObject + IValue(c10::intrusive_ptr v); + bool isPyObject() const { + return tag == Tag::PyObject; + } + c10::intrusive_ptr toPyObjectHolder() &&; + c10::intrusive_ptr toPyObjectHolder() const&; + PyObject* toPyObject() const; + + // Enum + explicit IValue(c10::intrusive_ptr v); + bool isEnum() const { + return tag == Tag::Enum; + } + c10::intrusive_ptr toEnumHolder() &&; + c10::intrusive_ptr toEnumHolder() const&; + + // None + IValue() = default; + bool isNone() const { + return Tag::None == tag; + } + std::string toNone() const { + AT_ASSERT(isNone()); + return "None"; + } + + static IValue uninitialized() { + auto i = IValue(); + i.tag = Tag::Uninitialized; + return i; + } + + // Scalar, which gets encoded as either an Int, a Double or a ComplexDouble + IValue(const at::Scalar& s) : IValue() { + // NB: do the symbolic versions first, as isFloatingPoint is true + // for both SymFloat and double + if (s.isSymInt()) { + tag = Tag::SymInt; + payload.u.as_intrusive_ptr = s.toSymInt().toSymNode().release(); + } else if (s.isSymFloat()) { + tag = Tag::SymFloat; + payload.u.as_intrusive_ptr = s.toSymFloat().toSymNodeImpl().release(); + } else if (s.isSymBool()) { + tag = Tag::SymBool; + payload.u.as_intrusive_ptr = s.toSymBool().toSymNodeImpl().release(); + } else if (s.isFloatingPoint()) { + tag = Tag::Double; + payload.u.as_double = s.toDouble(); + } else if (s.isComplex()) { + *this = s.toComplexDouble(); + } else if (s.isBoolean()) { + tag = Tag::Bool; + 
payload.u.as_bool = s.toBool(); + } else { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + s.isIntegral(false), "Unknown type in Scalar"); + if (s.isUnsigned()) { + const auto val = s.toUInt64(); + payload.u.as_uint = val; + tag = val <= std::numeric_limits::max() ? Tag::Int : Tag::UInt; + } else { + payload.u.as_int = s.toLong(); + tag = Tag::Int; + } + } + } + + bool isScalar() const { + return isDouble() || isInt() || isComplexDouble() || isBool() || + isSymInt() || isSymFloat() || isSymBool(); + } + + at::Scalar toScalar() const { + if (isDouble()) + return toDouble(); + else if (isInt()) + return toInt(); + else if (isComplexDouble()) + return toComplexDouble(); + else if (isBool()) + return toBool(); + else if (isSymInt()) + return toSymInt(); + else if (isSymFloat()) + return toSymFloat(); + else if (isSymBool()) + return toSymBool(); + else if (isUnsigned()) + return toUInt(); + TORCH_CHECK(false, "IValue is not a Scalar"); + } + + // Device + IValue(c10::Device d) : tag(Tag::Device) { + payload.u.as_device.type = d.type(); + payload.u.as_device.index = d.index(); + } + bool isDevice() const { + return Tag::Device == tag; + } + c10::Device toDevice() const { + AT_ASSERT(isDevice()); + return c10::Device(payload.u.as_device.type, payload.u.as_device.index); + } + + // Stream + IValue(c10::Stream s) : tag(Tag::Stream) { + auto v = c10::make_intrusive(s.pack3()); + payload.u.as_intrusive_ptr = v.release(); + } + c10::Stream toStream() &&; + c10::Stream toStream() const&; + bool isStream() const { + return Tag::Stream == tag; + } + + // ScalarType + IValue(ScalarType t) + : IValue(static_cast>(t)) {} + at::ScalarType toScalarType() const { + return static_cast(toInt()); + } + + // Layout + IValue(Layout l) : IValue(static_cast>(l)) {} + at::Layout toLayout() const { + return static_cast(toInt()); + } + + // MemoryFormat + IValue(MemoryFormat m) + : IValue(static_cast>(m)) {} + at::MemoryFormat toMemoryFormat() const { + return static_cast(toInt()); + } + + // QScheme + 
IValue(at::QScheme qscheme) : tag(Tag::Int) { + payload.u.as_int = static_cast(qscheme); + } + + at::QScheme toQScheme() const { + return static_cast(toInt()); + } + + // Dimname + IValue(at::Dimname dimname) : IValue(dimname.symbol().toQualString()) {} + + at::Dimname toDimname() const { + return at::Dimname::fromSymbol(Symbol::fromQualString(toStringRef())); + } + + // Generator + IValue(at::Generator g) : tag(Tag::Generator) { + payload.u.as_intrusive_ptr = + null_to_undefined_tensor(g.unsafeReleaseGeneratorImpl()); + } + bool isGenerator() const { + return Tag::Generator == tag; + } + at::Generator toGenerator() &&; + at::Generator toGenerator() const&; + + // for debugging + std::string tagKind() const { + switch (tag) { +#define DEFINE_CASE(x) \ + case Tag::x: \ + return #x; + TORCH_FORALL_TAGS(DEFINE_CASE) +#undef DEFINE_CASE + } + return "InvalidTag(" + std::to_string(static_cast(tag)) + ")"; + } + + // generic v.to() implementations + // that can be used in special functions like pop/push + // that use template meta-programming. + // prefer the directly named methods when you can, + // since they are simpler to understand + + // Note: if you get linker errors saying one of these is missing, + // change it to ... && = delete; and you will see better error messages for + // why However, we cannot commit this because some compiler versions barf on + // it. + template + T to() &&; + template + typename c10::detail::ivalue_to_const_ref_overload_return::type to() + const&; + + // ToOptional: convert a IValue to the Optional obj that accepts both T and + // None + template + std::optional toOptional(); + template + std::optional toOptional() const; + + /// @private [doxygen private] + /// this is a shallow comparison of two IValues to test the object identity + bool isSameIdentity(const IValue& rhs) const; + + // Computes the "official" string representation of an IValue. 
This produces a + // TorchScript expression that can be used to recreate an IValue with the same + // value (e.g. when we are printing constants in the serializer). + // + // Callers can use `customFormatter` to override how `repr()` prints out an + // IValue. This is useful if you have some other environment where you can + // look up values, and you want to print a reference to that environment (like + // the serializer's constant table). + // + // repr() is not necessarily defined on all objects! + std::ostream& repr( + std::ostream& stream, + std::function customFormatter) + const; + + // Computes an "informal" string representation of an IValue. This should be + // used for debugging, or servicing `print()`-like functions. + // This is different from `repr()` in that there is no expectation that we can + // exactly reconstruct an IValue from the output; feel free to use a + // concise/pretty form + TORCH_API friend std::ostream& operator<<(std::ostream& out, const IValue& v); + + bool isPtrType() const { + if (isTensor()) { + return payload.as_tensor.defined(); + } + return isIntrusivePtrLegacyBehavior(); + } + + /// @private [doxygen private] + const void* internalToPointer() const { + TORCH_INTERNAL_ASSERT( + isPtrType(), "Can only call internalToPointer() for pointer types"); + if (isTensor()) { + return payload.as_tensor.unsafeGetTensorImpl(); + } else { + return payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton() + ? payload.u.as_intrusive_ptr + : nullptr; + } + } + + template + TypePtr type() const; + + // Detect aliased tensors. + struct HashAliasedIValue { + size_t hashTensor(const at::Tensor& ten) const { + if (ten.is_sparse()) { + // COO sparse tensors have a "values" tensor and an "indices" tensor + // so this will detect overlap of sparse tensors that share a values + // tensor, but not sparse tensors that share an indices tensor. 
+ return hashTensor(ten._values()); + } else if (ten.is_sparse_csr()) { + // COO sparse tensors have a "values" tensor and an "indices" tensor + // so this will detect overlap of sparse tensors that share a values + // tensor, but not sparse tensors that share an indices tensor. + return hashTensor(ten.values()); + } else if (!ten.has_storage()) { + // Opaque tensors such as the ones constructed by the MKL-DNN backend + // don't have storage so we just use their TensorImpls. + // TODO: Find way to expose alias info for opaque tensors. + return reinterpret_cast(ten.unsafeGetTensorImpl()); + } else { + return reinterpret_cast(ten.storage().unsafeGetStorageImpl()); + } + } + size_t operator()(const IValue& val) const { + if (val.isTensor()) { + return hashTensor(val.toTensor()); + } + // If it is not a Tensor, then two mutable IValues alias each other only + // if they are the same pointer. + return val.payload.u.as_int; + } + }; + + struct CompAliasedIValues { + bool operator()(const IValue& lhs, const IValue& rhs) const { + return lhs.isAliasOf(rhs); + } + }; + + using HashAliasedIValues = + std::unordered_set; + using HashAliasedIValueMap = + std::unordered_map; + + struct HashIdentityIValue { + size_t operator()(const IValue& val) const { + return val.payload.u.as_int; + } + }; + + struct CompIdentityIValues { + bool operator()(const IValue& lhs, const IValue& rhs) const { + return lhs.is(rhs); + } + }; + + using HashIdentityIValues = + std::unordered_set; + using HashIdentityIValueMap = + std::unordered_map; + + // Checks if this and rhs has a subvalues in common. + // [t1,t2] and [t2, t3] returns true. + bool overlaps(const IValue& rhs) const; + + // Inserts all subvalues of this in subValues. + void getSubValues(HashAliasedIValues& subValues) const; + + // Apply visitor to every subvalue. + // TODO: There are several places that recurse over IValue. This is fragile. + // This visitor should be used to recurse over ivalues. 
+ void visit(const std::function& visitor) const; + IValue deepcopy(std::optional device = std::nullopt) const; + IValue deepcopy( + HashIdentityIValueMap& memo, + std::optional device = std::nullopt) const; + + private: + static c10::intrusive_ptr_target* null_to_undefined_tensor( + c10::intrusive_ptr_target* p) { + return p ? p + : static_cast( + c10::UndefinedTensorImpl::singleton()); + } + + static bool ptrEqual(const IValue& lhs, const IValue& rhs); + // NOTE: IValue tags are intentionally private. In the future we may encode + // this value different (e.g. using NaN boxing), and this would make it more + // costly to determine the tag for all types vs just determining if something + // is a particular type. Instead we want clients to use the `isX` methods when + // possible. If for performance reasons you really, absolutely, must have a jump + // table, then we can revisit this. + enum class Tag : uint32_t { +#define DEFINE_TAG(x) x, + TORCH_FORALL_TAGS(DEFINE_TAG) +#undef DEFINE_TAG + }; + +#define COUNT_TAG(x) 1 + + static constexpr auto kNumTags = TORCH_FORALL_TAGS(COUNT_TAG) 0; +#undef COUNT_TAG + + template < + class T, + class NullType = c10::detail::intrusive_target_default_null_type> + c10::intrusive_ptr moveToIntrusivePtr(); + template < + typename T, + class NullType = c10::detail::intrusive_target_default_null_type> + c10::intrusive_ptr toIntrusivePtr() const; + + void destroy() { + // We carefully construct this call to both 1) avoid UB by using + // the "wrong" one of as_tensor and as_intrusive_ptr and 2) enable + // the compiler to generate the same code for each case. It is + // surprisingly difficult to get this right. + if (isTensor() || isIntrusivePtr()) { + c10::intrusive_ptr_target* p = isTensor() + ? payload.as_tensor.unsafeGetTensorImpl() + : payload.u.as_intrusive_ptr; + c10::intrusive_ptr:: + reclaim(p); + // No need to make this destructor call! 
+ // payload.as_tensor.~Tensor(); + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + C10_ALWAYS_INLINE void moveFrom(IValue&& rhs) noexcept { + if (rhs.isTensor()) { + new (&payload.as_tensor) at::Tensor(std::move(rhs.payload.as_tensor)); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. + // + // rhs.payload.as_tensor.~Tensor(); + } else { + payload.u = rhs.payload.u; + } + tag = rhs.tag; + rhs.clearToNone(); + } + + void clearToNone() noexcept { + payload.u.as_int = 0; + tag = Tag::None; + } + + private: + // This is the source of truth for isIntrusivePtr; edit results here + // as needed and isIntrusivePtr will pick them up. 
+ // NOLINTBEGIN(bugprone-branch-clone) + static constexpr bool isIntrusivePtrConstexpr(Tag tag) { + switch (tag) { + case Tag::None: + return false; + case Tag::Tensor: + return false; + case Tag::Storage: + return true; + case Tag::Generator: + return true; + case Tag::Double: + return false; + case Tag::ComplexDouble: + return true; + case Tag::Int: + return false; + case Tag::UInt: + return false; + case Tag::SymInt: + return true; + case Tag::SymFloat: + return true; + case Tag::SymBool: + return true; + case Tag::Bool: + return false; + case Tag::Tuple: + return true; + case Tag::String: + return true; + case Tag::Blob: + return true; + case Tag::GenericList: + return true; + case Tag::GenericDict: + return true; + case Tag::Future: + return true; + case Tag::Await: + return true; + case Tag::Device: + return false; + case Tag::Stream: + return true; + case Tag::Object: + return true; + case Tag::PyObject: + return true; + case Tag::Uninitialized: + return false; + case Tag::Capsule: + return true; + case Tag::RRef: + return true; + case Tag::Quantizer: + return true; + case Tag::Enum: + return true; + } + return false; + } + // NOLINTEND(bugprone-branch-clone) + + public: + // Don't edit this just to add results for new tags; edit + // isIntrusivePtrConstexpr above. + bool isIntrusivePtr() const { + // Implementation NOTE: the switch in isIntrusivePtrConstexpr + // above is the previous production implementation of this + // function. We observed that, at least on x86_64, the generated + // instruction sequence was a similar bit vector test to what we + // have manually implemented below, except that there was an extra + // "bounds check" branch confirming, essentially, that `tag < + // kNumTags` and providing a consistent result in that case. 
We + // don't care about the result if tag is out of bounds, so we'd + // like to eliminate that comparison and branch; manually + // implementing this function as a bit test is the simplest way I + // could find to accomplish that elimination. + static constexpr uint32_t kTruthTableBitVector = +#define TRUTH_TABLE_ENTRY(tag) \ + (uint32_t(isIntrusivePtrConstexpr(Tag::tag)) << uint32_t(Tag::tag)) | + TORCH_FORALL_TAGS(TRUTH_TABLE_ENTRY) +#undef TRUTH_TABLE_ENTRY + 0; + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + static_cast(tag) < kNumTags, + "unexpected tag ", + static_cast(tag)); + return kTruthTableBitVector & (1 << (uint32_t(tag) % 32)); + } + + // Storage and Generator were treated specially when + // is_intrusive_ptr was stored as explicit state. This getter + // preserves the old behavior for use with WeakIValue for now. + bool isIntrusivePtrLegacyBehavior() const { + if (tag == Tag::Storage || tag == Tag::Generator) { + return payload.u.as_intrusive_ptr != + c10::UndefinedTensorImpl::singleton(); + } else { + return isIntrusivePtr(); + } + } + + union Payload { + // [TriviallyCopyablePayload] + // We use a nested union here so that we can make the copy easy + // and efficient in the non-tensor (i.e., trivially copyable) + // case. Specifically, we do not have to do a switch-on-tag to + // figure out which union member to assign; we can just use + // TriviallyCopyablePayload::operator=. + union TriviallyCopyablePayload { + TriviallyCopyablePayload() : as_int(0) {} + int64_t as_int; + // See Note [Meaning of HAS_u] + uint64_t as_uint; + double as_double; + bool as_bool; + // Invariant: never nullptr; null state is represented as + // c10::UndefinedTensorImpl::singleton() for consistency of + // representation with Tensor. 
+ c10::intrusive_ptr_target* as_intrusive_ptr; + struct { + c10::DeviceType type; + DeviceIndex index; + } as_device; + } u; + static_assert(std::is_trivially_copyable_v); + at::Tensor as_tensor; + Payload() : u() {} + Payload(const Payload&) = delete; + Payload(Payload&&) = delete; + Payload& operator=(const Payload&) = delete; + Payload& operator=(Payload&&) = delete; + // NOLINTNEXTLINE(modernize-use-equals-default) + ~Payload() {} + }; + + IValue(const Payload& p, Tag t) : tag(t) { + if (isTensor()) { + new (&payload.as_tensor) at::Tensor(p.as_tensor); + } else { + payload.u = p.u; + } + } + + template + struct TagType {}; + + friend MaybeOwnedTraits; + + Payload payload; + Tag tag{IValue::Tag::None}; + friend struct WeakIValue; +}; + +struct TORCH_API WeakIValue final { + WeakIValue() = default; + + WeakIValue(const WeakIValue& rhs) + : payload(rhs.payload), + tag(rhs.tag), + is_intrusive_ptr(rhs.is_intrusive_ptr) { + if (is_intrusive_ptr && + payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::incref(payload.as_intrusive_ptr); + } + } + WeakIValue(const IValue& rhs) + : tag(rhs.tag), is_intrusive_ptr(rhs.isIntrusivePtrLegacyBehavior()) { + if (rhs.isTensor()) { + payload.as_intrusive_ptr = rhs.unsafeToTensorImpl(); + is_intrusive_ptr = true; + } else { + payload = rhs.payload.u; + } + if (is_intrusive_ptr) { + if (payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::incref(payload.as_intrusive_ptr); + } + } + } + WeakIValue(WeakIValue&& rhs) noexcept : WeakIValue() { + swap(rhs); + } + ~WeakIValue() { + if (is_intrusive_ptr && + payload.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton()) { + c10::raw::weak_intrusive_ptr::decref(payload.as_intrusive_ptr); + } + } + WeakIValue& operator=(WeakIValue&& rhs) & noexcept { + WeakIValue(std::move(rhs)).swap(*this); // this also sets rhs to None + return *this; + } + WeakIValue& operator=(WeakIValue const& rhs) & { 
+ WeakIValue(rhs).swap(*this); + return *this; + } + void swap(WeakIValue& rhs) noexcept { + std::swap(payload, rhs.payload); + std::swap(is_intrusive_ptr, rhs.is_intrusive_ptr); + std::swap(tag, rhs.tag); + } + + bool isSameIdentity(const WeakIValue& rhs) const { + return payload.as_int == rhs.payload.as_int && tag == rhs.tag && + is_intrusive_ptr == rhs.is_intrusive_ptr; + } + + IValue lock() const { + if (!is_intrusive_ptr) { + IValue::Payload newPayload; + newPayload.u = payload; + return IValue(newPayload, tag); + } + if (IValue::Tag::Tensor == tag) { + auto temp = + c10::weak_intrusive_ptr:: + reclaim(static_cast(payload.as_intrusive_ptr)); + c10::intrusive_ptr ip( + temp.lock()); + temp.release(); + if (!ip) { + return IValue(); + } else { + return IValue(at::Tensor(std::move(ip))); + } + } else { + auto temp = c10::weak_intrusive_ptr::reclaim( + payload.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton() + ? nullptr + : payload.as_intrusive_ptr); + IValue::Payload pl; + pl.u.as_intrusive_ptr = temp.lock().release(); + temp.release(); + if (!pl.u.as_intrusive_ptr) { + return IValue(); + } else { + return IValue(pl, tag); + } + } + } + + size_t use_count() const noexcept { + if (!is_intrusive_ptr) { + return 1; + } + auto temp = c10::weak_intrusive_ptr< + c10::intrusive_ptr_target, + c10::UndefinedTensorImpl>::reclaim(payload.as_intrusive_ptr); + size_t result = temp.use_count(); + temp.release(); + return result; + } + + size_t weak_use_count() const noexcept { + if (!is_intrusive_ptr) { + return 1; + } + auto temp = c10::weak_intrusive_ptr< + c10::intrusive_ptr_target, + c10::UndefinedTensorImpl>::reclaim(payload.as_intrusive_ptr); + size_t result = temp.weak_use_count(); + temp.release(); + return result; + } + size_t hash() const { + return payload.as_int; + } + + private: + using Payload = IValue::Payload::TriviallyCopyablePayload; + Payload payload; + IValue::Tag tag{IValue::Tag::None}; + bool is_intrusive_ptr{false}; +}; + +// An owning pointer 
to a type. When the type is class type, it requires a pair +// of shared_ptrs to the class type and its owning CU, so that the class type is +// guaranteed to stay alive as long as we hold this object. +struct TORCH_API StrongTypePtr { + StrongTypePtr(std::shared_ptr cu, TypePtr type); + + std::shared_ptr cu_; + TypePtr type_; +}; + +// [Constant Object Weak CompilationUnit Reference] +// A non owning pointer to a type. When a class get inserted as a constant +// into a graph, if we used a strong pointer we would have a circular reference +// from Object -> CompilationUnit and CompilationUnit -> Graph (which owns the +// Constant Object) +struct TORCH_API WeakTypePtr { + WeakTypePtr(std::weak_ptr cu, TypePtr type); + + std::weak_ptr cu_; + TypePtr type_; +}; + +// internal build errors with std::variant :/ +struct WeakOrStrongCompilationUnit { + explicit WeakOrStrongCompilationUnit( + std::shared_ptr shared_cu) + : strong_ptr_(std::move(shared_cu)), weak_ptr_(std::nullopt) {} + + explicit WeakOrStrongCompilationUnit( + std::weak_ptr weak_cu) + : strong_ptr_(std::nullopt), weak_ptr_(std::move(weak_cu)) {} + + std::shared_ptr getStrongRefOrThrow() const { + TORCH_INTERNAL_ASSERT(strong_ptr_.has_value()); + return *strong_ptr_; + } + + std::weak_ptr getWeakRefOrThrow() const { + TORCH_INTERNAL_ASSERT(weak_ptr_.has_value()); + return *weak_ptr_; + } + + bool holdingStrongRef() const { + return strong_ptr_.has_value(); + } + + bool holdingEmptyStrongRef() const { + return strong_ptr_ == nullptr; + } + + std::optional> strong_ptr_; + std::optional> weak_ptr_; +}; + +// An Object will hold a non-owning Compilation Unit reference if it is a +// Constant in the graph and a Owning reference otherwise +struct TORCH_API WeakOrStrongTypePtr { + explicit WeakOrStrongTypePtr(WeakTypePtr weak) + : cu_(WeakOrStrongCompilationUnit(std::move(weak.cu_))), + type_(std::move(weak.type_)) {} + explicit WeakOrStrongTypePtr(StrongTypePtr strong) + : 
cu_(WeakOrStrongCompilationUnit(std::move(strong.cu_))), + type_(std::move(strong.type_)) {} + explicit WeakOrStrongTypePtr(WeakOrStrongCompilationUnit cu, TypePtr type) + : cu_(std::move(cu)), type_(std::move(type)) {} + WeakTypePtr asWeakTypePtr() const; + + WeakOrStrongCompilationUnit cu_; + TypePtr type_; + + bool holds_strong_ref() const { + return cu_.holdingStrongRef(); + } + + bool holds_empty_strong_ref() const { + return cu_.holdingEmptyStrongRef(); + } +}; + +} // namespace c10 + +C10_DIAGNOSTIC_POP() + +#include // IWYU pragma: keep + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h new file mode 100644 index 0000000000000000000000000000000000000000..e68c8ba8128d8e32b265e6e891dbead9794768a3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_inl.h @@ -0,0 +1,2578 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-default") + +namespace torch { +namespace jit { +struct Function; +struct CompilationUnit; +} // namespace jit +TORCH_API bool isCustomClass(const c10::IValue& v); +} // namespace torch +namespace c10 { +struct IValue; +struct ClassType; +struct TupleType; +struct EnumType; +struct InferredType; + +// For custom class __init__ registration, we need to pass in a function +// that looks like 
this: [](IValue x, args...) + +// However, make_boxed_from_unboxed_functor.h automatically sets the input types +// of the function by introspecting the types of the functor (which is IValue in +// this case). However, we need the type it binds to be Foo. + +// Instead, we pass in a lambda [](ivalue_holder x, args...) from +// which getTypePtr can recover the original class pointer. + +template +struct tagged_capsule { + IValue ivalue; +}; + +template +c10::intrusive_ptr IValue::moveToIntrusivePtr() { + auto t = c10::intrusive_ptr::reclaim( + payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton() + ? NullType::singleton() + : static_cast(payload.u.as_intrusive_ptr)); + clearToNone(); + return t; +} +template +c10::intrusive_ptr IValue::toIntrusivePtr() const { + if (payload.u.as_intrusive_ptr == c10::UndefinedTensorImpl::singleton()) { + return c10::intrusive_ptr(); + } + c10::raw::intrusive_ptr::incref(payload.u.as_intrusive_ptr); + return c10::intrusive_ptr::reclaim( + static_cast(payload.u.as_intrusive_ptr)); +} + +template +intrusive_ptr static_intrusive_pointer_cast(intrusive_ptr r) { + return intrusive_ptr::reclaim(static_cast(r.release())); +} + +template +intrusive_ptr dynamic_intrusive_pointer_cast(intrusive_ptr r) { + return intrusive_ptr::reclaim(dynamic_cast(r.release())); +} + +inline c10::intrusive_ptr IValue::toFuture() && { + AT_ASSERT(isFuture(), "Expected Future but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toFuture() const& { + AT_ASSERT(isFuture(), "Expected Future but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toAwait() && { + AT_ASSERT(isAwait(), "Expected Await but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toAwait() const& { + AT_ASSERT(isAwait(), "Expected Await but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toRRef() && { + AT_ASSERT(isRRef(), "Expected RRef 
but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toRRef() const& { + AT_ASSERT(isRRef(), "Expected RRef but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toQuantizer() && { + AT_ASSERT(isQuantizer(), "Expected Quantizer but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toQuantizer() const& { + AT_ASSERT(isQuantizer(), "Expected Quantizer but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toString() && { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toString() const& { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toObject() && { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toObject() const& { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue:: + toPyObjectHolder() && { + TORCH_INTERNAL_ASSERT(isPyObject(), "Expected PyObject but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toPyObjectHolder() + const& { + TORCH_INTERNAL_ASSERT(isPyObject(), "Expected PyObject but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toEnumHolder() && { + TORCH_INTERNAL_ASSERT(isEnum(), "Expected Enum but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toEnumHolder() const& { + TORCH_INTERNAL_ASSERT(isEnum(), "Expected Enum but got ", tagKind()); + return toIntrusivePtr(); +} +inline c10::complex IValue::toComplexDouble() const { + TORCH_INTERNAL_ASSERT(isComplexDouble(), "Expected ComplexDouble but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return (*ptr).val; +} +inline 
at::Tensor IValue::toTensor() && { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + auto result = std::move(payload.as_tensor); + // As far as I can tell, omitting the usual explicit destructor call + // is not UB in and of itself, and it's a slight perf win. The + // destructor is a no-op, because the moved-from Tensor is + // effectively an intrusive_ptr in the null state, so we don't need + // the behavior for correctness reasons either. Leaving this + // explanatory comment, including commented-out destructor call, to + // make this abundantly clear. + // + // payload.as_tensor.~Tensor(); + clearToNone(); + return result; +} +inline at::Tensor& IValue::toTensor() & { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + return payload.as_tensor; +} +inline const at::Tensor& IValue::toTensor() const& { + if (C10_UNLIKELY(!isTensor())) { + reportToTensorTypeError(); + } + return payload.as_tensor; +} +inline c10::Storage IValue::toStorage() && { + AT_ASSERT(isStorage(), "Expected Storage but got ", tagKind()); + return c10::Storage( + moveToIntrusivePtr()); +} +inline c10::Storage IValue::toStorage() const& { + AT_ASSERT(isStorage(), "Expected Storage but got ", tagKind()); + return c10::Storage(toIntrusivePtr()); +} +inline c10::Stream IValue::toStream() && { + AT_ASSERT(isStream(), "Expected Stream but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return c10::Stream::unpack3((*ptr).val.stream_id, + (*ptr).val.device_index, + (*ptr).val.device_type); +} +inline c10::Stream IValue::toStream() const& { + AT_ASSERT(isStream(), "Expected Stream but got ", tagKind()); + auto ptr = toIntrusivePtr(); + return c10::Stream::unpack3((*ptr).val.stream_id, + (*ptr).val.device_index, + (*ptr).val.device_type); +} +inline c10::intrusive_ptr IValue::toBlob() && { + AT_ASSERT(isBlob(), "Expected Blob but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toBlob() const& { + AT_ASSERT(isBlob(), 
"Expected Blob but got ", tagKind()); + return toIntrusivePtr(); + ; +} +inline c10::intrusive_ptr IValue::toCapsule() && { + TORCH_INTERNAL_ASSERT(isCapsule()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toCapsule() const& { + TORCH_INTERNAL_ASSERT(isCapsule()); + return toIntrusivePtr(); +} +inline at::Generator IValue::toGenerator() && { + AT_ASSERT(isGenerator(), "Expected Generator but got ", tagKind()); + return at::Generator(moveToIntrusivePtr()); +} +inline at::Generator IValue::toGenerator() const& { + AT_ASSERT(isGenerator(), "Expected Generator but got ", tagKind()); + return at::Generator(toIntrusivePtr()); +} +inline c10::SymInt IValue::toSymInt() && { + AT_ASSERT(isSymInt() || isInt(), "Expected SymInt or int but got ", tagKind()); + if (isSymInt()) { + return c10::SymInt(moveToIntrusivePtr()); + } else { + return c10::SymInt(payload.u.as_int); + } +} +inline c10::SymInt IValue::toSymInt() const& { + AT_ASSERT(isSymInt() || isInt(), "Expected SymInt or int but got ", tagKind()); + if (isSymInt()) { + return c10::SymInt(toIntrusivePtr()); + } else { + return c10::SymInt(payload.u.as_int); + } +} +inline c10::SymFloat IValue::toSymFloat() && { + AT_ASSERT(isSymFloat() || isDouble(), "Expected SymFloat or double but got ", tagKind()); + if (isSymFloat()) { + return c10::SymFloat(moveToIntrusivePtr()); + } else { + return c10::SymFloat(payload.u.as_double); + } +} +inline c10::SymFloat IValue::toSymFloat() const& { + AT_ASSERT(isSymFloat() || isDouble(), "Expected SymFloat or double but got ", tagKind()); + if (isSymFloat()) { + return c10::SymFloat(toIntrusivePtr()); + } else { + return c10::SymFloat(payload.u.as_double); + } +} +inline c10::SymBool IValue::toSymBool() && { + AT_ASSERT(isSymBool() || isBool(), "Expected SymBool or boolean but got ", tagKind()); + if (isSymBool()) { + return c10::SymBool(moveToIntrusivePtr()); + } else { + return c10::SymBool(payload.u.as_bool); + } +} + +inline c10::SymBool IValue::toSymBool() 
const& { + AT_ASSERT(isSymBool() || isBool(), "Expected SymBool or boolean but got ", tagKind()); + if (isSymBool()) { + return c10::SymBool(toIntrusivePtr()); + } else { + return c10::SymBool(payload.u.as_bool); + } +} + +namespace ivalue { + +void TORCH_API +checkCustomClassType(const ClassType* expected_type, const Type* actual_type); + +template +using Shared = c10::intrusive_ptr; + +// string +struct TORCH_API ConstantString final : c10::intrusive_ptr_target { + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string str_; + + public: + ConstantString(std::string str) : str_(std::move(str)) {} + ConstantString(std::string_view str) : str_(std::string(str)) {} + static c10::intrusive_ptr create(std::string str_); + static c10::intrusive_ptr create(std::string_view str_); + static c10::intrusive_ptr create(const char* str_); + + const std::string& string() const { + return str_; + } + std::string_view string_view() const { + return str_; + } + + operator const std::string&() const { + return string(); + } + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const ConstantString& v); +}; + +struct Future; + +struct TORCH_API TupleElements { + private: + size_t inlineSize_; + // We represent TupleElements this way to save doing a heap + // allocation in the common (at least for unpickling) case where we + // have only 3 elements. We have our own union instead of + // c10::SmallVector because c10::SmallVector always + // stores the begin/end/capacity pointers, which would be a waste of + // space in our use case. + union { + std::vector elementsVector_; + // Don't want to declare a std::array because the convenient + // iteration and size members are a footgun in this case -- the + // actual size of the array may be smaller than 3! 
+ // NOLINTNEXTLINE(*c-arrays*) + IValue elementsInline_[3]; + }; + + void destroyInline() { + for (const auto ii : c10::irange(inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + public: + + using iterator = IValue*; + using const_iterator = const IValue*; + + TupleElements() : inlineSize_(0) { + new (&elementsVector_) std::vector(); + } + + explicit TupleElements(std::vector elements) + : inlineSize_(0), elementsVector_(std::move(elements)) {} + + explicit TupleElements(c10::ArrayRef elements) + : inlineSize_(elements.size() <= 3 ? elements.size() : 0) { + switch (inlineSize_) { + case 3: + new (&elementsInline_[2]) IValue(elements[2]); + [[fallthrough]]; + case 2: + new (&elementsInline_[1]) IValue(elements[1]); + [[fallthrough]]; + case 1: + new (&elementsInline_[0]) IValue(elements[0]); + break; + case 0: + new (&elementsVector_) std::vector(elements.begin(), elements.end()); + break; + } + } + + explicit TupleElements(IValue&& e1) + : inlineSize_(1) { + new (&elementsInline_[0]) IValue(std::move(e1)); + } + + explicit TupleElements(IValue&& e1, IValue&& e2) + : inlineSize_(2) { + new (&elementsInline_[0]) IValue(std::move(e1)); + new (&elementsInline_[1]) IValue(std::move(e2)); + } + + explicit TupleElements(IValue&& e1, IValue&& e2, IValue&& e3) + : inlineSize_(3) { + new (&elementsInline_[0]) IValue(std::move(e1)); + new (&elementsInline_[1]) IValue(std::move(e2)); + new (&elementsInline_[2]) IValue(std::move(e3)); + } + + ~TupleElements() { + if (inlineSize_) { + destroyInline(); + } else { + elementsVector_.~vector(); + } + } + + // It would be nice to make this noncopyable to prevent people from + // writing code like `auto output = + // forward(...).toTupleRef().elements()` (which does refcount bumps on + // each element, unlike the more efficient but verbose + // ``` + // auto outputIntrusivePtr = forward(...).toTuple(); + // const auto& output = outputIntrusivePtr->elements(); + // ``` + // ), but there is simply an overwhelming amount of code 
that does + // it the inefficient way. + // See also operator std::vector below. + TupleElements(const TupleElements& rhs) + : inlineSize_(rhs.inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + new (&elementsVector_) std::vector(rhs.elementsVector_); + } + } + + TupleElements& operator=(const TupleElements& rhs) { + if (inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(std::min(inlineSize_, rhs.inlineSize_))) { + elementsInline_[ii] = rhs.elementsInline_[ii]; + } + if (rhs.inlineSize_ > inlineSize_) { + for (const auto ii : c10::irange(inlineSize_, rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + for (const auto ii : c10::irange(rhs.inlineSize_, inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + } else { + destroyInline(); + new (&elementsVector_) std::vector(rhs.elementsVector_); + } + } else { + if (rhs.inlineSize_) { + elementsVector_.~vector(); + for (const auto ii : c10::irange(rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(rhs.elementsInline_[ii]); + } + } else { + elementsVector_ = rhs.elementsVector_; + } + } + inlineSize_ = rhs.inlineSize_; + return *this; + } + + TupleElements(TupleElements&& rhs) noexcept + : inlineSize_(rhs.inlineSize_) { + if (inlineSize_) { + for (const auto ii : c10::irange(inlineSize_)) { + new (&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + new (&elementsVector_) std::vector(std::move(rhs.elementsVector_)); + } + } + + TupleElements& operator=(TupleElements&& rhs) noexcept { + if (inlineSize_) { + if (rhs.inlineSize_) { + for (const auto ii : c10::irange(std::min(inlineSize_, rhs.inlineSize_))) { + elementsInline_[ii] = std::move(rhs.elementsInline_[ii]); + } + if (rhs.inlineSize_ > inlineSize_) { + for (const auto ii : c10::irange(inlineSize_, rhs.inlineSize_)) { + new 
(&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + for (const auto ii : c10::irange(rhs.inlineSize_, inlineSize_)) { + elementsInline_[ii].~IValue(); + } + } + } else { + destroyInline(); + new (&elementsVector_) std::vector(std::move(rhs.elementsVector_)); + } + } else { + if (rhs.inlineSize_) { + elementsVector_.~vector(); + for (const auto ii : c10::irange(rhs.inlineSize_)) { + new (&elementsInline_[ii]) IValue(std::move(rhs.elementsInline_[ii])); + } + } else { + elementsVector_ = std::move(rhs.elementsVector_); + } + } + inlineSize_ = rhs.inlineSize_; + return *this; + } + + [[nodiscard]] c10::ArrayRef asArrayRef() const { + if (inlineSize_) { + return c10::ArrayRef(elementsInline_, inlineSize_); + } else { + return elementsVector_; + } + } + + // Mimic implicit conversion from std::vector to ArrayRef. + operator c10::ArrayRef() const { + return asArrayRef(); + } + + static size_t hash(const TupleElements& v) { + return c10::hash>()(v.asArrayRef()); + } + + void setContents(std::vector&& contents) { + if (inlineSize_) { + destroyInline(); + new (&elementsVector_) std::vector(std::move(contents)); + inlineSize_ = 0; + } else { + elementsVector_ = std::move(contents); + } + } + + [[nodiscard]] bool empty() const { + return inlineSize_ ? false : elementsVector_.empty(); + } + + [[nodiscard]] size_t size() const { + return inlineSize_ ? 
inlineSize_ : elementsVector_.size(); + } + + [[nodiscard]] IValue& operator[](size_t idx) { + if (inlineSize_) { + return elementsInline_[idx]; + } else { + return elementsVector_[idx]; + } + } + + [[nodiscard]] const IValue& operator[](size_t idx) const { + if (inlineSize_) { + return elementsInline_[idx]; + } else { + return elementsVector_[idx]; + } + } + + [[nodiscard]] IValue& at(size_t idx) { + if (inlineSize_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3); + TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_); + return elementsInline_[idx]; + } else { + return elementsVector_.at(idx); + } + } + + [[nodiscard]] const IValue& at(size_t idx) const { + if (inlineSize_) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(inlineSize_ <= 3); + TORCH_CHECK(idx < inlineSize_, "TupleElements: invalid index Index = ", idx, "; Length = ", inlineSize_); + return elementsInline_[idx]; + } else { + TORCH_CHECK(idx < elementsVector_.size(), "TupleElements: invalid index Index = ", idx, "; Length = ", elementsVector_.size()); + return elementsVector_.at(idx); + } + } + + [[nodiscard]] iterator begin() { + if (inlineSize_) { + return elementsInline_; + } else { + return elementsVector_.data(); + } + } + + [[nodiscard]] iterator end() { + if (inlineSize_) { + return elementsInline_ + inlineSize_; + } else { + return elementsVector_.data() + elementsVector_.size(); + } + } + + [[nodiscard]] const_iterator begin() const { + if (inlineSize_) { + return elementsInline_; + } else { + return elementsVector_.data(); + } + } + + [[nodiscard]] const_iterator end() const { + if (inlineSize_) { + return elementsInline_ + inlineSize_; + } else { + return elementsVector_.data() + elementsVector_.size(); + } + } + + [[nodiscard]] const_iterator cbegin() const { + return begin(); + } + + [[nodiscard]] const_iterator cend() const { + return end(); + } + + [[nodiscard]] std::vector vec() const& { + return asArrayRef().vec(); + } + + [[nodiscard]] 
IValue& back() { + return *(end() - 1); + } + + [[nodiscard]] const IValue& back() const { + return *(end() - 1); + } + + [[nodiscard]] std::vector vec() && { + std::vector result; + result.reserve(size()); + for (auto&& iv : *this) { + result.push_back(std::move(iv)); + } + return result; + } + + // More compatibility shims for the overwhelming amount of code that + // likes to copy tuple elements into a vector; see comment above the + // copy constructor. + operator std::vector() const & { + return vec(); + } + + operator std::vector() && { + return vec(); + } +}; + +template +struct TupleTypeFactory {}; + +template <> +struct TORCH_API TupleTypeFactory { + static TupleTypePtr create(std::vector types) { + return TupleType::create(std::move(types)); + } + static TupleTypePtr fallback(const Type& type); +}; + +template <> +struct TORCH_API TupleTypeFactory { + static DynamicTypePtr create(const std::vector& elemTypes); + static DynamicTypePtr fallback(const Type& /*unused*/); +}; + +struct TORCH_API Tuple : c10::intrusive_ptr_target { + private: + TupleElements elements_; + mutable c10::TypePtr type_; // lazily computed for unnamed tuples + + public: + // named tuples have additional type information, so we + // directly create them tagged + static c10::intrusive_ptr createNamed( + std::vector elements_, + c10::TypePtr type_) { + return c10::make_intrusive(std::move(elements_), std::move(type_)); + } + + static c10::intrusive_ptr createNamed( + TupleElements elements_, + std::shared_ptr type_) { + return c10::make_intrusive(std::move(elements_), std::move(type_)); + } + + static c10::intrusive_ptr createNamed( + std::initializer_list elements_, + std::shared_ptr type_) { + return createNamed(TupleElements(c10::ArrayRef(elements_)), std::move(type_)); + } + + // MSVC apparently can't disambiguate the other two overloads of + // create when passed an initializer_list without this. 
+ static c10::intrusive_ptr create(std::initializer_list elements_) { + return create(c10::ArrayRef(elements_)); + } + + static c10::intrusive_ptr create(std::vector elements_) { + return c10::make_intrusive(std::move(elements_)); + } + + static c10::intrusive_ptr create(TupleElements elements_) { + return c10::make_intrusive(std::move(elements_)); + } + + static c10::intrusive_ptr create(c10::ArrayRef elements_) { + return create(TupleElements(elements_)); + } + + static c10::intrusive_ptr create(IValue e1) { + return c10::make_intrusive(std::move(e1)); + } + + static c10::intrusive_ptr create(IValue e1, IValue e2) { + return c10::make_intrusive(std::move(e1), std::move(e2)); + } + + static c10::intrusive_ptr create(IValue e1, IValue e2, IValue e3) { + return c10::make_intrusive(std::move(e1), std::move(e2), std::move(e3)); + } + + private: + // Workaround inability to use `>` operator in template argument list. + template + static constexpr bool hasMoreThanThreeArgs() { + return sizeof...(Args) > 3; + } + + public: + template + static c10::intrusive_ptr create(Args&&... elements_) { + switch (sizeof...(Args)) { + case 1: + case 2: + case 3: + return create(IValue(std::forward(elements_))...); + default: + return create( + std::vector{IValue(std::forward(elements_))...}); + } + } + + // Again, it would be nice to make this noncopyable, but there's a + // lot of extant code that copies Tuples. 
+ // Tuple(const Tuple& rhs) = delete; + + const TupleElements& elements() const& { + return elements_; + } + + TupleElements elements() && { + return std::move(elements_); + } + + void setElements(std::vector&& elements) { + elements_.setContents(std::move(elements)); + } + + void setElements(TupleElements&& elements) { + elements_ = std::move(elements); + } + + void unsafeSetElement(size_t idx, const IValue& element) { + elements_[idx] = element; + } + + void unsafeSetElement(size_t idx, IValue&& element) { + elements_[idx] = std::move(element); + } + + size_t size() const { + return elements_.size(); + } + + template + std::shared_ptr type() const { + if (!type_) { + type_ = TupleTypeFactory::create(fmap(elements(), [&](const IValue& v) { + return v.type(); + })); + } + if (auto t = type_->cast()) { + return t; + } + return TupleTypeFactory::fallback(*type_); + } + + static size_t hash(const Tuple& t) { + return c10::get_hash(t.elements()); + } + + TORCH_API friend bool operator==( + const ivalue::Tuple& lhs, + const ivalue::Tuple& rhs); + + private: + // NOTE: If we try to avoid the overloads without + // `std::shared_ptr type` by defaulting it to nullptr, we + // end up having to call (part of) the shared_ptr destructor for + // `type` even though we should know statically it won't do + // anything. 
+ explicit Tuple(std::vector elements) + : elements_(std::move(elements)){} + + explicit Tuple(std::vector elements, c10::TypePtr type) + : elements_(std::move(elements)), type_(std::move(type)) {} + + explicit Tuple(TupleElements&& elements) + : elements_(std::move(elements)) {} + + explicit Tuple(TupleElements&& elements, std::shared_ptr type) + : elements_(std::move(elements)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1) + : elements_(std::move(e1)) {} + + explicit Tuple(IValue&& e1, std::shared_ptr type) + : elements_(std::move(e1)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1, IValue&& e2) + : elements_(std::move(e1), std::move(e2)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, std::shared_ptr type) + : elements_(std::move(e1), std::move(e2)), type_(std::move(type)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, IValue&& e3) + : elements_(std::move(e1), std::move(e2), std::move(e3)) {} + + explicit Tuple(IValue&& e1, IValue&& e2, IValue&& e3, std::shared_ptr type) + : elements_(std::move(e1), std::move(e2), std::move(e3)), type_(std::move(type)) {} + + friend class c10::intrusive_ptr; +}; + +struct Object; +struct PyObjectHolder; +struct EnumHolder; +} // namespace ivalue + +// Future +struct C10_EXPORT ivalue::Future final : c10::intrusive_ptr_target { + private: + // Keep this private in order to force users to go through make_intrusive and + // thus prevent creating a Future that's not held by an intrusive_ptr. 
+ explicit Future(TypePtr type, std::vector devices={}) + : type_(std::move(type)), + impl_(getTypeOfDevices(devices)), + devices_(sortAndDeduplicateDevices(impl_, std::move(devices))) {} + + friend c10::intrusive_ptr; + + struct FutureCallback { + std::function callback; + bool uses_future; // whether the Future& passed in is actually used + + template + FutureCallback(T callback, bool uses_future) + : callback(std::move(callback)), uses_future(uses_future) {} + }; + + public: + Future(const Future&) = delete; + Future(Future&&) = delete; + Future& operator=(const Future&) = delete; + Future& operator=(Future&&) = delete; + + // Destructor + // Explicitly destroy events under device guard, otherwise it can lead to + // extra context being created on device 0. Reason: python garbage collector + // calls this destructor, but python GC does not have a device context, so a + // "default" one (usually on device 0) could be created when we go down the + // line of event destroy. + ~Future() override { + while (!events_.empty()) { + c10::OptionalDeviceGuard deviceGuard(events_.back().device()); + events_.pop_back(); + } + } + + struct TORCH_API FutureError final : public std::exception { + explicit FutureError(std::string&& error_msg_) + : error_msg(std::move(error_msg_)) {} + + FutureError() = default; + + const char* what() const noexcept override { + return error_msg.c_str(); + } + + std::string error_msg; + }; + + /** + * Wait on the future until it completes. + */ + void wait() { + std::unique_lock lock(mutex_); + finished_cv_.wait(lock, [&]() -> bool { return completed_; }); + synchronizeWithCurrentStreams(); + } + + /** + * Wait on the future until it completes and throw an + * exception if an error exists. + */ + void waitAndThrow() { + wait(); + + if (eptr_) { + std::rethrow_exception(eptr_); + } + } + + /** + * Explicitly mark the future as completed with the output value. Optionally, + * the storages for all tensors in IValue can be passed as well. 
The DataPtrs + * of these storages are used to synchronize CUDA streams. If storages isn't + * given we will attempt to extract it from the value, if we need to (this + * happens if a non-empty set of devices was given to the constructor). Thus + * one only needs to provide storages when 1) they cannot be extracted through + * IValue::getSubValues() or through pickling in case of Python object; or + * when 2) customized storage extraction is more efficient. + */ + using WeakStorage = c10::weak_intrusive_ptr; + void markCompleted( + IValue value, + std::optional> storages = std::nullopt) { + // Start by performing all steps that can throw, before setting any field. + // Do this before even acquiring the mutex, because extractStorages might + // acquire the GIL, which could lead to a lock inversion with our mutex. + // See https://github.com/pytorch/pytorch/issues/58239. + std::vector actualStorages; + std::vector usedDevices; + try { + // FIXME We should always extract DataPtrs, in order to catch the case of + // users using CUDA values but forgetting to set devices, which currently + // leads to a silent synchronization/correctness issue. However, as this + // might worsen perf in CPU-only cases, we should only do so after careful + // benchmarks. + if (impl_.type() != c10::kCPU) { + actualStorages = + storages.has_value() ? std::move(*storages) : extractStorages(value); + usedDevices = getDevicesOfStorages(impl_, actualStorages); + ensureIsSubsetOfDevices(usedDevices, devices_); + } + } catch (const std::exception&) { + setError(std::current_exception()); + return; + } + + std::unique_lock lock(mutex_); + TORCH_CHECK( + !completed(), + "Attempting to mark a completed Future as complete again. Note that " + "a Future can only be marked completed once."); + + // Only set value_ and completed_ flag once all checks and preparation steps + // have returned successfully to allow for proper error propagation. 
+ value_ = std::move(value); + completed_ = true; + + currentDevice_ = impl_.getDevice(); + storages_ = std::move(actualStorages); + for (const c10::Device& device : usedDevices) { + c10::Event event(impl_.type()); + event.record(impl_.getStream(device)); + events_.push_back(std::move(event)); + } + + std::vector cbs; + cbs.swap(callbacks_); + lock.unlock(); + + finished_cv_.notify_all(); + for (const auto& callback : cbs) { + invokeCallback(callback.callback, callback.uses_future); + } + } + + void markCompleted() { + markCompleted(IValue{}); + } + + void setError(std::exception_ptr eptr) { + std::unique_lock lock(mutex_); + setErrorInternal(std::move(eptr), lock); + } + + void setErrorIfNeeded(std::exception_ptr eptr) { + std::unique_lock lock(mutex_); + if (completed_) { + // This should be rare and shouldn't cause log spew. Its important to + // log errors and that's why we have this log here. + std::string msg = c10::str( + "Skipping setting following error on the Future since " + "it is already marked completed (this is not necessarily " + "an error):\n", + tryRetrieveErrorMessageInternal(std::move(eptr))); + if (eptr_) { + msg += c10::str( + ", \nOriginal exception:\n", + tryRetrieveErrorMessageInternal(eptr_)); + } + LOG(INFO) << msg; + return; + } else { + setErrorInternal(std::move(eptr), lock); + } + } + + // Get the result of the current future. + IValue value() { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + if (eptr_) { + std::rethrow_exception(eptr_); + } + return value_; + } + + // This accessor should only be used if we know that the future is + // completed() with no error. 
+ const IValue& constValue() const { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + TORCH_INTERNAL_ASSERT( + !eptr_, + "value() accessor should only be used when future is not completed with ", + "an error, but future had the following error: ", + tryRetrieveErrorMessageInternal(eptr_) + ); + return value_; + } + + // This accessor should only be used if we know that the future is + // completed() with no error. + const std::vector& storages() const { + std::unique_lock lock(mutex_); + AT_ASSERT(completed()); + AT_ASSERT(!eptr_); + return storages_; + } + + /** + * Add a callback to the future. + * The callbacks will be executed once the future completes. + * If the future has already completed, + * this function will execute the callback immediately. + */ + template + void addCallback(T callback, bool uses_future = true) { + static_assert( + std::is_invocable_r_v, + "The callback must have signature void(Future&)"); + + std::unique_lock lock(mutex_); + if (completed()) { + lock.unlock(); + invokeCallback(callback, uses_future); + return; + } + callbacks_.emplace_back(std::move(callback), uses_future); + } + + /** + * Add a callback to the future, and return another Future to hold the return + * value of the callback. This is necessary when the callback provider needs + * to know for sure when the callback has finished. 
+ */ + template + c10::intrusive_ptr then(T callback, TypePtr type) { + using IValueWithStorages = std::tuple>; + static_assert( + std::disjunction_v< + std::is_invocable_r, + std::is_invocable_r>, + "The callback must have signature IValue(Future&) or " + "std::tuple>(Future&)"); + + auto childFut = createInstance(::std::move(type)); + addCallback([childFut, + cb = std::move(callback)](Future& parentFut) { + try { + if constexpr (::std::is_convertible_v, IValueWithStorages>) { + auto [ivalue, storages] = cb(parentFut); + childFut->markCompleted(::std::move(ivalue), ::std::move(storages)); + } else { + childFut->markCompleted(cb(parentFut)); + } + } catch (std::exception&) { + childFut->setError(std::current_exception()); + } + }); + return childFut; + } + + template + c10::intrusive_ptr thenAsync(T callback, TypePtr type) { + static_assert( + std::is_invocable_r_v, T, Future&>, + "The callback must have signature c10::intrusive_ptr(Future&)"); + + auto childFut = createInstance(std::move(type)); + addCallback( + [childFut, cb = std::move(callback)](Future& parentFut) mutable { + c10::intrusive_ptr intermediateFut; + try { + intermediateFut = cb(parentFut); + } catch (std::exception&) { + childFut->setError(std::current_exception()); + return; + } + intermediateFut->addCallback( + [childFut = std::move(childFut)](Future& intermediateFut) { + if (intermediateFut.hasError()) { + childFut->setError(intermediateFut.exception_ptr()); + } else { + childFut->markCompleted( + intermediateFut.value(), intermediateFut.storages()); + } + }); + }); + return childFut; + } + + // Tries to retrieve the error message from std::exception_ptr. 
+ std::string tryRetrieveErrorMessage() const { + TORCH_CHECK(hasError(), "No error present on the future."); + std::unique_lock lock(mutex_); + return tryRetrieveErrorMessageInternal(eptr_); + } + + // Check if the current future has completed + bool completed() const { + return completed_; + } + + bool hasValue() const { + std::unique_lock lock(mutex_); + return completed_ && !eptr_; + } + + bool hasError() const { + std::unique_lock lock(mutex_); + return eptr_ ? true : false; + } + + std::exception_ptr exception_ptr() const { + std::unique_lock lock(mutex_); + return eptr_; + } + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const Future& v); + + const TypePtr& elementType() const { + return type_; + } + + const std::vector& devices() const { + return devices_; + } + + // This method should be used when one intends to manually create a child + // future, for example when implementing a customized version of then(). + c10::intrusive_ptr createInstance(at::TypePtr type) { + return c10::make_intrusive(std::move(type), devices_); + } + + private: + + // This method should always be used when invoking a callback (regardless of + // how/when that happens) as it will ensure that the proper "environment" is + // set up before running the callback, as in, it will set up the CUDA streams, + // synchronize them with the value, and so on (if needed). + template + void invokeCallback(T& callback, bool uses_future) { + static_assert( + std::is_invocable_r_v, + "The callback must have signature void(Future&)"); + + // The synchronization performed below shouldn't be needed when the future + // is not used by the callback. 
+ if (uses_future) { + c10::OptionalDeviceGuard deviceGuard(currentDevice_); + + std::vector streams; + streams.reserve(devices_.size()); + for (const c10::Device& device : devices_) { + streams.push_back(impl_.getStreamFromGlobalPool(device)); + } + c10::MultiStreamGuard streamGuard(streams); + synchronizeWithCurrentStreams(); + callback(*this); + } else { + callback(*this); + } + } + + // This method should be called before this future's value is used, as it + // ensures that the CUDA streams that are "current" at the callsite properly + // synchronize with the value. + void synchronizeWithCurrentStreams() { + for (c10::Event& event : events_) { + event.block(impl_.getStream(event.device())); + } + + for (const WeakStorage& weak_storage : storages_) { + c10::intrusive_ptr storage = weak_storage.lock(); + if (!storage) { + continue; + } + if (!storage->device().is_cpu()) { + impl_.recordDataPtrOnStream( + storage->data_ptr(), impl_.getStream(storage->device())); + } + } + } + + void setErrorInternal( + std::exception_ptr eptr, + std::unique_lock& lock) { + TORCH_CHECK( + !eptr_, + "Error already set on this Future: ", + tryRetrieveErrorMessageInternal(eptr_), + ", trying to set error: ", + tryRetrieveErrorMessageInternal(eptr)); + TORCH_INTERNAL_ASSERT(!completed(), "Future is already marked completed"); + completed_ = true; + eptr_ = std::move(eptr); + + std::vector cbs; + cbs.swap(callbacks_); + lock.unlock(); + + finished_cv_.notify_all(); + for (const auto& callback : cbs) { + invokeCallback(callback.callback, callback.uses_future); + } + } + + // Tries to retrieve the error message from std::exception_ptr. + std::string tryRetrieveErrorMessageInternal(std::exception_ptr eptr) const { + try { + std::rethrow_exception(std::move(eptr)); + } catch (const std::exception& e) { + return e.what(); + } catch (...) { + return "Unknown Exception Type"; + } + } + + // Defined in ivalue.cpp. 
+ static std::vector extractStorages( + const at::IValue& value); + + static std::vector getDevicesOfStorages( + const c10::impl::VirtualGuardImpl& impl, + const std::vector& storages) { + c10::DeviceIndex deviceCount = impl.deviceCount(); + std::vector isDeviceUsed(deviceCount, false); + for (const WeakStorage& weak_storage : storages) { + c10::intrusive_ptr storage = weak_storage.lock(); + if (!storage) { + continue; + } + c10::Device device = storage->device(); + if (!device.is_cpu()) { + TORCH_CHECK_VALUE( + device.type() == impl.type(), + "Expected all data ptrs to be on a device of type ", + impl.type(), + ", got one on device ", + device); + isDeviceUsed[device.index()] = true; + } + } + std::vector devices; + for (c10::DeviceIndex idx = 0; idx < deviceCount; idx++) { + if (isDeviceUsed[idx]) { + devices.emplace_back(impl.type(), idx); + } + } + return devices; + } + + static std::string formatSetOfDevices( + const std::vector& devices) { + if (devices.empty()) { + return "(none)"; + } + std::ostringstream oss; + oss << devices[0]; + for (const auto idx : c10::irange(1, devices.size())) { + if (idx == devices.size() - 1) { + oss << " and "; + } else { + oss << ", "; + } + oss << devices[idx]; + } + return oss.str(); + } + + static c10::DeviceType getTypeOfDevices( + const std::vector& devices) { + if (devices.empty()) { + return c10::kCPU; + } + c10::DeviceType deviceType = devices[0].type(); + for (const auto idx : c10::irange(1, devices.size())) { + TORCH_CHECK_VALUE( + devices[idx].type() == deviceType, + "Expected all devices to be of the same type, but got a mismatch between ", + devices[0], + " and ", + devices[idx]); + } + return deviceType; + } + + // We need devices to be sorted in order to use ensureIsSubsetOfDevices. 
+ static std::vector sortAndDeduplicateDevices( + const c10::impl::VirtualGuardImpl& /*impl*/, + std::vector devices) { + std::sort( + devices.begin(), devices.end(), + [](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); }); + // Deduplicate by compacting. + size_t targetIdx = 0; + for (const auto sourceIdx : c10::irange(devices.size())) { + TORCH_CHECK_VALUE( + devices[sourceIdx].has_index(), + "Expected devices to have indices, got ", devices[sourceIdx]); + if (targetIdx > 0 && devices[targetIdx - 1].index() == devices[sourceIdx].index()) { + // It's a duplicate, skip it. + continue; + } + if (sourceIdx != targetIdx) { + devices[targetIdx] = devices[sourceIdx]; + } + targetIdx++; + } + // If there were duplicates there's now a gap at the end: trim it. Resizing + // requires the item type to be default-constructible (which c10::Device is + // not) because in principle it could be required to create new items. Since + // we know we'll shrink the vector, we provide a custom dummy value instead. + devices.resize(targetIdx, c10::Device(c10::kCPU)); + return devices; + } + + static void ensureIsSubsetOfDevices( + const std::vector& subset, + const std::vector& superset) { + // We assume the devices in both vectors have the same consistent type, and + // their indices are unique and sorted. 
+ std::vector excessDevices; + std::set_difference( + subset.begin(), + subset.end(), + superset.begin(), + superset.end(), + std::back_inserter(excessDevices), + [](const c10::Device& a, const c10::Device& b) { return a.index() < b.index(); }); + TORCH_CHECK_VALUE( + excessDevices.empty(), + "The result contained tensors residing on device(s) ", + formatSetOfDevices(excessDevices), + " which are not among the expected device(s) ", + formatSetOfDevices(superset)); + } + + mutable std::mutex mutex_; + std::atomic_bool completed_ = {false}; // is this future complete + std::condition_variable finished_cv_; + + IValue value_; // when finished the value + TypePtr type_; + std::vector callbacks_; + std::exception_ptr eptr_; + + // An upcast pointer to a virtual class which allows us to manipulate events, + // streams, ... in a generic way, without an explicit dependency on CUDA. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const c10::impl::VirtualGuardImpl impl_; + + // The device that was current when markCompleted was called, which we'll + // restore when invoking callbacks. It's optional because we'll only store it + // if the future completes successfully. + std::optional currentDevice_; + + // The events that correspond to the completion of the async I/O kernels. They + // are recorded on the appropriate streams when the future is marked completed + // and can then be queried/waited/blocked on. There is one event for each + // distinct device on which the value's tensors reside. + std::vector events_; + + // A cached version of the storages extracted from the value when the future + // is first marked completed. + std::vector storages_; + + // The bounding set of devices that this future, and any of its children, is + // allowed to use. This is a superset of the set of devices used by the events + // above. 
We need this to know what streams (for which devices) to set as + // current when invoking a callback, thus allowing the callback to use devices + // that the parent future didn't use. This field is set to the value provided + // in the constructor and will be "inherited" by all child futures. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector devices_; +}; + +struct C10_EXPORT ivalue::Await final : c10::intrusive_ptr_target { + private: + explicit Await(TypePtr elType, std::function fn) + : elType_(std::move(elType)), type_(AwaitType::create(elType_)), fn_(std::move(fn)) {} + + explicit Await(TypePtr elType) : elType_(std::move(elType)), type_(AwaitType::create(elType_)) { } + + friend c10::intrusive_ptr; + + public: + Await(const Await&) = delete; + Await(Await&&) = delete; + Await& operator=(const Await&) = delete; + Await& operator=(Await&&) = delete; + ~Await() override = default; + + IValue wait() { + if (!completed_) { + TORCH_CHECK(fn_, "Incompleted Await: fn can't be None"); + value_ = fn_(); + completed_ = true; + args_ = {}; + } + return value_; + } + + IValue value() { + TORCH_CHECK(completed_, "Await must be completed"); + return value_; + } + + void setFn(std::function fn) { + fn_ = std::move(fn); + } + + bool completed() { + return completed_; + } + + void markCompleted(IValue value) { + value_ = std::move(value); + completed_ = true; + } + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const Await& v); + + const TypePtr& elementType() const { + return elType_; + } + + const TypePtr& type() const { + return type_; + } + + void setArgs(std::vector args) { + args_ = std::move(args); + } + + std::vector& args() { + return args_; + } + + private: + TypePtr elType_; + TypePtr type_; + std::vector args_; + std::function fn_; + IValue value_; + bool completed_{}; +}; + +// Input is a list of Futures with the same target type. +// Output is a Future to the List of completed Futures. 
+TORCH_API intrusive_ptr collectAll( + const c10::List>& srcs); +// Input is a List of Futures with the same target type. +// Output is a Future that will be updated with a seen value. +TORCH_API intrusive_ptr collectAny( + const c10::List>& srcs); + +// User-defined object. +struct C10_EXPORT ivalue::Object final : c10::intrusive_ptr_target { + public: + // In general, class types hold a shared_ptr to its owning CompilationUnit, + // so that its type and methods do not get deallocated while the class exists. + // However, the CompilationUnit holds ownership of the type's graphs, so + // inserting a constant object into a Graph would create a reference cycle if + // that constant object held a shared_ptr to its CU. For these objects we + // instantiate them with non-owning references to its CU + Object(WeakOrStrongTypePtr type, size_t numSlots) : type_(std::move(type)) { + slots_.resize(numSlots); + } + + Object(StrongTypePtr type, size_t numSlots) + : type_(WeakOrStrongTypePtr(std::move(type))) { + slots_.resize(numSlots); + } + + static c10::intrusive_ptr create( + WeakOrStrongTypePtr type, + size_t numSlots) { + return c10::make_intrusive(std::move(type), numSlots); + } + + static c10::intrusive_ptr create( + StrongTypePtr type, + size_t numSlots) { + return c10::make_intrusive(std::move(type), numSlots); + } + + static c10::intrusive_ptr create(ClassTypePtr classType, size_t numSlots); + + /** + * Slot API. + * + * Attributes are stored as a simple vector so that lookups are fast at + * runtime. A "slot" is just an index into that vector, which can be computed + * statically if you have access to the class type. Use this API if you are + * writing compiler stuff. + */ + void setSlot(size_t slot, IValue v) { + if (slot >= slots_.size()) { + // for module types, it is possible that the members of the class have + // expanded after the object was created. 
In this case, we expand + // the slots to the right size + resizeObject(slot); + } + slots_[slot] = std::move(v); + } + + const IValue& getSlot(size_t slot) const { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(slot < slots_.size()); + // NOTE: This lookup is fairly hot, so we use unchecked access to the + // vector. Errors should still be detectable with ASan. + return slots_[slot]; + } + + void unsafeRemoveSlot(size_t slot) { + TORCH_CHECK(slot < slots_.size()); + slots_.erase(slots_.begin() + static_cast(slot)); + } + + /** + * Attribute API. + * + * Wrappers around the slot stuff so that users can access attributes + * directly. Use this API if you are a user. + * + * Note: Unlike in Python, TorchScript must make a distinction between + * attributes (which are IValues) and methods (which are Methods). If you + * want a method, use `obj.type()->getMethod()` + */ + IValue getAttr(const std::string& name) const; + void setAttr(const std::string& name, IValue v); + // Remove attribute by name, caller is responsible for + // the safety of this operation + // We didn't remove the attribute in the type because the type + // might be shared by multiple objects. 
+ // Therefore after removing attribute, the object is in an inconsistent + // state where it has more attribute types in its Type than + // the attribute slots it has, user needs to make sure the object + // has consistent by removing the attribute in type as well + void unsafeRemoveAttr(const std::string& name); + + std::string name() const; + + const std::vector& slots() const { + return slots_; + } + std::shared_ptr type() const; + + std::shared_ptr compilation_unit() { + if (type_.holds_strong_ref()) { + return type_.cu_.getStrongRefOrThrow(); + } else { + auto weak_ptr = type_.cu_.getWeakRefOrThrow(); + return std::shared_ptr(weak_ptr); + } + } + + c10::intrusive_ptr copy_to_weak_compilation_ref() const; + + void unsafe_make_weak_compilation_ref() { + type_ = WeakOrStrongTypePtr(type_.asWeakTypePtr()); + } + + c10::intrusive_ptr copy() const; + + c10::intrusive_ptr deepcopy( + std::optional device = std::nullopt) const; + + c10::intrusive_ptr deepcopy( + IValue::HashIdentityIValueMap& memo, + std::optional device = std::nullopt) const; + + bool is_weak_compilation_ref() const { + return !type_.holds_strong_ref(); + } + + bool is_empty_strong_compilation_ref() const { + return type_.holds_empty_strong_ref(); + } + + private: + void resizeObject(size_t slot); + WeakOrStrongTypePtr type_; + std::vector slots_; +}; + +// virtual ivalue PyObjectHolder that hold a py::object, we make this virtual +// because the py::object and refcounting logic should happen in libtorch_python +// see concrete implementation in python_ivalue.h +struct ivalue::PyObjectHolder : c10::intrusive_ptr_target { + public: + virtual PyObject* getPyObject() = 0; + virtual c10::InferredType tryToInferType() = 0; + virtual IValue toIValue(const TypePtr& type, std::optional N = std::nullopt) = 0; + virtual std::string toStr() = 0; + virtual std::vector extractTensors() = 0; + + ~PyObjectHolder() override = default; +}; + +struct ivalue::EnumHolder : c10::intrusive_ptr_target { + public: + 
EnumHolder(std::shared_ptr type, std::string name, IValue value) + : type_(std::move(type)), + name_(std::move(name)), + value_(std::move(value)) {} + + bool is(const ivalue::EnumHolder& rhs) { + return *this == rhs; + } + + friend bool operator==( + const ivalue::EnumHolder& lhs, + const ivalue::EnumHolder& rhs); + + TORCH_API friend std::ostream& operator<<( + std::ostream& out, + const ivalue::EnumHolder& v); + + TORCH_API const std::string& qualifiedClassName() const; + + const std::string& unqualifiedClassName() const; + + const std::string& name() const { + return name_; + } + + const IValue& value() const { + return value_; + } + + std::shared_ptr type() const { + return type_; + } + + private: + std::shared_ptr type_; + std::string name_; + IValue value_; +}; + +#undef TORCH_FORALL_TAGS + +namespace detail { + +struct _guarded_unsigned_long_unique_dummy final { + _guarded_unsigned_long_unique_dummy(int64_t /*unused*/){} +}; +using _guarded_unsigned_long = std::conditional_t< + std::is_same_v || + std::is_same_v, + _guarded_unsigned_long_unique_dummy, + unsigned long>; + +} // namespace detail + +inline ivalue::Object& IValue::toObjectRef() const { + AT_ASSERT(isObject(), "Expected Object but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), "Attempted to create null reference"); + return *static_cast(payload.u.as_intrusive_ptr); +} + +// note: when adding a DEFINE_TO case here you should also add a +// toX method to IValue. These named methods are much more discoverable +// than the to templated function. 
+ +#define DEFINE_TO(T, method_name) \ + template <> \ + inline T IValue::to()&& { \ + return static_cast(std::move(*this).method_name()); \ + } \ + template <> \ + inline c10::detail::ivalue_to_const_ref_overload_return::type IValue::to() const& { \ + typedef c10::detail::ivalue_to_const_ref_overload_return::type return_type; \ + return static_cast(this->method_name()); \ + } + +DEFINE_TO(at::Tensor, toTensor) +DEFINE_TO(at::Storage, toStorage) +DEFINE_TO(c10::Stream, toStream) +DEFINE_TO(float, toDouble) +DEFINE_TO(double, toDouble) +DEFINE_TO(c10::complex, toComplexDouble) +DEFINE_TO(unsigned char, toInt) +DEFINE_TO(signed char, toInt) +DEFINE_TO(unsigned short, toInt) +DEFINE_TO(short, toInt) +DEFINE_TO(int, toInt) +DEFINE_TO(uint32_t, toInt) +DEFINE_TO(uint64_t, toInt) +DEFINE_TO(detail::_guarded_unsigned_long, toInt) +DEFINE_TO(int64_t, toInt) +DEFINE_TO(bool, toBool) +DEFINE_TO(c10::intrusive_ptr, toBlob) +DEFINE_TO(c10::intrusive_ptr, toString) +DEFINE_TO(c10::intrusive_ptr, toObject) +DEFINE_TO(at::Scalar, toScalar) +DEFINE_TO(c10::List, toIntList) +DEFINE_TO(c10::List, toSymIntList) +DEFINE_TO(c10::List, toDoubleList) +DEFINE_TO(c10::List>, toComplexDoubleList) +DEFINE_TO(c10::List, toBoolList) +DEFINE_TO(c10::List, toTensorList) +DEFINE_TO(c10::impl::GenericList, toList) +DEFINE_TO(c10::impl::GenericDict, toGenericDict) +DEFINE_TO(c10::intrusive_ptr, toTuple) +DEFINE_TO(std::string, toStringRef) +DEFINE_TO(std::string_view, toStringView) +DEFINE_TO(c10::intrusive_ptr, toFuture) +DEFINE_TO(c10::intrusive_ptr, toAwait) +DEFINE_TO(c10::intrusive_ptr, toRRef) +DEFINE_TO(c10::intrusive_ptr, toQuantizer) +DEFINE_TO(IValue, toIValue) +DEFINE_TO(c10::Device, toDevice) +DEFINE_TO(at::ScalarType, toScalarType) +DEFINE_TO(at::Layout, toLayout) +DEFINE_TO(at::MemoryFormat, toMemoryFormat) +DEFINE_TO(at::QScheme, toQScheme) +DEFINE_TO(at::Dimname, toDimname) +DEFINE_TO(at::Generator, toGenerator) +DEFINE_TO(c10::SymInt, toSymInt) +DEFINE_TO(c10::SymFloat, toSymFloat) 
+DEFINE_TO(c10::SymBool, toSymBool) + +template +struct _fake_type {}; + +// generic_to converts an IValue from a generic list or generic dict +// to a concrete list/dict type likelike List, Dict<...> or std::optional. +// Note that in the case of lists, this only works for IValue-based lists, +// i.e. not for int64_t, double, ... +// generic_to is an implementation detail of IValue::to and not +// supposed to be called directly. +// The _fake_type parameter allows us to overload +// based on the return type. +template +// TODO this is deprecated but we don't throw a warning because a lot of ops in +// native_functions.yaml still return std::vector. +// C10_DEPRECATED_MESSAGE("IValues based on std::vector are potentially slow +// and deprecated. Please use torch::List instead.") +std::vector generic_to(IValue ivalue, _fake_type> /*unused*/) { + // We need to do a deep copy of the vector because there might be other + // references to this same IValue that also use the list. We can't just + // move the elements out. 
+ auto list = std::move(ivalue).template to>(); + std::vector result; + result.reserve(list.size()); + for (Elem v : list) { + result.push_back(std::move(v)); + } + return result; +} + +template +c10::intrusive_ptr IValue::toCustomClass() && { + static_assert( + std::is_base_of_v == true, + "toCustomClass requires that template parameter T must inherit " + "from torch::CustomClassHolder"); + auto obj = toObject(); + TORCH_CHECK( + obj->slots().size() == 1, + "Tried to cast IValue to custom class but it did " + "not contain a custom class!"); + const auto* expected_type = c10::getCustomClassType>().get(); + ivalue::checkCustomClassType(expected_type, type().get()); + auto userObj = + c10::static_intrusive_pointer_cast(obj->getSlot(0).toCapsule()); + return userObj; +} + +template +c10::intrusive_ptr IValue::toCustomClass() const& { + static_assert( + std::is_base_of_v == true, + "toCustomClass requires that template parameter T must inherit " + "from torch::CustomClassHolder"); + auto obj = toObject(); + TORCH_CHECK( + obj->slots().size() == 1, + "Tried to cast IValue to custom class but it did " + "not contain a custom class!"); + const auto* expected_type = c10::getCustomClassType>().get(); + ivalue::checkCustomClassType(expected_type, type().get()); + auto userObj = + c10::static_intrusive_pointer_cast(obj->getSlot(0).toCapsule()); + return userObj; +} + +template +T generic_to(IValue ivalue, _fake_type /*unused*/) { + using ElemType = typename std::remove_pointer::type::element_type; + return std::move(ivalue).template toCustomClass(); +} + +template +tagged_capsule generic_to(IValue ivalue, _fake_type> /*unused*/) { + return tagged_capsule{std::move(ivalue)}; +} + +template +c10::List generic_to(IValue ivalue, _fake_type> /*unused*/) { + return impl::toTypedList(std::move(ivalue).toList()); +} + +template +static T createVectorLikeFromList(const c10::detail::ListImpl* impl) { + T result; + result.reserve(impl->list.size()); + for (const auto & i : impl->list) { 
+ result.push_back(i.to()); + } + return result; +} + +template +static std::vector createVectorFromList(const c10::detail::ListImpl* impl) { + return createVectorLikeFromList>(impl); +} + +template +std::vector createVectorFromList(const c10::List& impl) { + std::vector result; + result.reserve(impl.size()); + for (size_t i = 0, N = impl.size(); i < N; ++i) { + result.push_back(impl[i]); + } + return result; +} + +template +OptionalArray generic_to(IValue ivalue, _fake_type> /*unused*/) { + if (ivalue.isNone()) { + return {}; + } + return createVectorFromList( + std::move(ivalue).template to>() + ); +} + +namespace detail { +template +std::array generic_to_array( + IValue ivalue, + _fake_type> /*unused*/, + std::index_sequence /*unused*/) { + // We need to do a deep copy of the array because there might be other + // references to this same IValue that also use the list. We can't just + // move the elements out. + auto list = std::move(ivalue).template to>(); + TORCH_CHECK( + list.size() == sizeof...(I), + "Tried to convert a List with ", + list.size(), + " elements to a fixed-size array of size ", + sizeof...(I)); + return {list[I]...}; +} +} // namespace detail + +template +std::array generic_to( + IValue ivalue, + _fake_type> ft) { + return detail::generic_to_array(ivalue, ft, std::make_index_sequence()); +} + +template +c10::Dict generic_to( + IValue ivalue, + _fake_type> /*unused*/) { + return impl::toTypedDict(std::move(ivalue).toGenericDict()); +} + +template +C10_DEPRECATED_MESSAGE( + "IValues based on std::unordered_map are slow and deprecated. 
Please use c10::Dict instead.") +std::unordered_map generic_to( + IValue ivalue, + _fake_type> /*unused*/) { + std::unordered_map specialized_dict; + + for (const auto& item : std::move(ivalue).toGenericDict()) { + specialized_dict[item.key().template to()] = item.value().template to(); + } + + return specialized_dict; +} + +template +std::optional generic_to(IValue ivalue, _fake_type> /*unused*/) { + if (ivalue.isNone()) { + return std::nullopt; + } + return std::move(ivalue).template to(); +} + +namespace detail { +template +Tuple generic_to_tuple_impl( + const ivalue::TupleElements& t, + std::index_sequence /*unused*/) { + return std::make_tuple( + t[INDEX].to::type>()...); +} +} // namespace detail + +template < + typename... Args, + typename Indices = std::make_index_sequence, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t> = nullptr> +std::tuple generic_to(const IValue& ivalue, _fake_type> /*unused*/) { + const auto& vals = ivalue.toTupleRef().elements(); + TORCH_CHECK(vals.size() == sizeof...(Args)); + return detail::generic_to_tuple_impl>(vals, Indices{}); +} + +template +inline T IValue::to() && { + return generic_to(std::move(*this), _fake_type{}); +} + +template <> +inline std::optional IValue::to() && { + // In the default implementation, the IValue is destroyed with std::move. + // But if the unboxed type is std::optional we cannot destroy + // the IValue. 
+ return generic_to(*this, _fake_type>{}); +} + +template +inline typename c10::detail::ivalue_to_const_ref_overload_return::type IValue::to() const& { + return generic_to(*this, _fake_type{}); +} + +inline c10::List IValue::toIntList() && { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toIntList() const& { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toIntVector() const { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toIntVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toSymIntList() && { + AT_ASSERT( + isSymIntList() || isIntList(), + "Expected SymIntList or IntList but got ", + tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toSymIntList() const& { + AT_ASSERT( + isSymIntList() || isIntList(), + "Expected SymIntList or IntList but got ", + tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toSymIntVector() const { + AT_ASSERT(isSymIntList() || isIntList(), "Expected SymIntList or IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toSymIntVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline at::DimVector IValue::toDimVector() const { + AT_ASSERT(isIntList(), "Expected IntList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toDimVector on null intrusive_ptr IValue"); + return createVectorLikeFromList( + 
static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toDoubleList() && { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toDoubleList() const& { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toDoubleVector() const { + AT_ASSERT(isDoubleList(), "Expected DoubleList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toDoubleVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List> IValue::toComplexDoubleList() && { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + return c10::List>(moveToIntrusivePtr()); +} +inline c10::List> IValue::toComplexDoubleList() const& { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + return c10::List>(toIntrusivePtr()); +} +inline std::vector> IValue::toComplexDoubleVector() const { + AT_ASSERT(isComplexDoubleList(), "Expected ComplexDoubleList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toComplexDoubleVector on null intrusive_ptr IValue"); + return createVectorFromList>( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toBoolList() && { + AT_ASSERT(isBoolList(), "Expected BoolList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toBoolList() const& { + AT_ASSERT(isBoolList(), "Expected BoolList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline c10::List IValue::toTensorList() && { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline 
c10::List IValue::toTensorList() const& { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline std::vector IValue::toTensorVector() const { + AT_ASSERT(isTensorList(), "Expected TensorList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toTensorVector on null intrusive_ptr IValue"); + return createVectorFromList( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List> IValue::toOptionalTensorList() && { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + return c10::List>(moveToIntrusivePtr()); +} +inline c10::List> IValue::toOptionalTensorList() const& { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + return c10::List>(toIntrusivePtr()); +} +inline std::vector> IValue::toOptionalTensorVector() const { + AT_ASSERT(isOptionalTensorList(), "Expected OptionalTensorList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toOptionalTensorVector on null intrusive_ptr IValue"); + return createVectorFromList>( + static_cast(payload.u.as_intrusive_ptr)); +} +inline c10::List IValue::toList() && { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + return c10::List(moveToIntrusivePtr()); +} +inline c10::List IValue::toList() const& { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + return c10::List(toIntrusivePtr()); +} +inline c10::ArrayRef IValue::toListRef() const { + AT_ASSERT(isList(), "Expected GenericList but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toListRef on null intrusive_ptr IValue"); + return static_cast(payload.u.as_intrusive_ptr) + ->list; +} +inline c10::Dict IValue::toGenericDict() && { + 
AT_ASSERT(isGenericDict(), "Expected GenericDict but got ", tagKind()); + return c10::Dict(moveToIntrusivePtr()); +} +inline c10::Dict IValue::toGenericDict() const& { + AT_ASSERT(isGenericDict(), "Expected GenericDict but got ", tagKind()); + return c10::Dict(toIntrusivePtr()); +} +inline c10::intrusive_ptr IValue::toTuple() && { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + return moveToIntrusivePtr(); +} +inline c10::intrusive_ptr IValue::toTuple() const& { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + return toIntrusivePtr(); +} +inline ivalue::Tuple& IValue::toTupleRef() const { + AT_ASSERT(isTuple(), "Expected Tuple but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toTupleRef on null intrusive_ptr IValue"); + return *static_cast( + payload.u.as_intrusive_ptr); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Tuple) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} +template < + typename... Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t>> +inline IValue::IValue(const std::tuple& t) + : IValue(std::apply(c10::ivalue::Tuple::create, t)) { +} + +template < + typename... 
Args, + std::enable_if_t< + !std::disjunction_v< + std::is_lvalue_reference..., + std::negation>...>, + std::nullptr_t>> +inline IValue::IValue(std::tuple&& t) + : IValue(std::apply(c10::ivalue::Tuple::create, std::move(t))) { +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::String) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} +inline IValue::IValue(std::string v) + : IValue(ivalue::ConstantString::create(std::move(v))) {} + +inline IValue::IValue(c10::impl::GenericList v) + : tag(Tag::GenericList) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.impl_.release()); +} + +template > +inline IValue::IValue(c10::List&& v) : IValue(impl::toList(std::move(v))) {} +template > +inline IValue::IValue(const c10::List& v) : IValue(impl::toList(v)) {} +template > +inline IValue::IValue(at::ArrayRef v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } +} +template > +inline IValue::IValue(at::ArrayRef v) : IValue() { + auto vi = c10::asIntArrayRefSlowOpt(v); + if (vi.has_value()) { + // This list is entirely integers; ensure it is typed as + // an IntList so toIntList works + *this = IValue(*vi); + } else { + // This list has SymInts; type it as a SymInt + *this = IValue(impl::toList(c10::List())); + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } + } +} +template > +inline IValue::IValue(at::OptionalArrayRef mb_v) : IValue() { + if (!mb_v.has_value()) return; + *this = IValue(*mb_v); +} +template > +inline IValue::IValue(const std::vector& v) : IValue() { + *this = IValue(at::ArrayRef(v)); +} +template > +inline IValue::IValue(std::vector&& v) : IValue() { + auto vi = c10::asIntArrayRefSlowOpt(v); + if (vi.has_value()) { + // This list is entirely integers; ensure it is typed as + // an IntList so toIntList works + *this = IValue(*vi); + } else { + // This list has SymInts; type it as a SymInt + *this 
= IValue(impl::toList(c10::List())); + auto list = to>(); + list.reserve(v.size()); + for (auto&& e : std::move(v)) { + list.push_back(std::move(e)); + } + } +} +template > +inline IValue::IValue(const std::vector& v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (const auto& e : v) { + list.push_back(e); + } +} + +template > +inline IValue::IValue(std::vector&& v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + if constexpr (std::is_same_v) { + for (auto e : v) { + list.push_back(e); + } + } else { + for (auto&& e : std::move(v)) { + list.push_back(std::move(e)); + } + } +} + +template > +inline IValue::IValue(c10::OptionalArrayRef v) : IValue() { + if (v.has_value()) { + *this = IValue(std::move(*v)); + } +} + +template +inline IValue::IValue(std::array v) : IValue(c10::List()) { + auto list = to>(); + list.reserve(v.size()); + for (auto& e : v) { + list.push_back(std::move(e)); + } +} + +template > +inline IValue::IValue(c10::IListRef v) : IValue() { + constexpr bool boxed_type_constructs_ivalue = + std::is_constructible_v::boxed_type>; + // First, we try to use the boxed value. + // If we fail (either it's not in the boxed state, or its boxed type + // can not construct an IValue), we fallback to copying the list. 
+ if (boxed_type_constructs_ivalue && v.isBoxed()) { + *this = IValue(impl::toList(v.toBoxed())); + } else { + c10::List list; + list.reserve(v.size()); + for (const auto& t : v) { + list.push_back(t); + } + *this = IValue(impl::toList(std::move(list))); + } +} + +inline IValue::IValue(c10::impl::GenericDict v) + : tag(Tag::GenericDict) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.impl_.release()); +} +template +inline IValue::IValue(c10::Dict v) + : IValue(impl::toGenericDict(std::move(v))) {} + +template +inline IValue::IValue(std::unordered_map v) + : IValue(Dict()) { + auto dict = to>(); + dict.reserve(v.size()); + for (auto& e : v) { + dict.insert(std::move(e.first), std::move(e.second)); + } +} + +template > +inline IValue::IValue(std::optional v) : IValue() { + if (v.has_value()) { + *this = IValue(std::move(*v)); + } +} + +inline IValue::IValue(std::nullopt_t /*unused*/) : IValue() {} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Object) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::PyObject) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Enum) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue IValue::make_capsule( + intrusive_ptr blob) { + IValue iv; + iv.tag = Tag::Capsule; + iv.payload.u.as_intrusive_ptr = null_to_undefined_tensor(blob.release()); + return iv; +} + +template < + typename T, + std::enable_if_t, int>> +IValue::IValue(c10::intrusive_ptr custom_class) : tag(Tag::Object) { + auto classType = []() { + try { + return c10::getCustomClassType>(); + } catch (const c10::Error&) { + throw c10::Error( + "Trying to instantiate a class that isn't a registered custom class: " + + std::string(c10::util::get_fully_qualified_type_name())); + } + }(); + auto ivalue_obj = 
c10::ivalue::Object::create(std::move(classType), /* numSlots */1); + ivalue_obj->setSlot(0, IValue::make_capsule(std::move(custom_class))); + payload.u.as_intrusive_ptr = null_to_undefined_tensor(ivalue_obj.release()); + +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Future) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Await) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::RRef) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +inline IValue::IValue(c10::intrusive_ptr v) + : tag(Tag::Quantizer) { + payload.u.as_intrusive_ptr = null_to_undefined_tensor(v.release()); +} + +template +inline IValue::IValue(c10::complex c) + : tag(Tag::ComplexDouble) { + auto v = c10::make_intrusive(c); + payload.u.as_intrusive_ptr = v.release(); +} + +inline const std::string& IValue::toStringRef() const { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toStringRef on null intrusive_ptr IValue"); + return static_cast( + payload.u.as_intrusive_ptr) + ->string(); +} +inline std::optional> IValue:: + toOptionalStringRef() const { + if (isNone()) { + return std::nullopt; + } + AT_ASSERT(isString(), "Expected std::optional but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called toOptionalStringRef on null intrusive_ptr IValue"); + return std::reference_wrapper( + static_cast(payload.u.as_intrusive_ptr) + ->string()); +} + +inline std::string_view IValue::toStringView() const { + AT_ASSERT(isString(), "Expected String but got ", tagKind()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + payload.u.as_intrusive_ptr != c10::UndefinedTensorImpl::singleton(), + "called 
toStringView on null intrusive_ptr IValue"); + return static_cast( + payload.u.as_intrusive_ptr) + ->string_view(); +} + +inline PyObject* IValue::toPyObject() const { + return toPyObjectHolder()->getPyObject(); +} + +template +inline std::optional IValue::toOptional() { + if (this->isNone()) { + return std::nullopt; + } + return this->to(); +} + +template +inline std::optional IValue::toOptional() const { + if (this->isNone()) { + return std::nullopt; + } + return this->to(); +} + +inline bool IValue::isCustomClass() const { + return torch::isCustomClass(*this); +} + +inline bool IValue::isSameIdentity(const IValue& rhs) const { + // We choose to not use memcmp for payload check due to potential random + // padding characters on union type + + // Semantics: + // 1. Immutable primitive values of the same type (Int, Double, None, Bool, + // Str) return value equality + // 2. If it is a tensor type, we need to take undefined tensor into account + // 3. Undefined_tensor is None and vice versa should be true + // 4. If it is a reference type (i.e. isIntrusivePtr()), then is True when + // the pointed-to object is the same. + // 5. False for all other comparisons. 
+ if (this->isNone() && rhs.isNone()) { + return true; + } else if (this->isBool() && rhs.isBool()) { + // for bool type, do equality check + return this->toBool() == rhs.toBool(); + } else if (this->isTensor() && rhs.isTensor()) { + return this->payload.as_tensor.is_same(rhs.payload.as_tensor); + } else if (this->isTensor() && rhs.isNone()) { + // special case: undefined tensor and None are the same identity + return !this->payload.as_tensor.defined(); + } else if (this->isNone() && rhs.isTensor()) { + // special case: undefined tensor and None are the same identity + return !rhs.payload.as_tensor.defined(); + } else if (this->isInt() && rhs.isInt()) { + return this->toInt() == rhs.toInt(); + } else if (this->isDouble() && rhs.isDouble()) { + return this->toDouble() == rhs.toDouble(); + } else if (this->isString() && rhs.isString()) { + return this->toStringRef() == rhs.toStringRef(); + } else { + // for objects holding in IValue, do shallow compare on pointer address to + // testify the identity + return this->isIntrusivePtr() && rhs.isIntrusivePtr() && + this->payload.u.as_intrusive_ptr == rhs.payload.u.as_intrusive_ptr; + } +} + +namespace ivalue { +namespace detail { + +template +IValue from_(T&& x, std::true_type /*unused*/) { + return IValue(std::forward(x)); +} +template +IValue from_(c10::intrusive_ptr x, std::false_type /*unused*/) { + return IValue(std::move(x)); +} +template +IValue from_(T&& /*x*/, std::false_type /*unused*/) { + static_assert( + guts::false_t::value, + "You are calling from with a type that it doesn't support, and isn't a potential custom class (ie: is an intrusive_ptr)"); + return IValue(); +} +} // namespace detail + +template +IValue from(T&& x) { + return detail::from_( + std::forward(x), typename std::is_constructible::type{}); +} + +} // namespace ivalue + + +template <> +struct MaybeOwnedTraits { + using owned_type = IValue; + using borrow_type = IValue; + + static borrow_type createBorrow(const owned_type& from) { + if 
(!from.isPtrType()) { + return from; + } + if (from.isTensor()) { + return IValue(MaybeOwnedTraits::createBorrow(from.toTensor())); + } else { + return IValue(from.payload, from.tag); + } + } + + static void assignBorrow(borrow_type& lhs, const borrow_type& rhs) { + lhs.clearToNone(); + if (!rhs.isPtrType()) { + lhs = rhs; + } else if (rhs.isTensor()) { + lhs = IValue(MaybeOwnedTraits::createBorrow(rhs.toTensor())); + } else { + lhs = IValue(rhs.payload, rhs.tag); + } + } + + static void destroyBorrow(borrow_type& toDestroy) { + toDestroy.clearToNone(); + } + + static const owned_type& referenceFromBorrow(const borrow_type& borrow) { + return borrow; + } + + static const owned_type* pointerFromBorrow(const borrow_type& borrow) { + return &borrow; + } + + static bool debugBorrowIsValid(const borrow_type& /*unused*/) { + return true; + } +}; + +template <> +struct IValue::TagType { + static TORCH_API c10::TypePtr get(const IValue& /*v*/); +}; + +template <> +struct IValue::TagType { + static TORCH_API c10::TypePtr get(const IValue& /*v*/); +}; + +template +TypePtr IValue::type() const { + return IValue::TagType::get(*this); +} + +} // namespace c10 + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h new file mode 100644 index 0000000000000000000000000000000000000000..52a3f23bbb0475948e101e1d368d7952888f8528 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/ivalue_to.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at { +class Tensor; +} // namespace at + +namespace c10 { +struct IValue; +namespace detail { +// Determine the return type of `IValue::to() const &`. It's a const +// reference when possible and a copy otherwise. It is in this +// separate header so that List can use it as well. +template +struct ivalue_to_const_ref_overload_return { + using type = T; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const at::Tensor&; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const std::string&; +}; + +template<> +struct ivalue_to_const_ref_overload_return { + using type = const IValue&; +}; + +} // namespace detail +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h new file mode 100644 index 0000000000000000000000000000000000000000..bec7f2e5823177274af77717db8f59090914c676 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type.h @@ -0,0 +1,2435 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + + +namespace torch::jit { +struct Function; +} // namespace torch::jit + + +namespace c10 { + +template +class Dict; +struct IValue; +struct FunctionSchema; +struct NamedType; +using OptNameList = std::optional>; + +void standardizeVectorForUnion(std::vector& reference, std::vector* to_fill); +void standardizeVectorForUnion(std::vector* to_flatten); + +inline bool is_contiguous_strides( + const IntArrayRef sizes, + const IntArrayRef strides) { + size_t n_dim = sizes.size(); + if (n_dim == 0) { + return true; + } + + if (strides[n_dim - 1] != 1) { + return false; + } + + for (int i = static_cast(n_dim) - 2; i >= 0; i--) { + if (strides[i] != strides[i + 1] * sizes[i + 1]) { + return false; + } + } + return true; +} + +struct AnyType; +using AnyTypePtr = SingletonTypePtr; +// Any is the top of the type hierarchy, all other types are subtypes +// T <: Any, forall T +struct TORCH_API AnyType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Any"; + } + static const TypeKind Kind = TypeKind::AnyType; + // global singleton + static AnyTypePtr get(); + + private: + AnyType() : Type(TypeKind::AnyType) {} +}; 
+ +inline std::string toString(const Type& type) { + return type.str(); +} + +// Shim for compatibility with code that uses TypePtr. +inline std::string toString(const TypePtr& typePtr) { + return toString(*typePtr); +} + +inline bool operator!=(const Type& lhs, const Type& rhs) { + return !(lhs == rhs); +} + +// common base for all types that have a single sub element +// e.g. Future[T], Optional[T], List[T] +template +struct SingleElementType : public SharedType { + static const TypeKind Kind = K; + + const TypePtr& getElementType() const { + return elem; + } + + bool hasFreeVariables() const override { + return getElementType()->hasFreeVariables(); + } + + at::ArrayRef containedTypes() const override { + return elem; + } + + bool equals(const Type& rhs) const override { + if (auto rhs_ = rhs.cast()) { + return *getElementType() == *rhs_->getElementType(); + } + return false; + } + + protected: + SingleElementType(TypePtr elem) : SharedType(Kind), elem(std::move(elem)) { + TORCH_CHECK(this->elem, c10::str( + "Can not create ", typeKindToString(Kind), " with None type")); + } + + private: + TypePtr elem; +}; + +struct UnionType; +using UnionTypePtr = std::shared_ptr; +struct TORCH_API UnionType : public SharedType { + friend struct Type; + + static const TypeKind Kind = TypeKind::UnionType; + + bool isSubtypeOfExt(const Type& rhs_, std::ostream* why_not) const override; + + std::string str() const override; + + static UnionTypePtr create(std::vector reference); + + bool equals(const Type& rhs) const override; + + bool isUnionType() const override { + return true; + } + + at::ArrayRef containedTypes() const override { + return types_; + } + + // For testing purposes only + at::ArrayRef getTypes() const { + return types_; + } + + TypePtr createWithContained(std::vector contained_types) const override { + return create(std::move(contained_types)); + } + + bool canHoldType(const Type& type) const; + + bool hasFreeVariables() const override { + return 
has_free_variables_; + } + + std::optional toOptional() const; + + std::optional subtractTypeSet(std::vector& to_subtract) const; + + protected: + explicit UnionType(std::vector types, TypeKind kind=TypeKind::UnionType); + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + std::string unionStr( + const TypePrinter& printer = nullptr, + bool is_annotation_str = false) const; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool has_free_variables_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector types_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool can_hold_none_; + +}; + +struct OptionalType; +using OptionalTypePtr = std::shared_ptr; +// This type represents an optional type. There is one `Optional` for +// each element type. `Optional[T]` can accept both `T` and +// `None`(`std::nullopt` in C++) +// Subtype hierarchy for Optional: +// - Optional[T] <: Optional[R] iff T <: R +// - T <: Optional[R] if T <: R +// - None <: Optional[T] for all T +// - Optional[T] == Union[T, None] for all T +struct TORCH_API OptionalType : public UnionType { + static OptionalTypePtr create(const TypePtr& contained); + + static const TypeKind Kind = TypeKind::OptionalType; + + friend struct Type; + + bool equals(const Type& rhs) const override; + + const TypePtr& getElementType() const { + return contained_; + } + + at::ArrayRef containedTypes() const override { + return contained_; + } + + std::string str() const override { + std::stringstream ss; + ss << getElementType()->str() << '?'; + return ss.str(); + } + + TypePtr createWithContained( + std::vector contained_types) const override { + AT_ASSERT(contained_types.size() == 1); + return create(contained_types[0]); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + bool isUnionType() const override { + return true; + } + + // common cast 
Optional[Tensor] for undefined tensor type + static TypePtr ofTensor(); + // + // global singleton + static TypePtr get(TypePtr inner); + + private: + explicit OptionalType(const TypePtr& contained); + + TypePtr contained_; + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Optional[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +template +inline std::optional merge_primitive( + const std::optional& a, + const std::optional& b) { + if (a.has_value() && b.has_value() && a.value() == b.value()) { + return a; + } + return std::optional{}; +} + +// If we see `a + b + c` and know that a, b, and c are the same size and have +// two dimensions (WxH), then we can generate a fused kernel for them. That +// fused kernel would likely have indexing math to handle both the W and H +// dimensions. However, if we knew the WxH dimensions were contiguous, we can +// pretend like we only have a single dimension, simplifying the indexing logic. +// This can be performed even if the dimensions are transposed, +// as long as a, b, and c are transposed in the same way. +// We'd like to have the compiler be able to do this dimensionality reduction, +// but simply knowing sizes is not enough. +// We can extend profiling to also record stride information. 
+// Rather than recording specific strides, +// we can simply order the strides from smallest to largest with +// `stride_indices`. A contiguity marker on the smallest stride (c0) indicates +// the stride is precisely 1, otherwise a contiguity marker means that $stride_n +// = size_{n-1}*stride_{n-1}$ +struct TORCH_API Stride { + Stride() = default; + Stride( + const std::optional& stride_index, + std::optional contiguous, + const std::optional& stride) + : stride_index_(stride_index), contiguous_(contiguous), stride_(stride) {} + + bool operator==(const Stride& b) const { + return stride_index_ == b.stride_index_ && contiguous_ == b.contiguous_ && + stride_ == b.stride_; + } + + bool isComplete() const { + return stride_index_ && contiguous_ && stride_; + } + + std::optional stride_index_; + std::optional contiguous_; + std::optional stride_; +}; + +template <> +inline std::optional merge_primitive( + const std::optional& a, + const std::optional& b) { + std::optional left = a; + std::optional right = b; + if (!left.has_value()) { + left = {Stride()}; + } + if (!right.has_value()) { + right = {Stride()}; + } + + auto merged_index = + merge_primitive(left->stride_index_, right->stride_index_); + auto merged_cont = merge_primitive(left->contiguous_, right->contiguous_); + auto merged_stride = merge_primitive(left->stride_, right->stride_); + auto r = Stride(merged_index, merged_cont, merged_stride); + // normalize + if (!r.stride_index_.has_value() && !r.contiguous_.has_value() && + !r.stride_.has_value()) { + return std::optional{}; + } + + return r; +} + +struct TORCH_API ShapeSymbol { + // needed for use in `std::map` + ShapeSymbol() : value_(-1) {} + // is this symbol a fixed/static dimension + bool is_static() const { + return value_ >= 0; + } + bool operator==(const ShapeSymbol& b) const { + return value_ == b.value_; + } + bool operator<(const ShapeSymbol& b) const { + return value_ < b.value_; + } + + static ShapeSymbol fromStaticSize(int64_t val) { + return 
ShapeSymbol(val); + } + int64_t static_size() const { + TORCH_CHECK(is_static()); + return value_; + } + + int64_t value() const { + return value_; + } + + static ShapeSymbol newSymbol() { + return fromStaticSize(-static_cast(++num_symbols)); + } + friend TORCH_API std::ostream& operator<<( + std::ostream& os, + const ShapeSymbol& s); + + private: + ShapeSymbol(int64_t val) : value_(val) {} + int64_t value_; + static std::atomic num_symbols; +}; + +inline ShapeSymbol merge_primitive( + const ShapeSymbol& a, + const ShapeSymbol& b) { + if (a.is_static() && b.is_static() && a == b) { + return a; + } + return ShapeSymbol::newSymbol(); +} + +// Shape of a Tensor represented with ShapeSymbol's. Unranked, ranked unknown +// dims, partially known and fully known shapes are all supported. +struct TORCH_API SymbolicShape { + // Unranked shape constructor. + SymbolicShape() : dims_(std::nullopt) {} + + // Known rank but unknown dimensions. + SymbolicShape(std::optional rank) : dims_(std::nullopt) { + if(!rank) { + return; + } + + std::vector shape_symbols; + shape_symbols.reserve(*rank); + for(size_t i = 0; i < *rank; ++i) { + shape_symbols.push_back(ShapeSymbol::newSymbol()); + } + dims_ = shape_symbols; + } + + // Mix of known and unknown ranks + SymbolicShape(const std::vector>& dims) { + std::vector shape_symbols; + shape_symbols.reserve(dims.size()); + for(std::optional dim: dims) { + if(!dim) { + shape_symbols.push_back(ShapeSymbol::newSymbol()); + } else { + shape_symbols.push_back(ShapeSymbol::fromStaticSize(*dim)); + } + } + dims_ = shape_symbols; + } + + void dump() const; + + SymbolicShape(std::vector dims) : dims_(std::move(dims)) {} + + SymbolicShape(c10::IntArrayRef dims) { + std::vector shape_symbols; + shape_symbols.reserve(dims.size()); + for(int64_t dim : dims) { + shape_symbols.push_back(ShapeSymbol::fromStaticSize(dim)); + } + dims_ = shape_symbols; + } + + ShapeSymbol operator[](size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return 
(*dims_).at(i); + } + + ShapeSymbol at(size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return (*dims_).at(i); + } + + // Returns rank or nullopt in case of unranked shape. + std::optional rank() const { + if(!dims_) { + return std::nullopt; + } + return dims_->size(); + } + + std::optional> sizes() const { + return dims_; + } + + std::optional> symbolicDims() const { + if (!dims_) { + return std::nullopt; + } + auto symbolic_dims = std::vector(); + for (const ShapeSymbol& s : *dims_) { + symbolic_dims.push_back(!s.is_static()); + } + return symbolic_dims; + } + + // Checks whether the shape is fully defined/complete, ie. rank and sizes + // of every dimension are known. + bool isComplete() const { + if(!dims_) { + return false; + } + for(auto d : *dims_) { + if(!d.is_static()) { + return false; + } + } + return true; + } + + // Create new SymbolicShape that is result of merging self and another + // SymbolicShape. Only dimensions that are static and equal will be + // preserved. + // If either of two shapes are of unknown rank or they have unmatching rank, + // result will be unranked. 
+ SymbolicShape merge(const SymbolicShape& other) const; + + friend bool operator==(const SymbolicShape& lhs, const SymbolicShape& rhs) { + return lhs.dims_ == rhs.dims_; + } + + friend bool operator!=(const SymbolicShape& lhs, const SymbolicShape& rhs) { + return !(lhs == rhs); + } + + private: + std::optional> dims_; +}; + +namespace detail { +inline bool isComplete(const Stride& s) { + return s.isComplete(); +} + +template +inline bool isComplete(const T& /*t*/) { + return true; +} +} + +template +struct VaryingShape { + using ListOfOptionalElements = std::vector>; + VaryingShape(const std::vector& vec) + : VaryingShape(ListOfOptionalElements(vec.begin(), vec.end())) {} + + VaryingShape(c10::ArrayRef vec) + : VaryingShape(ListOfOptionalElements(vec.begin(), vec.end())) {} + + VaryingShape(std::optional size = std::nullopt) : dims_(std::nullopt) { + if (size) { + dims_ = ListOfOptionalElements(*size); + } + } + + VaryingShape(ListOfOptionalElements dims) : dims_(std::move(dims)) {} + + VaryingShape(size_t size) : VaryingShape(std::optional(size)) {} + + bool operator==(const VaryingShape& other) const { + return dims_ == other.dims_; + } + + const std::optional &operator[](size_t i) const { + TORCH_CHECK(dims_, "Rank isn't fixed"); + return (*dims_).at(i); + } + + std::optional size() const { + if (!dims_) { + return std::nullopt; + } + const auto& dims = dims_.value(); + return dims.size(); + } + + const std::optional& sizes() const { + return dims_; + } + + TORCH_API VaryingShape merge(const VaryingShape& other) const; + + std::optional> concrete_sizes() const { + if (!dims_) { + return std::nullopt; + } + std::vector sizes; + sizes.reserve(dims_.value().size()); + for (auto d : *dims_) { + if (!d) { + return std::nullopt; + } + sizes.push_back(d.value()); + } + return sizes; + } + + bool isComplete() const { + if (!dims_) { + return false; + } + for (auto d : *dims_) { + if (!d || !detail::isComplete(*d)) { + return false; + } + } + return true; + } + + 
private: + std::optional dims_; +}; + +struct TensorType; +// TODO: investigate making this SingletonOrSharedTypePtr +using TensorTypePtr = std::shared_ptr; +// This type represents a single Tensor with a specific size +struct TORCH_API TensorType : public SharedType { + static TensorTypePtr create(const at::Tensor& t); + + // used by TensorType::create(size_t dim) which in turn used by + // shape_analysis.cpp + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + const VaryingShape& sizes, + const VaryingShape& strides, + std::optional requires_grad, + std::optional undefined = false, + bool tensor_contiguity = false); + + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + SymbolicShape sizes, + VaryingShape stride_, + std::optional requires_grad, + std::optional undefined = false); + + static TensorTypePtr create( + std::optional scalar_type, + std::optional device, + std::optional dim, + std::optional requires_grad); + + // overloaded create variadic template argument as it could not distinguish + // initializer list + static TensorTypePtr createContiguous( + at::ScalarType scalar_type, + at::Device device, + at::IntArrayRef sizes); + + static TypePtr fromNumberType(const Type& typ); + static TypePtr fromBoolType(); + + std::optional dim() const { + return sizes().size(); + } + + VaryingShape sizes() const; + + VaryingShape strides() const; + + const VaryingShape& stride_properties() const { + return strides_; + } + + const std::optional& device() const { + return device_; + } + const std::optional& scalarType() const { + return scalar_type_; + } + const std::optional& requiresGrad() const { + return requires_grad_; + } + bool requires_grad() const override { + return requires_grad_ ? 
*requires_grad_ : true; + } + + bool equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::string str() const override; + + std::string repr_str() const override { + if (isInferredType()) { + return str() + " (inferred)"; + } else { + return str(); + } + } + + std::optional numel() const { + size_t prod = 1; + const auto& shape = sizes(); + + for (size_t i = 0; i < shape.size(); i++) { + auto const &s = shape[i]; + if (!s.has_value()) { + return std::optional{}; + } + prod *= s.value(); + } + return prod; + } + + TensorTypePtr withRequiresGrad(std::optional s) { + auto copy = clone(); + copy->requires_grad_ = s; + return copy; + } + + TensorTypePtr withScalarType(std::optional st) { + auto copy = clone(); + copy->scalar_type_ = st; + return copy; + } + + TensorTypePtr withDim(std::optional d) { + auto copy = clone(); + // withDim is only used by the legacy executor + // that only cares about the rank, so create dummy symbols + copy->sizes_ = SymbolicShape(d); + copy->strides_ = VaryingShape(d); + return copy; + } + + TensorTypePtr withStrides(VaryingShape sstrides) const { + auto cloned = clone(); + cloned->strides_ = std::move(sstrides); + return cloned; + } + + TensorTypePtr withSizesStrides( + at::IntArrayRef sizes, + at::IntArrayRef strides) const { + auto cloned = clone(); + auto ssizes = SymbolicShape(sizes); + cloned->sizes_ = ssizes; + cloned->strides_ = computeStrideProps(sizes, strides); + return cloned; + } + + TensorTypePtr withSymbolicShapes(SymbolicShape ssizes) const { + auto cloned = clone(); + cloned->sizes_ = std::move(ssizes); + return cloned; + } + + TensorTypePtr withSizes(at::IntArrayRef sizes) const { + return withSizesStrides( + sizes, contiguousStridesOf(sizes)); + } + + TensorTypePtr withDevice(const std::optional device) const { + auto copy = clone(); + copy->device_ = device; + return copy; + } + + TensorTypePtr dimensionedOnly() const { + auto copy = clone(); + 
copy->sizes_ = SymbolicShape(sizes().size()); + copy->strides_ = VaryingShape(sizes().size()); + return copy; + } + + TensorTypePtr contiguous() const { + auto cloned = clone(); + auto concrete_sizes = sizes().concrete_sizes(); + TORCH_INTERNAL_ASSERT(concrete_sizes.has_value()); + auto strides = computeStrideProps( + *concrete_sizes, + contiguousStridesOf(*concrete_sizes)); + cloned->strides_ = strides; + return cloned; + } + + const SymbolicShape& symbolic_sizes() const; + + TensorTypePtr merge(const TensorType& other, bool merge_sizes = true) const; + + bool matchTensor(const at::Tensor& t); + + // is all information about the type specified except for autograd? + // This replaces the notion of a 'CompleteTensorType' that used to exist + // in the type-hierarchy. Excluding require_grad and undefined allows + // this to match the old behavior. + bool isComplete() const { + return scalar_type_ && device_ && sizes_.isComplete() && strides_.isComplete(); + } + + bool isInferredType() const { + return is_inferred_; + } + + static TensorTypePtr getInferred() { + static auto valueInferred = TensorType::create( + /*scalar_type=*/{}, + /*device=*/{}, + /*sizes=*/SymbolicShape(), + /*stride=*/VaryingShape{}, + /*requires_grad=*/{}, + /*undefined=*/false); + valueInferred->is_inferred_ = true; + return valueInferred; + } + + // this property is used by GuardElimination + // please see `checkInputs` for more details + bool isSummarized() const { + return !(isComplete() && requiresGrad().has_value() && + undefined().has_value()); + } + + TensorTypePtr withUndefined() { + auto r = clone(); + r->undefined_ = true; + return r; + } + + TensorTypePtr withPossiblyUndefined() { + auto r = clone(); + r->undefined_ = std::nullopt; + return r; + } + + std::optional undefined() const { return undefined_; } + + static const TensorTypePtr& get(); + + static const TypeKind Kind = TypeKind::TensorType; + + static std::vector contiguousStridesOf( + at::IntArrayRef in_sizes, + 
at::MemoryFormat memory_format = MemoryFormat::Contiguous) { + auto contiguous_fn = [](const at::IntArrayRef& sizes, + const std::vector& dim_order) { + std::vector strides(sizes.size()); + if (sizes.empty()) // zero-dim case + return strides; + + strides[dim_order[0]] = 1; + for (size_t i = 1; i < dim_order.size(); i++) { + auto cur_dim = dim_order[i]; + auto pre_dim = dim_order[i - 1]; + strides[cur_dim] = strides[pre_dim] * sizes[pre_dim]; + } + return strides; + }; + + std::vector dim_order(in_sizes.size()); + if (memory_format == MemoryFormat::ChannelsLast) { + dim_order = {1, 3, 2, 0}; + } else if (memory_format == MemoryFormat::ChannelsLast3d) { + dim_order = {1, 4, 3, 2, 0}; + } else { + auto ndims = in_sizes.size(); + for (size_t i = 0; i < ndims; i++) { + dim_order[i] = static_cast(ndims - i - 1); // Reverse + } + } + return contiguous_fn(in_sizes, dim_order); + } + + private: + TensorType( + std::optional scalar_type, + std::optional device, + SymbolicShape sizes, + VaryingShape strides, + std::optional requires_grad, + std::optional undefined = false); + + TensorTypePtr clone() const { + return TensorTypePtr(new TensorType( + scalar_type_, device_, sizes_, strides_, requires_grad_, undefined_)); + } + + static VaryingShape computeStrideProps( + at::IntArrayRef sizes, + at::IntArrayRef strides, + bool tensor_contiguity = false); + + std::optional scalar_type_; + std::optional device_; + SymbolicShape sizes_; + VaryingShape strides_; + std::optional requires_grad_; + // we exploit the fact certain tensors must be zero in the autograd to + // optimize gradient computation. Such zero tensors are currently implemented + // with `UndefinedTensorImpl.` They can be handled only by special operators + // (e.g. `AutogradAdd`) and their `Tensor::defined()` property returns false. 
+ // Normally, `undefined_` is set to false, unless a type was created + // with `withUndefined` + // This will also mean that `undefined` tensors will fail + // `subtypeOf(TensorType::get())` check + // undefined_ may become `std::nullopt` if the tensor was observed to be both + // defined and undefined. However, no tensor type starts out with + // `undefined_` set to `std::nullopt` + std::optional undefined_; + // Represents whether or not this type was inferred. + bool is_inferred_ = false; +}; + +struct ListType; +using ListTypePtr = std::shared_ptr; +struct TORCH_API ListType + : public SingleElementType { + // It's not exactly a singleton, but there should be exactly one instance of + // List[T] for every T + friend struct Type; + template + static ListTypePtr create(T&&... all) { + return ListTypePtr( + new ListType(std::forward(all)...)); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << getElementType()->str() << "[]"; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + // global singleton + // Given an inner type T and an identifier, + // this function will return the global singleton type pointer + // the type List. + // The extra "identifier" argument is needed because we have multiple container types + // that all reuse this function (List, array, etc.) 
+ static TypePtr get(const std::string& identifier, TypePtr inner); + + // common cast List[Tensor] + static ListTypePtr ofTensors(); + static ListTypePtr ofOptionalTensors(); + static ListTypePtr ofInts(); + static ListTypePtr ofSymInts(); + static ListTypePtr ofFloats(); + static ListTypePtr ofComplexDoubles(); + static ListTypePtr ofBools(); + static ListTypePtr ofStrings(); + static ListTypePtr ofNumbers(); + + private: + ListType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "List[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct DictType; +using DictTypePtr = std::shared_ptr; +struct TORCH_API DictType : public SharedType { + friend struct Type; + static const TypeKind Kind = TypeKind::DictType; + + static DictTypePtr create(TypePtr key, TypePtr value) { + auto kind = key->kind(); + if (auto dyn = key->castRaw()) { + kind = dyn->dynamicKind(); + } + C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wswitch-enum") + switch (kind) { + case TypeKind::AnyType: + case TypeKind::IntType: + case TypeKind::BoolType: + case TypeKind::FloatType: + case TypeKind::ComplexType: + case TypeKind::StringType: + case TypeKind::TensorType: + case TypeKind::DeviceObjType: + return DictTypePtr(new DictType(std::move(key), std::move(value))); + default: + TORCH_CHECK(false, + "Cannot create dict for key type '", + key->str(), + "', only int, float, complex, Tensor, device and string keys are supported"); + } + C10_DIAGNOSTIC_POP() + } + + // aligned with the format in FunctionSchema + std::string str() const override { + std::stringstream ss; + ss << "Dict(" << getKeyType()->str() << ", " << getValueType()->str() + << ')'; + return ss.str(); + } + + TypePtr createWithContained( + std::vector contained_types) const override { + TORCH_CHECK(contained_types.size() == 2, "Expected 2 contained types"); + return 
create(std::move(contained_types.at(0)), std::move(contained_types.at(1))); + } + + const TypePtr& getKeyType() const { + return types.at(0); + } + + const TypePtr& getValueType() const { + return types.at(1); + } + + bool hasFreeVariables() const override { + return has_free_variables; + } + + at::ArrayRef containedTypes() const override { + return types; + } + + bool equals(const Type& rhs) const override { + if (auto* dict_rhs = rhs.castRaw()) { + return *getKeyType() == *(dict_rhs->getKeyType()) && + *getValueType() == *(dict_rhs->getValueType()); + } + return false; + } + + // global singleton + // Given key and value types and an identifier, + // this function will return the global singleton type pointer + // for the corresponding Dict type. + // The extra "identifier" argument is needed because we have multiple container types + // that all reuse this function (Dict and unordered_map) + static TypePtr get(const std::string& identifier, TypePtr key, TypePtr val); + + private: + DictType(TypePtr key, TypePtr value) + : SharedType(TypeKind::DictType), + has_free_variables( + key->hasFreeVariables() || value->hasFreeVariables()) { + types.reserve(2); + types.push_back(std::move(key)); + types.push_back(std::move(value)); + } + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + + std::vector types; + bool has_free_variables; +}; + +struct FutureType; +using FutureTypePtr = std::shared_ptr; + +struct TORCH_API FutureType + : public SingleElementType { + friend struct Type; + template + static FutureTypePtr create(TypePtr elem) { + return FutureTypePtr( + new FutureType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << "Future(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* 
why_not) const override { + if (Type::isSubtypeOfExt(rhs, why_not)) { + return true; + } + if (auto rhs_ = rhs.castRaw()) { + return getElementType()->isSubtypeOfExt(*rhs_->getElementType(), why_not); + } + return false; + } + + private: + FutureType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Future[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct AwaitType; +using AwaitTypePtr = std::shared_ptr; + +struct TORCH_API AwaitType + : public SingleElementType { + friend struct Type; + template + static AwaitTypePtr create(TypePtr elem) { + return AwaitTypePtr( + new AwaitType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + std::stringstream ss; + ss << "Await(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + if (Type::isSubtypeOfExt(rhs, why_not)) { + return true; + } + if (auto rhs_ = rhs.castRaw()) { + return getElementType()->isSubtypeOfExt(*rhs_->getElementType(), why_not); + } + return false; + } + + private: + AwaitType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "Await[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +struct RRefType; +using RRefTypePtr = std::shared_ptr; + +struct TORCH_API RRefType + : public SingleElementType { + friend struct Type; + template + static RRefTypePtr create(TypePtr elem) { + return RRefTypePtr( + new RRefType(std::move(elem))); // NOLINT(modernize-make-shared) + } + + std::string str() const override { + 
std::stringstream ss; + ss << "RRef(" << getElementType()->str() << ')'; + return ss.str(); + } + TypePtr createWithContained( + std::vector contained_types) const override { + return create(std::move(contained_types.at(0))); + } + + private: + RRefType(TypePtr elem) : SingleElementType(std::move(elem)) {} + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override { + std::stringstream ss; + ss << "RRef[" << getElementType()->annotation_str(printer) << ']'; + return ss.str(); + } +}; + +// Any should never appear in a named type like a class, namedtuple or +// interface. If it does, then dynamic type information will be lost in the +// Pickler, leading to hard-to-track-down bugs that will only occur +// after saving or loading a model. This is because we rely on the +// static types in named types to reconstruct type tags of loaded +// values. Lifting this restriction requires solving the serialization +// problem first. +TORCH_API void checkNoAny( + const Type& base, + const char* what, + const std::string& attrname, + const TypePtr& attrtype); + +struct TupleType; +using TupleTypePtr = std::shared_ptr; +using NameList = std::vector; +// This type represents a Tuple +struct TORCH_API TupleType : public NamedType { + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types, + std::vector& field_defaults); + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types); + + static TupleTypePtr createNamed(const std::optional& name, + const std::vector& field_names, + const std::vector& field_types); + + static TupleTypePtr create( + std::vector types) { + return TupleTypePtr(new TupleType( + std::move(types), + std::nullopt, + nullptr)); // NOLINT(modernize-make-shared) + } + static TupleTypePtr create() { + return create({}); + } + + at::ArrayRef elements() const { + return elements_; + } + + bool 
equals(const Type& rhs) const override; + bool isSubtypeOfExt(const Type& rhs_, std::ostream* why_not) const override; + + std::string str() const override; + bool hasFreeVariables() const override { + return has_free_variables_; + } + at::ArrayRef containedTypes() const override { + return elements_; + } + TypePtr createWithContained( + std::vector contained_types) const override { + return std::shared_ptr( + new TupleType(std::move(contained_types), name(), schema())); + } + const std::shared_ptr& schema() const { + return schema_; + } + std::optional> names() const; + + static const TypeKind Kind = TypeKind::TupleType; + + private: + template + static TupleTypePtr createWithSpec( + const std::optional& name, + const std::vector& field_names, + const std::vector& field_types, + std::vector& field_defaults); + + TupleType( + std::vector elements_, + std::optional name, + std::shared_ptr schema); + + bool compare( + const Type& rhs, + const std::function& fn) const { + if (rhs.kind() != kind()) { + return false; + } + + const auto& l_elements = elements(); + const auto& r_elements = rhs.castRaw()->elements(); + if (l_elements.size() != r_elements.size()) + return false; + for (size_t i = 0; i < l_elements.size(); ++i) { + if (!fn(*l_elements[i], *r_elements[i])) + return false; + } + return true; + } + + std::string annotation_str_impl(const TypePrinter& printer = nullptr) const override; + + std::vector elements_; + bool has_free_variables_; + std::shared_ptr schema_; +}; + +// the common supertype of all Enums, only used in operator registration. 
+// EnumType <: AnyEnumType for all Enums +struct AnyEnumType; +using AnyEnumTypePtr = SingletonTypePtr; +struct TORCH_API AnyEnumType final : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "AnyEnumType"; + } + static const TypeKind Kind = TypeKind::AnyEnumType; + // global singleton + static AnyEnumTypePtr get(); +private: + AnyEnumType() + : Type(TypeKind::AnyEnumType) {} +}; + +struct NumberType; +using NumberTypePtr = SingletonTypePtr; +// This type represents a Python number +// Subtype hierarchy for Number Types (NumberType as the base type): +// IntType <: NumberType +// FloatType <: NumberType +// ComplexType <:NumberType +// +// WARNING: if you add a new subtype of NumberType that is not +// represented by a global singleton, you need to change NumberTypePtr +// to a SingletonOrSharedTypePtr and deal with NumberType needing to +// both inherit and not inherit from SharedType! +struct TORCH_API NumberType : public Type { + bool equals(const Type& rhs) const override; + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + std::string str() const override { + return "Scalar"; // match what PythonArgParser says for clarity + } + static const TypeKind Kind = TypeKind::NumberType; + // global singleton + static NumberTypePtr get(); + + protected: + NumberType(TypeKind kind = TypeKind::NumberType) : Type(kind) {} + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "number"; // technically not a valid python type, but + // we need to use it when parsing back in annotations + // for implicit conversions + } +}; + +struct FloatType; +using FloatTypePtr = SingletonTypePtr; +// This type represents a Python float number +struct TORCH_API FloatType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const 
override { + return "float"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::FloatType; + // global singleton + static FloatTypePtr get(); + + private: + FloatType() : NumberType(TypeKind::FloatType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "float"; + } +}; + +struct ComplexType; +using ComplexTypePtr = SingletonTypePtr; +// This type represents a Python complex number +struct TORCH_API ComplexType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "complex"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::ComplexType; + // global singleton + static ComplexTypePtr get(); + + private: + ComplexType() : NumberType(TypeKind::ComplexType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "complex"; + } +}; + +// We need to introduce `SymIntType` to represent the `SymInt` type +// used in function schemas e.g. `aten::narrow_copy(... SymInt length)`. +// `SymInt` will be used to enable tracing arithmetic operations on +// dimension values. 
Please see [SymInt.h] for more information +struct SymIntType; +using SymIntTypePtr = SingletonTypePtr; +struct TORCH_API SymIntType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymInt"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "int"; + } + static const TypeKind Kind = TypeKind::SymIntType; + // global singleton + static SymIntTypePtr get(); + + private: + SymIntType() : Type(TypeKind::SymIntType) {} +}; + +struct SymFloatType; +using SymFloatTypePtr = SingletonTypePtr; +struct TORCH_API SymFloatType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymFloat"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "float"; + } + static const TypeKind Kind = TypeKind::SymFloatType; + // global singleton + static SymFloatTypePtr get(); + + private: + SymFloatType() : Type(TypeKind::SymFloatType) {} +}; + +struct SymBoolType; +using SymBoolTypePtr = SingletonTypePtr; +struct TORCH_API SymBoolType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "SymBool"; + } + std::string annotation_str_impl(const TypePrinter& printer [[maybe_unused]] = nullptr) const override { + return "bool"; + } + static const TypeKind Kind = TypeKind::SymBoolType; + // global singleton + static SymBoolTypePtr get(); + + private: + SymBoolType() : Type(TypeKind::SymBoolType) {} +}; + +struct IntType; +using IntTypePtr = SingletonTypePtr; +// This type represents a Python int number +struct TORCH_API IntType : public NumberType { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "int"; + } + 
bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override { + // NOLINTNEXTLINE(bugprone-parent-virtual-call) + return rhs.kind() == TypeKind::NumberType || Type::isSubtypeOfExt(rhs, why_not); + } + static const TypeKind Kind = TypeKind::IntType; + // global singleton + static IntTypePtr get(); + + private: + IntType() : NumberType(TypeKind::IntType) {} + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "int"; + } +}; + +struct BoolType; +using BoolTypePtr = SingletonTypePtr; +// This node represents a Python bool value +struct TORCH_API BoolType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "bool"; + } + static const TypeKind Kind = TypeKind::BoolType; + // global singleton + static BoolTypePtr get(); + + private: + BoolType() : Type(TypeKind::BoolType) {} +}; + +struct StringType; +using StringTypePtr = SingletonTypePtr; +// This type represents a Python string +struct TORCH_API StringType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + // we only use "str" (not "string") in both FunctionSchema and script + return annotation_str(); + } + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + return "str"; + } + static const TypeKind Kind = TypeKind::StringType; + // global singleton + static StringTypePtr get(); + + private: + StringType() : Type(TypeKind::StringType) {} +}; + +struct StorageType; +using StorageTypePtr = SingletonTypePtr; +struct TORCH_API StorageType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return annotation_str(); + } + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + 
return "Storage"; + } + static const TypeKind Kind = TypeKind::StorageType; + // global singleton + static StorageTypePtr get(); + + private: + StorageType() : Type(TypeKind::StorageType) {} +}; + +struct FunctionType; +using FunctionTypePtr = std::shared_ptr; +struct TORCH_API FunctionType : public NamedType { + static FunctionTypePtr create(torch::jit::Function* function) { + return FunctionTypePtr( + new FunctionType(function)); // NOLINT(modernize-make-shared) + } + bool equals(const Type& rhs) const override { + if (auto func_type = rhs.cast()) { + return func_type->function_ == function_; + } + + return false; + } + std::string str() const override { + return "Function"; + } + torch::jit::Function* function() const { + return function_; + } + static const TypeKind Kind = TypeKind::FunctionType; + + private: + FunctionType(torch::jit::Function* function); + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + torch::jit::Function* function_; +}; + +struct NoneType; +using NoneTypePtr = SingletonTypePtr; +// This type represents a Python None +struct TORCH_API NoneType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "NoneType"; + } + bool isSubtypeOfExt(const Type& rhs, std::ostream *why_not) const override; + + static const TypeKind Kind = TypeKind::NoneType; + // global singleton + static NoneTypePtr get(); + + private: + NoneType() : Type(TypeKind::NoneType) {} +}; + +struct GeneratorType; +using GeneratorTypePtr = SingletonTypePtr; +// This type represents a Generator +struct TORCH_API GeneratorType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Generator"; + } + static const TypeKind Kind = TypeKind::GeneratorType; + 
// global singleton + static GeneratorTypePtr get(); + + private: + GeneratorType() : Type(TypeKind::GeneratorType) {} +}; + +struct QuantizerType; +using QuantizerTypePtr = SingletonTypePtr; +// This type represents a Quantizer +struct TORCH_API QuantizerType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Quantizer"; + } + static const TypeKind Kind = TypeKind::QuantizerType; + // global singleton + static QuantizerTypePtr get(); + + private: + QuantizerType() : Type(TypeKind::QuantizerType) {} +}; + +struct QSchemeType; +using QSchemeTypePtr = SingletonTypePtr; +// This type represents a QScheme +struct TORCH_API QSchemeType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "QScheme"; + } + static const TypeKind Kind = TypeKind::QSchemeType; + // global singleton + static QSchemeTypePtr get(); + + private: + QSchemeType() : Type(TypeKind::QSchemeType) {} +}; + +struct DeviceObjType; +using DeviceObjTypePtr = SingletonTypePtr; +// This type represents a Device +struct TORCH_API DeviceObjType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Device"; + } + static const TypeKind Kind = TypeKind::DeviceObjType; + // global singleton + static DeviceObjTypePtr get(); + + private: + DeviceObjType() : Type(TypeKind::DeviceObjType) {} +}; + +struct StreamObjType; +using StreamObjTypePtr = SingletonTypePtr; +// This type represents a Stream +struct TORCH_API StreamObjType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Stream"; + } + static const TypeKind Kind = TypeKind::StreamObjType; + // global singleton + static StreamObjTypePtr get(); + +private: + StreamObjType() : 
Type(TypeKind::StreamObjType) {} +}; + +struct VarType; +using VarTypePtr = std::shared_ptr; +// This type represents a type variable, used in FunctionSchema +struct VarType : public SharedType { + static VarTypePtr create(std::string name_) { + return VarTypePtr(new VarType(std::move(name_))); + } + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return name(); + } + const std::string& name() const { + return name_; + } + bool hasFreeVariables() const override { + return true; + } + static const TypeKind Kind = TypeKind::VarType; + + private: + VarType(std::string name_) + : SharedType(TypeKind::VarType), name_(std::move(name_)) {} + std::string name_; +}; + +struct CapsuleType; +using CapsuleTypePtr = SingletonTypePtr; +// This type represents a Python Capsule. +// It does not appear in the IR and is only used during runtime +struct TORCH_API CapsuleType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "Capsule"; + } + static const TypeKind Kind = TypeKind::CapsuleType; + // global singleton + static CapsuleTypePtr get(); +private: + CapsuleType() + : Type(TypeKind::CapsuleType) {} +}; + +struct PyObjectType; +using PyObjectTypePtr = SingletonTypePtr; +// This type represents a PyObject Type +struct TORCH_API PyObjectType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "PyObject"; + } + static const TypeKind Kind = TypeKind::PyObjectType; + // global singleton + static PyObjectTypePtr get(); +private: + PyObjectType() + : Type(TypeKind::PyObjectType) {} +}; + +enum class TypeVerbosity { + None, + Type, + TypeAndStride, + Full, + Symbolic, + Default = Full, +}; + +TORCH_API TypeVerbosity type_verbosity(); + +TORCH_API std::ostream& operator<<(std::ostream& out, const Type& t); +template +TORCH_API 
std::ostream& operator<<( + std::ostream& out, + const VaryingShape& t); +TORCH_API std::ostream& operator<<(std::ostream& os, const SymbolicShape& s); +TORCH_API std::ostream& operator<<(std::ostream& os, const ShapeSymbol& s); +TORCH_API std::ostream& operator<<(std::ostream& os, const Stride& s); +// what is the type, ignoring extra size/shape information? +// e.g. Tensor(2x3) -> Dynamic, and Tuple(Tensor(2x3),...) -> Tuple(Dynamic,...) + +// `unshapedType` is used to remove Tensor subtypes. We treat all Tensor +// subtypes as simply "Tensor"; we also create a new version of any +// container types in which internal Tensors have undergone the same +// operation. This is used for type comparisons between two Tensor types +// (`unshapedType` means that we don't falsely return `false` for e.g. +// Tensors of different dimensions). It's also used in the alias +// analysis pass. +// Be careful with calls because this can be very slow. If calling this +// on a graph, use `EraseShapeInformation` in shape_analysis.h +inline TypePtr unshapedType(const TypePtr& type) { + if (type->isSubtypeOf(*TensorType::get())) { + return TensorType::get(); + } + at::ArrayRef contained = type->containedTypes(); + if (contained.empty()) { + return type; + } + return type->withContained(fmap(type->containedTypes(), unshapedType)); +} + +inline TypePtr TensorType::fromNumberType(const Type& typ) { + if (typ.isSubtypeOf(*IntType::get())) { + return TensorType::createContiguous(at::kLong, at::kCPU, {}); + } else if (typ.isSubtypeOf(*FloatType::get())) { + return TensorType::createContiguous(at::kDouble, at::kCPU, {}); + } else if (typ.isSubtypeOf(*BoolType::get())) { + return TensorType::createContiguous(at::kBool, at::kCPU, {}); + } else if (typ.kind() == NumberType::Kind) { + return TensorType::create(std::nullopt, at::kCPU, {}, std::nullopt); + } + TORCH_CHECK(false, "Unknown number type: ", typ.str()); +} +inline TypePtr TensorType::fromBoolType() { + return 
TensorType::createContiguous(at::kBool, at::kCPU, {}); +} + +inline std::optional tryScalarTypeFromJitType(const Type& type) { + if (type == *FloatType::get()) { + return at::typeMetaToScalarType(c10::get_default_dtype()); + } else if (type == *IntType::get()) { + return at::ScalarType::Long; + } else if (type == *BoolType::get()) { + return at::ScalarType::Bool; + } + return std::nullopt; +} + +inline at::ScalarType scalarTypeFromJitType(const Type& type) { + auto result = tryScalarTypeFromJitType(type); + TORCH_CHECK( + result, + "Add new condition, expected Float, Complex, Int, or Bool but got", + type.str()); + return *result; +} + +// Attempt to find the correct supertype of the two types `t1` and `t2`. +// If no supertype is found, then nullopt will be returned if +// `default_to_union` is false, and `Union[t1, t2]` will be returned +// if it is true. If `t1 == t2`, or `t1` is a type refinement of `t2`, +// then `t2` will be returned (and vice versa). +// +// Two different tensortypes will return dynamic. +// +// Currently we chose not to support returning a NumberType for +// two types from the set of {FloatType, IntType, ComplexType}, because +// there is a lack of operator support for NumberType. +// +// If `type_hint` is an `InterfaceType`, then we can use that as a +// potential supertype for `ClassType`s in the list. 
Otherwise, we have +// no way to find and use some common interface type +TORCH_API std::optional unifyTypes( + const TypePtr& t1, + const TypePtr& t2, + bool default_to_union = false, + const TypePtr& type_hint = nullptr); + +TORCH_API std::optional unifyTypeList( + at::ArrayRef elements, + std::ostream& why_not, + bool default_to_union = false, + const TypePtr& type_hint = nullptr); + +namespace detail { +template +struct getTypePtr_ final { + static decltype(auto) call() { + return ([]() { + try { + return getCustomClassType(); + } catch(const c10::Error&) { + TORCH_CHECK( + false, + "Type ", + c10::util::get_fully_qualified_type_name(), + " could not be converted to any of the known types." + ); + } + }()); + } +}; + +template +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return getTypePtr_::call(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return AnyType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return TensorType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StorageType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StreamObjType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return FloatType::get(); + } +}; +template <> +struct getTypePtr_> final { + static decltype(auto) call() { + return ComplexType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymIntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; + +template <> +struct 
getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymFloatType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return FloatType::get(); + } +}; + +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return SymBoolType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return BoolType::get(); + } +}; + +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return DeviceObjType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return BoolType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return NumberType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return QSchemeType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return TypeFactory::create( + TypeFactory::get()); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return StringType::get(); + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per vector" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. 
+ static auto type = ListType::get("vector", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per ArrayRef" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = ListType::get("ArrayRef", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_ final { + static const auto& call() { + static auto type = ListType::create(getMaybeFakeTypePtr_::call()); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per List" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = ListType::get("List", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + static auto type = ListType::get("List", inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per array" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + // (Concatenating the length onto the end of the string because we want a unique + // type_ptr created for every std::array type). 
+ static auto type = ListType::get(std::string("array") + std::to_string(N), inner_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_key_type = getMaybeFakeTypePtr_::call(); + static auto inner_val_type = getMaybeFakeTypePtr_::call(); + // The "per unordered_map" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = DictType::get("unordered_map", inner_key_type, inner_val_type); + return type; + } +}; +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_key_type = getMaybeFakeTypePtr_::call(); + static auto inner_val_type = getMaybeFakeTypePtr_::call(); + // The "per Dict" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = DictType::get("Dict", inner_key_type, inner_val_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = OptionalType::get(inner_type); + return type; + } +}; + + +template<> +struct getTypePtr_ final { + static const auto& call() { + static auto inner_type = getMaybeFakeTypePtr_::call(); + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. + static auto type = OptionalType::get(inner_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_ final { + static const auto& call() { + // The "per std::optional" static singleton needs to live in a .cpp file, + // otherwise we'll end up with one singleton instance per shared library. 
+ static auto inner_type = getMaybeFakeTypePtr_::call(); + static auto type = OptionalType::get(inner_type); + return type; + } +}; + +template +struct getMaybeFakeTypePtr_, fake> final { + static const auto& call() { + static auto type = ([]() { + std::vector contained_types = { + (getMaybeFakeTypePtr_::call())... + }; + return TupleType::create(std::move(contained_types)); + })(); + return type; + } +}; +template <> +struct getTypePtr_ final { + static decltype(auto) call() { + return NoneType::get(); + } +}; +} // namespace detail +template +inline decltype(auto) getTypePtr() { + // TODO: static_assert that a templated function exists, and throw a friendly + // error message if not + return detail::getMaybeFakeTypePtr_::call(); +} + +template +inline TypePtr getTypePtrCopy() { + // TODO: static_assert that a templated function exists, and throw a friendly + // error message if not + return getTypePtr(); +} + +template +inline decltype(auto) getFakeTypePtr() { + return detail::getMaybeFakeTypePtr_::call(); +} + +template +inline TypePtr getFakeTypePtrCopy() { + return getFakeTypePtr(); +} + +using TypeEnv = std::unordered_map; +struct MatchTypeReturn { + MatchTypeReturn(std::string reason) : reason_(std::move(reason)) {} + static MatchTypeReturn Success() { + return MatchTypeReturn(); + } + bool success() const { + return !reason_.has_value(); + } + const std::string& reason() const { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return reason_.value(); + } + + private: + MatchTypeReturn() + : reason_(std::nullopt) {} + std::optional reason_; // if there is no match, this contains the reason +}; + +// attempt to match the type variables in formal to actual, adding them to type_env. +// If no match is possible this returns a MatchTypeReturn with r.success() == false +// and a r.reason() that describes why it could not match. +// note: It is possible to successfully match a formal, but for type variables +// in the formal to still not be defined. 
In particular, None matches Optional[T] +// but does not define the value of T. +TORCH_API MatchTypeReturn +matchTypeVariables(const TypePtr& formal, const TypePtr& actual, TypeEnv& type_env); + +// replace type variables appearing in `type` with the values in +// `type_env`. Returns nullptr if a variable used in `type` +// does not appear in `type_env` +TORCH_API TypePtr tryEvalTypeVariables(const TypePtr& type, TypeEnv& type_env); + +TORCH_API bool elementTypeCanBeInferredFromMembers(const TypePtr& elem_type); + +struct InterfaceType; +using InterfaceTypePtr = std::shared_ptr; + +// Interfaces are a list of abstract methods that a class might meet. +// If a class provides those methods, it implicitly meets the interface. + +// Subtype relations for Interface with ClassType: +// lhs (ClassType or InterfaceType) is a subtype of rhs if: +// 1. lhs methods are a superset of rhs methods +// 2. if rhs is module interface, the lhs must be module interface or module itself +struct TORCH_API InterfaceType : public NamedType { + static InterfaceTypePtr create( + QualifiedName qualifiedName, bool is_module=false); + + bool equals(const Type& rhs) const override { + if (auto user_rhs = rhs.castRaw()) { + return isSubTypeImpl(*this, *user_rhs, nullptr) && + isSubTypeImpl(*user_rhs, *this, nullptr); + } + return false; + } + + std::string str() const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return std::string("InterfaceType<") + name()->name() + ">"; + } + + bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const override; + + // try to find a method of this interface, + // returns nullptr if not found. 
+ const FunctionSchema* getMethod(const std::string& name) const; + void addMethod(FunctionSchema schema); + const std::vector& methods() const { + return *methods_; + } + + bool is_module() const override{ + return is_module_; + } + static const TypeKind Kind = TypeKind::InterfaceType; + ~InterfaceType() override = default; + private: + InterfaceType(QualifiedName name, bool is_module); + static bool isSubTypeImpl( + const InterfaceType& lhs, + const InterfaceType& rhs, + std::ostream* why_not); + + std::string annotation_str_impl( + [[maybe_unused]] const TypePrinter& printer = nullptr) const override { + // NOLINTNEXTLINE(bugprone-unchecked-optional-access) + return name()->qualifiedName(); + } + + // shared_ptr so that this header does not have to depend on + // FunctionSchema.h + std::shared_ptr> methods_; + // flag to distinguish if it's an interface type from a module or not + bool is_module_; +}; + +template +struct EnumerationType : public Type { +static const TypeKind Kind = K; + +bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); +} + +protected: +EnumerationType() : Type(Kind) {} +}; + +// WARNING: These enumeration types below DO NOT actually get parsed out +// from the logical schema strings, instead they are mapped as ints. 
To +// observe these types, use real_type() instead of type() on Argument + +struct ScalarTypeType; +using ScalarTypeTypePtr = SingletonTypePtr; +struct TORCH_API ScalarTypeType : public EnumerationType { +std::string str() const override { +return "ScalarType"; +} +static const TypeKind Kind = TypeKind::ScalarTypeType; +// global singleton +static ScalarTypeTypePtr get(); + +private: +ScalarTypeType() {} +}; + +struct MemoryFormatType; +using MemoryFormatTypePtr = SingletonTypePtr; +struct TORCH_API MemoryFormatType : public EnumerationType { +std::string str() const override { +return "MemoryFormat"; +} +static const TypeKind Kind = TypeKind::MemoryFormatType; +// global singleton +static MemoryFormatTypePtr get(); + +private: +MemoryFormatType() {} +}; + +struct LayoutType; +using LayoutTypePtr = SingletonTypePtr; +struct TORCH_API LayoutType : public EnumerationType { +std::string str() const override { +return "Layout"; +} +static const TypeKind Kind = TypeKind::LayoutType; +// global singleton +static LayoutTypePtr get(); + +private: +LayoutType() {} +}; + +namespace detail { +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return ScalarTypeType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return LayoutType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return MemoryFormatType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +template <> +struct getMaybeFakeTypePtr_ final { + static decltype(auto) call() { + return IntType::get(); + } +}; +} // namespace detail + +// the common supertype of all lists, +// List[T] <: AnyList for all T +struct AnyListType; +using AnyListTypePtr = SingletonTypePtr; +struct TORCH_API 
AnyListType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "list"; + } + static const TypeKind Kind = TypeKind::AnyListType; + // global singleton + static AnyListTypePtr get(); +private: + AnyListType() + : Type(TypeKind::AnyListType) {} +}; + +// the common supertype of all tuples, +// Tuple[T...] <: AnyTuple for all T +struct AnyTupleType; +using AnyTupleTypePtr = SingletonTypePtr; +struct TORCH_API AnyTupleType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + + std::string str() const override { + return "tuple"; + } + static const TypeKind Kind = TypeKind::AnyTupleType; + + // global singleton + static AnyTupleTypePtr get(); +private: + AnyTupleType() + : Type(TypeKind::AnyTupleType) {} +}; + +// the common supertype of all classes, +// ClassType <: AnyClassType for all classes +struct AnyClassType; +using AnyClassTypePtr = SingletonTypePtr; +struct TORCH_API AnyClassType : public Type { + bool equals(const Type& rhs) const override { + return rhs.kind() == kind(); + } + std::string str() const override { + return "AnyClassType"; + } + static const TypeKind Kind = TypeKind::AnyClassType; + // global singleton + static AnyClassTypePtr get(); +private: + AnyClassType() + : Type(TypeKind::AnyClassType) {} +}; + +template<> +inline detail::CastReturnType::type Type::cast() { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; +} + +template<> +inline detail::CastConstReturnType::type Type::cast() const { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + 
return nullptr; +} + +template<> +inline const NamedType* Type::castRaw() const { + if (kind() == TypeKind::TupleType || kind() == TypeKind::FunctionType || + kind() == TypeKind::ClassType || kind() == TypeKind::InterfaceType) { + return static_cast(this); + } + return nullptr; +} + +// Used as a return type when inferring the IValue type of a Python object. +struct InferredType { + /* implicit */ InferredType(TypePtr type) : type_(std::move(type)) {} + /* implicit */ InferredType(std::string reason) + : type_(nullptr), reason_(std::move(reason)) {} + TypePtr type() const { + TORCH_INTERNAL_ASSERT( + type_, + "Tried to get the type from an InferredType but the type is null. ", + "Reason: ", + reason_); + return type_; + } + bool success() const { + return type_ != nullptr; + } + const std::string& reason() const { + TORCH_INTERNAL_ASSERT(!type_); + return reason_; + } + +private: + TypePtr type_; + std::string reason_; +}; + +TORCH_API bool containsAnyType(const TypePtr& type); + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h new file mode 100644 index 0000000000000000000000000000000000000000..c7ffbb33786aa5d1118243b047836a189817ac67 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/jit_type_base.h @@ -0,0 +1,602 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace c10 { + +#define C10_FORALL_TYPES(_) \ + _(AnyType) \ + _(EnumType) \ + _(AnyEnumType) \ + _(TensorType) \ + _(StorageType) \ + _(TupleType) \ + _(ListType) \ + _(DictType) \ + _(NumberType) \ + _(FloatType) \ + _(ComplexType) \ + _(FutureType) \ + _(AwaitType) \ + _(RRefType) \ + _(IntType) \ + _(NoneType) \ + _(StringType) \ + _(GeneratorType) \ + _(QuantizerType) \ + _(BoolType) \ + _(OptionalType) \ + _(VarType) \ + _(DeviceObjType) \ + _(StreamObjType) \ + _(FunctionType) \ + _(ClassType) \ + _(PyObjectType) \ + _(CapsuleType) \ + _(InterfaceType) \ + _(QSchemeType) \ + _(ScalarTypeType) \ + _(LayoutType) \ + _(MemoryFormatType) \ + _(AnyListType) \ + _(AnyTupleType) \ + _(AnyClassType) \ + _(SymIntType) \ + _(SymFloatType) \ + _(SymBoolType) \ + _(UnionType) \ + _(DynamicType) + +enum class TypeKind { +#define DEFINE_TYPE(T) T, + C10_FORALL_TYPES(DEFINE_TYPE) +#undef DEFINE_TYPE +}; + +TORCH_API const char* typeKindToString(TypeKind kind); + +struct Type; +struct SharedType; + +// Use this to customize how a Type is printed using `annotation_str()`. If +// std::nullopt is returned, `annotation_str()` falls through to its default +// implementation. 
+using TypePrinter = std::function(const Type&)>; + +namespace detail { +template +struct IsSingletonType : public std::integral_constant {}; +} // namespace detail +#define TORCH_DECLARE_SINGLETON(Type) \ + struct Type; \ + namespace detail { \ + template <> struct IsSingletonType : public std::integral_constant {}; \ + } + +TORCH_DECLARE_SINGLETON(AnyType) +TORCH_DECLARE_SINGLETON(AnyEnumType) +TORCH_DECLARE_SINGLETON(NumberType) +TORCH_DECLARE_SINGLETON(FloatType) +TORCH_DECLARE_SINGLETON(ComplexType) +TORCH_DECLARE_SINGLETON(IntType) +TORCH_DECLARE_SINGLETON(BoolType) +TORCH_DECLARE_SINGLETON(StringType) +TORCH_DECLARE_SINGLETON(StorageType) +TORCH_DECLARE_SINGLETON(NoneType) +TORCH_DECLARE_SINGLETON(GeneratorType) +TORCH_DECLARE_SINGLETON(QuantizerType) +TORCH_DECLARE_SINGLETON(QSchemeType) +TORCH_DECLARE_SINGLETON(DeviceObjType) +TORCH_DECLARE_SINGLETON(StreamObjType) +TORCH_DECLARE_SINGLETON(CapsuleType) +TORCH_DECLARE_SINGLETON(PyObjectType) +TORCH_DECLARE_SINGLETON(ScalarTypeType) +TORCH_DECLARE_SINGLETON(LayoutType) +TORCH_DECLARE_SINGLETON(MemoryFormatType) +TORCH_DECLARE_SINGLETON(AnyListType) +TORCH_DECLARE_SINGLETON(AnyTupleType) +TORCH_DECLARE_SINGLETON(AnyClassType) + +namespace detail { +template +struct CastReturnType { + using type = std::shared_ptr; +}; + +template +struct CastReturnType::value>> { + using type = SingletonTypePtr; +}; + +template +struct CastConstReturnType { + using type = std::shared_ptr; +}; + +template +struct CastConstReturnType::value>> { + using type = SingletonTypePtr; +}; + +template +struct as_shared_type { + using type = SharedType*; +}; + +template +struct as_shared_type { + using type = const SharedType *; +}; +} // namespace detail + +struct TORCH_API Type { + friend TORCH_API bool operator==(const Type& lhs, const Type& rhs); + private: + TypeKind kind_; + + protected: + Type(TypeKind kind) : kind_(kind) {} + + Type(const Type&) = default; + Type& operator=(const Type&) = default; + Type(Type&&) noexcept = 
default; + Type& operator=(Type&&) noexcept = default; + + virtual std::string annotation_str_impl(const TypePrinter& /*printer*/) const { + return str(); + } + // a == b + virtual bool equals(const Type& rhs) const = 0; + // a == b <=> b == a + virtual bool symmetric() const { + return true; + } + + public: + template + class SingletonOrSharedTypePtr { + public: + using element_type = typename std::shared_ptr::element_type; + + SingletonOrSharedTypePtr() = default; + + /* implicit */ SingletonOrSharedTypePtr(std::shared_ptr x) + : repr_(std::move(x)) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(std::shared_ptr x) + : repr_(std::move(x)) {} + + /* implicit */ SingletonOrSharedTypePtr(std::nullptr_t) + : repr_(nullptr) {} + + /* implicit */ SingletonOrSharedTypePtr(SingletonTypePtr p) + : repr_(makeSingletonSharedPtr(p.get())) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(SingletonTypePtr p) + : repr_(makeSingletonSharedPtr(static_cast(p.get()))) {} + + + // We need to support construction from T* for pybind. The problem + // is that it's not clear if we are supposed to be taking shared + // ownership or not. + // + // Case 1: if T is known statically to derive from SharedType, we should use + // shared_from_this() and take shared_ownership. + // + // Case 2: if T is exactly Type, we need to do a dynamic_cast to + // check if it's a SharedType and do the right thing. + // + // Case 3: Otherwise, T is not a SharedType. Use a singleton + // pointer. 
+ + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) : SingletonOrSharedTypePtr(static_cast::type>(p)->shared_from_this()) {} + + template , bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) { + if (auto* shared_p = dynamic_cast::type>(p)) { + repr_ = shared_p->shared_from_this(); + } else { + repr_ = makeSingletonSharedPtr(p); + } + } + + template && !std::is_base_of_v, bool> = true> + /* implicit */ SingletonOrSharedTypePtr(T* p) + : repr_(makeSingletonSharedPtr(p)) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(dynamic_cast::type>(p) == nullptr); + } + + SingletonOrSharedTypePtr(const SingletonOrSharedTypePtr&) = default; + SingletonOrSharedTypePtr(SingletonOrSharedTypePtr&&) noexcept = default; + SingletonOrSharedTypePtr& operator=(const SingletonOrSharedTypePtr&) = default; + SingletonOrSharedTypePtr& operator=(SingletonOrSharedTypePtr&&) noexcept = default; + ~SingletonOrSharedTypePtr() = default; + + T* get() const { + return repr_.get(); + } + + operator bool() const { + return repr_ != nullptr; + } + + bool operator==(std::nullptr_t) const { + return repr_ == nullptr; + } + + bool operator!=(std::nullptr_t) const { + return repr_ != nullptr; + } + + template , void>, bool> = true> + U& operator*() const { + return *get(); + } + + T* operator->() const { + return get(); + } + + private: + // Use shared_ptr's aliasing constructor to create a non-owning pointer + // to a singleton. The lifetime is tied to the null shared_ptr, so there's + // no reference counting overhead for the singleton itself. + static std::shared_ptr makeSingletonSharedPtr(T* ptr) { + return std::shared_ptr(std::shared_ptr(), ptr); + } + + std::shared_ptr repr_; + }; + + using TypePtr = SingletonOrSharedTypePtr; + using Ptr = TypePtr; + using ElementType = Type; + + // subtyping relation. 
By default, we return true for the case + // when the type is exactly equal or if this <: T where rhs = Optional[T] + + // if this returns false and the why_not stream is non-null, it contains + // additional details that describe why this is not a subtype of 'rhs'. + // This additional information should only contain details that are not + // obvious from the annotation_str() that describes the type. For instance it + // is clear that `int <: str` is false but not clear why `Foo <: InterfaceBar` + // might be false. + virtual bool isSubtypeOfExt(const Type& rhs, std::ostream* why_not) const; + virtual bool is_module() const; + bool isSubtypeOf(const Type& rhs) const { + return isSubtypeOfExt(rhs, nullptr); + } + // Compatibility shims to accommodate existing code that passes shared_ptrs + // around. Ideally, we would just delete this, but it should be harmless. + template + std::enable_if_t, bool> + isSubtypeOf(const std::shared_ptr& rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOf(const SingletonOrSharedTypePtr& rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOf(SingletonTypePtr rhs) const { + return isSubtypeOf(*rhs); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(const SingletonOrSharedTypePtr& rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(const std::shared_ptr& rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + template + std::enable_if_t, bool> + isSubtypeOfExt(SingletonTypePtr rhs, std::ostream* why_not) const { + return isSubtypeOfExt(*rhs, why_not); + } + + // How this type will appear in FunctionSchema declarations + virtual std::string str() const = 0; + + // How this type will appear as if it were a type annotation in Python + // which is sometimes different than how it appears in declarations (e.g. 
+ // int[] vs List[int]) + // + // Takes a custom printer that users can pass in to customize the output of + // this method. + std::string annotation_str(const TypePrinter& printer) const { + if (printer) { + // the printer can return std::nullopt to fall through to the default impl + if (auto renamed = printer(*this)) { + return *renamed; + } + } + return annotation_str_impl(printer); + } + std::string annotation_str() const { + // Overload instead of define a default value for `printer` to help + // debuggers out. + return annotation_str(nullptr); + } + + // Returns a human readable string that includes additional information like + // "type is inferred rather than explicitly defined" to help construct more + // user-friendly messages. + virtual std::string repr_str() const { + return annotation_str(); + } + + TypeKind kind() const { + return kind_; + } + + virtual bool isUnionType() const { + return false; + } + + virtual bool requires_grad() const { + for (const auto& ct : containedTypes()) { + if (ct->requires_grad()) { + return true; + } + } + return false; + } + + // Dynamically cast this object to the subclass indicated by the + // template variable, returning nullptr if the cast is invalid. 
+ template ::value, bool> = true> + typename detail::CastReturnType::type cast() { + if (T::Kind == kind()) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastReturnType::type cast() { + if (T::Kind == kind()) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this == T::get().get()); + return typename detail::CastReturnType::type(static_cast(this)); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastConstReturnType::type cast() const { + if (T::Kind == kind()) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return nullptr; + } + template ::value, bool> = true> + typename detail::CastConstReturnType::type cast() const { + if (T::Kind == kind()) { + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(this == T::get().get()); + return typename detail::CastConstReturnType::type(static_cast(this)); + } + return nullptr; + } + template + T* castRaw() { + if (T::Kind == kind()) { + return static_cast(this); + } + return nullptr; + } + template + const T* castRaw() const { + if (T::Kind == kind()) { + return static_cast(this); + } + return nullptr; + } + template + auto expect() { + auto r = cast(); + AT_ASSERT(r); + return r; + } + template + auto expect() const { + auto r = cast(); + AT_ASSERT(r); + return r; + } + template + T& expectRef() { + auto* r = castRaw(); + AT_ASSERT(r); + return *r; + } + template + const T& expectRef() const { + auto* r = castRaw(); + AT_ASSERT(r); + return *r; + } + virtual ~Type() = default; + virtual bool hasFreeVariables() const { + return false; + } + // list of types this type contains, e.g. 
for a List then element type of a + // list for a tuple, the types of the tuple elements + virtual at::ArrayRef containedTypes() const { + return {}; + } + virtual TypePtr containedType(size_t i) const { + return containedTypes().at(i); + } + virtual size_t containedTypeSize() const { + return containedTypes().size(); + } + // create a new version of this type, replacing its contained types with + // contained_types + TypePtr withContained(std::vector contained_types); + // per-type constructor, you only need to override this if the + // containedTypes() is not empty + virtual TypePtr createWithContained( + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::vector /*contained_types*/) const { + TORCH_CHECK(false, + "type with contained types did not overload createWithContained: ", + str()); + } + +}; + +template +using SingletonOrSharedTypePtr = Type::SingletonOrSharedTypePtr; + + +template +bool operator==(const SingletonOrSharedTypePtr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonOrSharedTypePtr& x, const std::shared_ptr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const std::shared_ptr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonOrSharedTypePtr& x, const SingletonTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator==(const SingletonTypePtr& x, const SingletonOrSharedTypePtr& y) { + return (void*)x.get() == (void*)y.get(); +} + +template +bool operator!=(const SingletonOrSharedTypePtr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const SingletonOrSharedTypePtr& x, const std::shared_ptr& y) { + return !(x == y); +} + +template +bool operator!=(const std::shared_ptr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const 
SingletonOrSharedTypePtr& x, const SingletonTypePtr& y) { + return !(x == y); +} + +template +bool operator!=(const SingletonTypePtr& x, const SingletonOrSharedTypePtr& y) { + return !(x == y); +} + +using TypePtr = SingletonOrSharedTypePtr; +using ConstTypePtr = SingletonOrSharedTypePtr; + +// Explicitly enable MaybeOwned>, rather than allowing +// MaybeOwned to be used for any type right away. +template +struct MaybeOwnedTraits> + : public MaybeOwnedTraitsGenericImpl> {}; + +// Base class for Types that are guaranteed to be owned by std::shared_ptr. +struct TORCH_API SharedType : public Type, public std::enable_shared_from_this { + using Type::Type; +}; + +inline TypePtr Type::withContained(std::vector contained_types) { + auto current_contained = containedTypes(); + // Types with no contained_types don't need this call. Check before calling! + // + // (We can't support this efficiently because types without + // contained types may be singletons, in which case + // shared_from_this will crash; we would have to provide a virtual + // typeptr_from_this or isSingleton.) 
+ TORCH_INTERNAL_ASSERT(!current_contained.empty() && current_contained.size() == contained_types.size()); + if (current_contained.equals(contained_types)) { + return std::static_pointer_cast(static_cast(this)->shared_from_this()); + } + return createWithContained(std::move(contained_types)); +} + + +inline bool operator==(const Type& lhs, const Type& rhs) { + if (C10_UNLIKELY(!rhs.symmetric())) { + return rhs.equals(lhs); + } + return lhs.equals(rhs); +} + +struct NamedType; +using NamedTypePtr = std::shared_ptr; +using ConstNamedTypePtr = std::shared_ptr; + +struct TORCH_API NamedType : public SharedType { + NamedType(TypeKind tk, std::optional name) + : SharedType(tk), name_(std::move(name)) { + TORCH_INTERNAL_ASSERT( + tk == TypeKind::TupleType || tk == TypeKind::FunctionType || + tk == TypeKind::ClassType || tk == TypeKind::InterfaceType || + tk == TypeKind::EnumType, + "If you add a new kind of NamedType, ", + "please update the cast specialization and this assert"); + } + + // Fully qualified name of type + // Looks like: "foo.bar.Baz". + const std::optional& name() const { + return name_; + } + + private: + std::optional name_; +}; + +} // namespace c10 + +namespace std { +template +struct hash> { + size_t operator()(const c10::SingletonOrSharedTypePtr& x) const { + return std::hash()(x.get()); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h new file mode 100644 index 0000000000000000000000000000000000000000..3dbd04bdea8b71e88fbebdced2eea6c10b295193 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/operator_name.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace c10 { + +// TODO: consider storing namespace separately too +struct OperatorName final { + std::string name; + std::string overload_name; + OperatorName(std::string name, std::string overload_name) + : name(std::move(name)), overload_name(std::move(overload_name)) {} + + // TODO: These two functions below are slow! Fix internal data structures so + // I don't have to manually reconstruct the namespaces! + + // Return the namespace of this OperatorName, if it exists. The + // returned string_view is only live as long as the OperatorName + // exists and name is not mutated + std::optional getNamespace() const { + auto pos = name.find("::"); + if (pos == std::string::npos) { + return std::nullopt; + } else { + return std::string_view(name.data(), pos); + } + } + + // Returns true if we successfully set the namespace + bool setNamespaceIfNotSet(const char* ns) { + if (!getNamespace().has_value()) { + const auto ns_len = strlen(ns); + const auto old_name_size = name.size(); + name.resize(ns_len + 2 + old_name_size); + // Shift current value of name to the end of the new space. 
+ name.replace( + name.size() - old_name_size, old_name_size, name, 0, old_name_size); + name.replace(0, ns_len, ns, ns_len); + name[ns_len] = ':'; + name[ns_len + 1] = ':'; + return true; + } else { + return false; + } + } +}; + +// Non-owning view of an OperatorName. Unlike OperatorName, most of +// its functions are constexpr, so it can be used for compile time +// computations +struct OperatorNameView final { + std::string_view name; + std::string_view overload_name; + constexpr OperatorNameView( + std::string_view name, + std::string_view overload_name) + : name(name), overload_name(overload_name) {} + // Parses strings like "foo.overload" and also "foo" + constexpr static OperatorNameView parse(std::string_view full_name) { + auto i = full_name.find('.'); + if (i == std::string_view::npos) { + return OperatorNameView(full_name, std::string_view()); + } else { + return OperatorNameView(full_name.substr(0, i), full_name.substr(i + 1)); + } + } +}; + +inline bool operator==(const OperatorName& lhs, const OperatorName& rhs) { + return lhs.name == rhs.name && lhs.overload_name == rhs.overload_name; +} + +inline bool operator!=(const OperatorName& lhs, const OperatorName& rhs) { + return !operator==(lhs, rhs); +} + +TORCH_API std::string toString(const OperatorName& opName); +TORCH_API std::ostream& operator<<(std::ostream& /*os*/, const OperatorName& /*opName*/); + +} // namespace c10 + +namespace std { +template <> +struct hash<::c10::OperatorName> { + size_t operator()(const ::c10::OperatorName& x) const { + return std::hash()(x.name) ^ + (~std::hash()(x.overload_name)); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h new file mode 100644 index 0000000000000000000000000000000000000000..60e05fd9033486cc08999b939836359024313d70 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/qualified_name.h @@ -0,0 +1,166 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10 { + +// Represents a name of the form "foo.bar.baz" +struct QualifiedName { + QualifiedName() = default; + + // `name` can be a dotted string, like "foo.bar.baz", or just a bare name. + /* implicit */ QualifiedName(const std::string& name) { + TORCH_CHECK(!name.empty()); + // split the string into its atoms. + size_t startSearchFrom = 0; + size_t pos = name.find(delimiter_, startSearchFrom); + + while (pos != std::string::npos) { + auto atom = name.substr(startSearchFrom, pos - startSearchFrom); + TORCH_INTERNAL_ASSERT( + !atom.empty(), "Invalid name for qualified name: '", name, "'"); + atoms_.push_back(std::move(atom)); + startSearchFrom = pos + 1; + pos = name.find(delimiter_, startSearchFrom); + } + + auto finalAtom = name.substr(startSearchFrom); + TORCH_INTERNAL_ASSERT( + !finalAtom.empty(), "Invalid name for qualified name: '", name, "'"); + atoms_.emplace_back(std::move(finalAtom)); + + cacheAccessors(); + } + + explicit QualifiedName(std::vector atoms) : atoms_(std::move(atoms)) { + for (const auto& atom : atoms_) { + TORCH_CHECK(!atom.empty(), "Atom cannot be empty"); + TORCH_CHECK( + atom.find(delimiter_) == std::string::npos, + "Delimiter not allowed in atom"); + } + + cacheAccessors(); + } + // Unnecessary copy. Ideally we'd use something like std::string_view. 
+ /* implicit */ QualifiedName(const char* name) + : QualifiedName(std::string(name)) {} + + // `name` must be a bare name (no dots!) + explicit QualifiedName(const QualifiedName& prefix, std::string name) { + TORCH_INTERNAL_ASSERT(!name.empty()); + TORCH_INTERNAL_ASSERT(name.find(delimiter_) == std::string::npos); + atoms_.insert(atoms_.begin(), prefix.atoms_.begin(), prefix.atoms_.end()); + atoms_.push_back(std::move(name)); + + cacheAccessors(); + } + + // Is `this` a prefix of `other`? + // For example, "foo.bar" is a prefix of "foo.bar.baz" + bool isPrefixOf(const QualifiedName& other) const { + const auto& thisAtoms = atoms_; + const auto& otherAtoms = other.atoms_; + + if (thisAtoms.size() > otherAtoms.size()) { + // Can't be a prefix if it's bigger + return false; + } + for (const auto i : c10::irange(thisAtoms.size())) { + if (thisAtoms[i] != otherAtoms[i]) { + return false; + } + } + return true; + } + + // The fully qualified name, like "foo.bar.baz" + const std::string& qualifiedName() const { + return qualifiedName_; + } + + // The leading qualifier, like "foo.bar" + const std::string& prefix() const { + return prefix_; + } + + // The base name, like "baz" + const std::string& name() const { + return name_; + } + + const std::vector& atoms() const { + return atoms_; + } + + bool operator==(const QualifiedName& other) const { + return this->qualifiedName_ == other.qualifiedName_; + } + + bool operator!=(const QualifiedName& other) const { + return !(*this == other); + } + + private: + static constexpr char delimiter_ = '.'; + + // Helper for cacheAccessors() below. 
+ template + std::string join(char delimiter, const T& v) { + std::string out; + size_t reserve = 0; + for (const auto& e : v) { + reserve += e.size() + 1; + } + out.reserve(reserve); + for (const auto i : c10::irange(v.size())) { + if (i != 0) { + out.push_back(delimiter); + } + out.append(v[i]); + } + return out; + } + + void cacheAccessors() { + qualifiedName_ = join(delimiter_, atoms_); + if (atoms_.size() > 1) { + ArrayRef view(atoms_); + const auto prefixView = view.slice(0, view.size() - 1); + prefix_ = join(delimiter_, prefixView); + } + + if (!atoms_.empty()) { + name_ = atoms_.back(); + } + } + + // The actual list of names, like "{foo, bar, baz}" + std::vector atoms_; + + /* + * Cached accessors, derived from `atoms_`. + */ + std::string qualifiedName_; + std::string prefix_; + std::string name_; +}; +} // namespace c10 + +namespace std { +template <> +struct hash { + size_t operator()(const c10::QualifiedName& n) const noexcept { + return std::hash()(n.qualifiedName()); + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..b40fe9ed5c2e1e1bae0f0eac22c6cdd4f318c042 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/rref_interface.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10 { + +struct Type; +using worker_id_t = int16_t; + +// This abstract class contains only user-facing APIs, and will be shared +// between jit and distributed to implement TorchScript support. 
+class C10_EXPORT RRefInterface : public c10::intrusive_ptr_target { + public: + RRefInterface() = default; + // RRef is made NOT copyable NOT movable to prevent messing up reference + // counting. + RRefInterface(const RRefInterface& other) = delete; + RRefInterface(RRefInterface&& other) = delete; + RRefInterface& operator=(const RRefInterface& other) = delete; + RRefInterface& operator=(RRefInterface&& other) = delete; + + ~RRefInterface() override = default; + + // returns the worker id of the owner + virtual worker_id_t owner() const = 0; + + // returns the worker name of the owner + virtual std::string ownerName() const = 0; + + // Returns true if this is the ``OwnerRRef`` + virtual bool isOwner() const = 0; + + // Returns true if this is an ``OwnerRRef`` or if this ``UserRRef`` has been + // confirmed by its owner. + virtual bool confirmedByOwner() const = 0; + + virtual const TypePtr type() const = 0; +}; + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/stack.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/stack.h new file mode 100644 index 0000000000000000000000000000000000000000..6da9ad54ddcb22efa9185a31d95e30c94d822371 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/stack.h @@ -0,0 +1,209 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +// TODO move this to c10 namespace + + +namespace torch::jit { + +using c10::IValue; +using Stack = std::vector; + +class Operation { + template + using accepts = std::is_constructible, F&&>; + + public: + template ::value, int> = 0> + C10_DEPRECATED_MESSAGE("Please use void(Stack&) to register operator instead.") + Operation(F&& raw): op_([raw = std::forward(raw)](Stack& stack) { + raw(&stack); + }) {} + + template ::value && + !std::is_same_v, Operation>, int> = 0> + Operation(F&& op): op_(std::forward(op)) {} + + Operation(std::nullptr_t) noexcept {} + + explicit operator bool() const noexcept { + return op_ ? true : false; + } + + void operator()(Stack& stack) { + op_(stack); + } + + template + T* target() noexcept { + return op_.target(); + } + + private: + std::function op_; +}; + +// An operation with N inputs and M outputs pops the last N inputs off +// the stack and pushes its M inputs onto the stack +// before: I0, I1, ... IN <- stack.back() +// after: O0, O1, ... OM +// operations are defined this way so that ownership of inputs can be +// transferred to the operation and it can incrementally drop ownership of +// tensors when they become unneeded. 
For large operations, like 'run an entire +// subgraph', this functionality is very important for minimizing gpu memory +// usage return value is the relative 'offset' to jump to for the next +// operation: +// pc += 1 + offset +// so a return value of 0 goes to the next instruction + +// treat the last N elements of the stack as a list, looking up +// element i +inline IValue& peek(Stack& stack, size_t i, size_t N) { + // NOLINTNEXTLINE(*-narrowing-conversions) + return *(stack.end() - N + i); +} +inline IValue& peek(Stack* stack, size_t i, size_t N) { + return peek(*stack, i, N); +} +inline const IValue& peek(const Stack& stack, size_t i, size_t N) { + // NOLINTNEXTLINE(*-narrowing-conversions) + return *(stack.end() - N + i); +} +inline const IValue& peek(const Stack* stack, size_t i, size_t N) { + return peek(*stack, i, N); +} +// treat the last N elements of the stack as a list, looking up the +// slice starting at index i and having length len +inline at::ArrayRef peekSlice( + const Stack& stack, + size_t i, + size_t len, + size_t N) { + return at::ArrayRef(stack).slice(stack.size() - N + i, len); +} +inline at::ArrayRef last(const Stack& stack, size_t N) { + return peekSlice(stack, 0, N, N); +} +inline at::ArrayRef last(const Stack* stack, size_t N) { + return last(*stack, N); +} +inline void drop(Stack& stack, size_t n) { + // NOLINTNEXTLINE(*-narrowing-conversions) + stack.erase(stack.end() - n, stack.end()); +} +inline void drop(Stack* stack, size_t n) { + drop(*stack, n); +} +inline IValue pop(Stack& stack) { + TORCH_CHECK(!stack.empty(), "pop() called on empty stack"); + auto r = std::move(stack.back()); + stack.pop_back(); + return r; +} +inline IValue pop(Stack* stack) { + return pop(*stack); +} +inline std::vector pop(Stack& stack, size_t n) { + std::vector result; + result.reserve(n); + for (const auto i : c10::irange(n)) { + result.push_back(std::move(peek(stack, i, n))); + } + drop(stack, n); + return result; +} + +// variadic pop: +// int64_t a; 
at::Tensor b; +// pop(stack, a, b); +// equivalent to: +// b = pop(stack).toTensor(); +// a = pop(stack).toInt(); +template +inline void pop(Stack& stack, Types&... args) { + size_t i = 0; + constexpr size_t N = sizeof...(args); + (void)std::initializer_list{ + (args = std::move(peek(stack, i++, N)).template to(), 0)...}; + drop(stack, N); +} +template +inline void pop(Stack* stack, Types&... args) { + pop(*stack, args...); +} +template +inline void push_one(Stack& stack, Type&& arg) { + stack.emplace_back(std::forward(arg)); +} + +inline void push_one(Stack& stack, c10::TensorOptions options) { + stack.emplace_back(c10::typeMetaToScalarType(options.dtype())); + stack.emplace_back(options.layout()); + stack.emplace_back(options.device()); + stack.emplace_back(options.pinned_memory()); +} + +template +inline void push(Stack& stack, Types&&... args) { + (void)std::initializer_list{(push_one(stack, std::forward(args)), 0)...}; +} +template +inline void push(Stack* stack, Types&&... args) { + return push(*stack, std::forward(args)...); +} +template +inline void push_list_elements(Stack& stack, const c10::List& elements) { + for (T elem : elements) { + stack.push_back(std::move(elem)); + } +} + +// The packer here is carefully written not to make any unnecessary +// copies. + +// pack takes the return values of aten functions pushes them onto the stack +template +inline void pack(Stack& stack, T&& v) { + stack.emplace_back(std::forward(v)); +} +template +inline void pack(Stack* stack, T&& v) { + pack(*stack, std::forward(v)); +} + +template +struct TuplePacker { + // NB: *Not* a universal reference. + static void execute(Stack& stack, std::tuple&& t) { + // NB: The move here does not "destroy" the entire tuple, that is + // not what std::move does; only the particular tuple index + // processed here gets stolen. 
+ pack(stack, std::get(std::move(t))); + TuplePacker::execute(stack, std::move(t)); + } +}; + +template +struct TuplePacker<0, Args...> { + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + static void execute(Stack& /*stack*/, std::tuple&& /*t*/){} +}; + +template +inline void pack(Stack& stack, std::tuple&& t) { + TuplePacker::execute(stack, std::move(t)); +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h new file mode 100644 index 0000000000000000000000000000000000000000..759d2ae7602ce3fba83bf74900e1085ac3fb0b51 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/symbol.h @@ -0,0 +1,152 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include // For std::hash +#include + + +namespace c10 { + +// 'prim' symbols are synthetic operators that occur only in the IR +// and don't have corresponding implementations in ATen. + +// 'onnx' symbols correspond to ONNX operators. Their semantics +// are defined in https://github.com/onnx/onnx/blob/master/docs/Operators.md +// The particular version we are targeting is specified by '_onnx_opset_version' +// in torch.onnx.symbolic_helper +// +// In general, most ONNX operators won't get an entry here, because they +// are handled from the Python end. However, you may occasionally need +// to intern an ONNX symbol here so that you can conveniently write an +// optimization on ONNX operations. + +// 'attr' symbols are attribute keys. 
They are shared between both ONNX and ATen +// operators (you disambiguate their meaning by looking at the operator itself). +// In general, you only need to define attribute keys that are used by +// onnx or prim; ATen attributes are automatically generated in FORALL_ATTR_BASE_SYMBOLS. + +// Note [Symbol allocation] +// ~~~~~~~~~~~~~~~~~~~~~~~~ +// +// 1. Symbol namespace is split up into namespaces. +// +// 2. The intended access pattern for built-in symbols is onnx::MatMul +// in the c10 namespace (this is a Symbol). +// + +// Built-in constant definition strategy: +// - Enum is the most convenient way to generate a contiguous sequence +// of numbers for an identifier. +// - However, an enum gives you a fresh type. We want onnx::MatMul to +// be type Symbol, not some random enum type! +// - Therefore, after using enums to generate the sequence of integers, +// we then declare constexpr Symbols to get everything the actual Symbol +// type we want. Symbols must be constexpr to be valid to be "case"ed on. + +using unique_t = uint32_t; + +const std::string& domain_prefix(); + +// A Symbol is like an interned string, but with a little extra +// structure; it is namespaced via SymbolNamespace and the resulting +// intern pointers support efficient namespace testing. +struct TORCH_API Symbol { + explicit constexpr Symbol() : value(0) {} + explicit constexpr Symbol(unique_t uniq) + : value(uniq) {} + + // Get a Symbol for a qualified string like "attr::bar" + static Symbol fromQualString(const std::string & s); + + // Get a Symbol from a domain and an unqualified string like "org.pytorch.attr" and "bar" + static Symbol fromDomainAndUnqualString(const std::string & d, const std::string & s); + + // Constructors for our various namespaced strings. This will construct + // the appropriate namespaced string, e.g., "attr::foo" for the + // argument "foo", and then attempt to intern it. 
DO NOT USE THIS + // with a string literal; attr::foo should be available in that case + // (and if it's not, you should add it to the built-ins list above.) + static Symbol attr(const std::string & s); + static Symbol aten(const std::string & s); + static Symbol cuda(const std::string & s); + static Symbol onnx(const std::string & s); + static Symbol prim(const std::string & s); + static Symbol user(const std::string & s); + static Symbol caffe2(const std::string & s); + static Symbol dimname(const std::string & s); + // TODO: eliminate me + static Symbol scope(const std::string & s); + + bool is_attr() const; + bool is_aten() const; + bool is_cuda() const; + bool is_prim() const; + bool is_prims() const; + bool is_nvprims() const; + bool is_onnx() const; + bool is_user() const; + bool is_caffe2() const; + bool is_dimname() const; + + // So we can switch on this + constexpr operator unique_t() const { + return value; + } + + Symbol ns() const; + + // Give a string corresponding to the unqualified version of this name, e.g., + // "mm". Use this in a context where the intended namespace of the string is + // obvious; this is a *lossy* conversion. + const char * toUnqualString() const; + + // Give a string corresponding to the qualified version of this name, + // e.g., "aten::mm". This string format is made available to Python bindings + // (so we know how to parse it.) + const char * toQualString() const; + + // This describes a symbol in a case where humans read it. At the moment it's + // the same as toQualString. This has to be a const char* returned because + // a lot of printf style macros use it. + const char * toDisplayString() const; + + // Give a string corresponding to the domain name for the symbol, + // e.g., "org.pytorch.aten". 
+ std::string domainString() const; + +private: + + explicit Symbol(Symbol ns, const std::string & s); + unique_t value; +}; + +static inline bool operator==(Symbol lhs, Symbol rhs) { + return static_cast(lhs) == static_cast(rhs); +} + +inline Symbol Symbol::attr(const std::string & s) { return Symbol::fromQualString("attr::" + s); } +inline Symbol Symbol::aten(const std::string & s) { return Symbol::fromQualString("aten::" + s); } +inline Symbol Symbol::cuda(const std::string & s) { return Symbol::fromQualString("cuda::" + s); } +inline Symbol Symbol::onnx(const std::string & s) { return Symbol::fromQualString("onnx::" + s); } +inline Symbol Symbol::prim(const std::string & s) { return Symbol::fromQualString("prim::" + s); } +inline Symbol Symbol::scope(const std::string & s) { return Symbol::fromQualString("scope::" + s); } +inline Symbol Symbol::user(const std::string & s) { return Symbol::fromQualString("user::" + s); } +inline Symbol Symbol::caffe2(const std::string & s) { return Symbol::fromQualString("_caffe2::" + s); } +inline Symbol Symbol::dimname(const std::string & s) { return Symbol::fromQualString("dimname::" + s); } + +} // namespace c10 + +// make symbol behave like an integer in hash tables +namespace std { +template <> +struct hash { + size_t operator()(c10::Symbol s) const { + return std::hash()(static_cast(s)); + } +}; +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h new file mode 100644 index 0000000000000000000000000000000000000000..a0ffab285716c6b50546690bbd28c25ed2465db2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_factory.h @@ -0,0 +1,113 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace c10 { + +template +struct TORCH_API TypeFactoryBase {}; + +template <> +struct TORCH_API TypeFactoryBase { + template + static c10::DynamicTypePtr create(TypePtr ty, Args&&... args) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + c10::DynamicType::Arguments(c10::ArrayRef( + {std::move(ty), std::forward(args)...}))); + } + template + static c10::DynamicTypePtr create(const std::vector& types) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + c10::DynamicType::Arguments(types)); + } + static c10::DynamicTypePtr createNamedTuple( + const std::string& name, + const std::vector& fields, + const std::vector& types) { + return std::make_shared( + c10::DynamicType::Tag::Tuple, + name, + c10::DynamicType::Arguments(fields, types)); + } + template + C10_ERASE static c10::DynamicTypePtr createNamed(const std::string& name) { + return std::make_shared( + c10::DynamicTypeTrait::tagValue(), + name, + c10::DynamicType::Arguments{}); + } + template + C10_ERASE static decltype(auto) get() { + return DynamicTypeTrait::getBaseType(); + } + static const std::unordered_map& basePythonTypes(); +}; + +using DynamicTypeFactory = TypeFactoryBase; + +// Helper functions for constructing DynamicTypes inline. 
+template < + typename T, + std::enable_if_t::isBaseType, int> = 0> +C10_ERASE DynamicTypePtr dynT() { + return DynamicTypeFactory::get(); +} + +template < + typename T, + typename... Args, + std::enable_if_t::isBaseType, int> = 0> +C10_ERASE DynamicTypePtr dynT(Args&&... args) { + return DynamicTypeFactory::create(std::forward(args)...); +} + +template <> +struct TORCH_API TypeFactoryBase { + template + static c10::TypePtr create(TypePtr ty, Args&&... args) { + return T::create(std::move(ty), std::forward(args)...); + } + template + static c10::TypePtr create(std::vector types) { + return T::create(std::move(types)); + } + static c10::TypePtr createNamedTuple( + const std::string& name, + const std::vector& fields, + const std::vector& types); + template + C10_ERASE static c10::TypePtr createNamed(const std::string& name) { + return T::create(name); + } + static const std::unordered_map& basePythonTypes(); + template + C10_ERASE static c10::TypePtr get() { + return T::get(); + } +}; + +using DefaultTypeFactory = TypeFactoryBase; + +using PlatformType = +#ifdef C10_MOBILE + c10::DynamicType +#else + c10::Type +#endif + ; + +using TypeFactory = TypeFactoryBase; + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h new file mode 100644 index 0000000000000000000000000000000000000000..5574060d262a7daef645775809f527fe2736b77f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/type_ptr.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace c10 { + +// Compatibility wrapper around a raw pointer so that existing code +// written to deal with a shared_ptr can keep working. +template +class SingletonTypePtr { + public: + /* implicit */ SingletonTypePtr(T* p) : repr_(p) {} + + // We need this to satisfy Pybind11, but it shouldn't be hit. + explicit SingletonTypePtr(std::shared_ptr /*unused*/) { TORCH_CHECK(false); } + + using element_type = typename std::shared_ptr::element_type; + + template , void>, bool> = true> + T& operator*() const { + return *repr_; + } + + T* get() const { + return repr_; + } + + T* operator->() const { + return repr_; + } + + operator bool() const { + return repr_ != nullptr; + } + + private: + T* repr_{nullptr}; +}; + +template +bool operator==(SingletonTypePtr lhs, SingletonTypePtr rhs) { + return (void*)lhs.get() == (void*)rhs.get(); +} + +template +bool operator!=(SingletonTypePtr lhs, SingletonTypePtr rhs) { + return !(lhs == rhs); +} + +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h new file mode 100644 index 0000000000000000000000000000000000000000..d355739ecf46b3eca4002f380bdbdbad70b33298 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/core/typeid.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h new file mode 100644 index 0000000000000000000000000000000000000000..2363901b7dfab28b077df7bc77ae16c839b7614b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ATenCUDAGeneral.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// Use TORCH_CUDA_CPP_API or TORCH_CUDA_CU_API for exports from this folder + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..aa7533ac136233c2e307fcc044f7d5a26fce4ba0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ApplyGridUtils.cuh @@ -0,0 +1,52 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include + +namespace at::cuda { + +/** + Computes ceil(a / b) +*/ +template +__host__ __device__ __forceinline__ T ATenCeilDiv(T a, T b) { + return (a + b - 1) / b; +} + +namespace { + +// Threads per block for our apply kernel +// FIXME: use occupancy calculator instead +constexpr uint32_t AT_APPLY_THREADS_PER_BLOCK = 512; +constexpr uint32_t AT_APPLY_BLOCKS_PER_SM = 4; + +template +inline bool getApplyGrid(uint64_t totalElements, dim3& grid, c10::DeviceIndex curDevice, int max_threads_per_block=AT_APPLY_THREADS_PER_BLOCK) { + if (curDevice == -1) return false; + uint64_t numel_per_thread = static_cast(max_threads_per_block) * static_cast(step); + uint64_t numBlocks = ATenCeilDiv(totalElements, numel_per_thread); + uint64_t maxGridX = at::cuda::getDeviceProperties(curDevice)->maxGridSize[0]; + if (numBlocks > maxGridX) + numBlocks = maxGridX; + grid = dim3(numBlocks); + return true; +} + +constexpr int getApplyBlocksPerSM() { + return AT_APPLY_BLOCKS_PER_SM; +} + +constexpr int getApplyBlockSize() { + return AT_APPLY_THREADS_PER_BLOCK; +} + +inline dim3 getApplyBlock(int max_threads_per_block=AT_APPLY_THREADS_PER_BLOCK) { + return dim3(max_threads_per_block); +} + +} // anonymous namespace +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..d629ff658bb4cb2a1be10c3eb1d2e087c507ef7d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/AsmUtils.cuh @@ -0,0 +1,154 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// Collection of direct PTX functions + +namespace at::cuda { + +template +struct Bitfield {}; + +template <> +struct Bitfield { + static __device__ __host__ __forceinline__ + unsigned int getBitfield(unsigned int val, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + unsigned int m = (1u << len) - 1u; + return (val >> pos) & m; +#else + unsigned int ret; + asm("bfe.u32 %0, %1, %2, %3;" : "=r"(ret) : "r"(val), "r"(pos), "r"(len)); + return ret; +#endif + } + + static __device__ __host__ __forceinline__ + unsigned int setBitfield(unsigned int val, unsigned int toInsert, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + unsigned int m = (1u << len) - 1u; + toInsert &= m; + toInsert <<= pos; + m <<= pos; + + return (val & ~m) | toInsert; +#else + unsigned int ret; + asm("bfi.b32 %0, %1, %2, %3, %4;" : + "=r"(ret) : "r"(toInsert), "r"(val), "r"(pos), "r"(len)); + return ret; +#endif + } +}; + +template <> +struct Bitfield { + static __device__ __host__ __forceinline__ + uint64_t getBitfield(uint64_t val, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + uint64_t m = (1u << len) - 1u; + return (val >> pos) & m; +#else + uint64_t ret; + asm("bfe.u64 %0, %1, %2, %3;" : "=l"(ret) : "l"(val), "r"(pos), "r"(len)); + return ret; +#endif + } + + static __device__ __host__ 
__forceinline__ + uint64_t setBitfield(uint64_t val, uint64_t toInsert, int pos, int len) { +#if !defined(__CUDA_ARCH__) + pos &= 0xff; + len &= 0xff; + + uint64_t m = (1u << len) - 1u; + toInsert &= m; + toInsert <<= pos; + m <<= pos; + + return (val & ~m) | toInsert; +#else + uint64_t ret; + asm("bfi.b64 %0, %1, %2, %3, %4;" : + "=l"(ret) : "l"(toInsert), "l"(val), "r"(pos), "r"(len)); + return ret; +#endif + } +}; + +__device__ __forceinline__ int getLaneId() { +#if defined(USE_ROCM) + return __lane_id(); +#else + int laneId; + asm("mov.s32 %0, %%laneid;" : "=r"(laneId) ); + return laneId; +#endif +} + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskLt() { + const std::uint64_t m = (1ull << getLaneId()) - 1ull; + return m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskLt() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_lt;" : "=r"(mask)); + return mask; +} +#endif + +#if defined (USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskLe() { + std::uint64_t m = UINT64_MAX >> (sizeof(std::uint64_t) * CHAR_BIT - (getLaneId() + 1)); + return m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskLe() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_le;" : "=r"(mask)); + return mask; +} +#endif + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskGt() { + const std::uint64_t m = getLaneMaskLe(); + return m ? 
~m : m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskGt() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_gt;" : "=r"(mask)); + return mask; +} +#endif + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int getLaneMaskGe() { + const std::uint64_t m = getLaneMaskLt(); + return ~m; +} +#else +__device__ __forceinline__ unsigned getLaneMaskGe() { + unsigned mask; + asm("mov.u32 %0, %%lanemask_ge;" : "=r"(mask)); + return mask; +} +#endif + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh new file mode 100644 index 0000000000000000000000000000000000000000..df113e0b3b4b89d4785926444fa79ffccb23c921 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Atomic.cuh @@ -0,0 +1,530 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#if !(defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)))) +#include +#endif + +template +struct AtomicFPOp; + +template <> +struct AtomicFPOp { + template + inline __device__ at::Half operator() (at::Half *address, at::Half val, const func_t& func) { + unsigned int * address_as_ui = + (unsigned int *) ((char *)address - ((size_t)address & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + at::Half hsum; + do { + assumed = old; + hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + hsum = func(hsum, val); + old = (size_t)address & 2 ? 
(old & 0xffff) | (hsum.x << 16) : (old & 0xffff0000) | hsum.x; + old = atomicCAS(address_as_ui, assumed, old); + } while (assumed != old); + hsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + return hsum; + } +}; + +template <> +struct AtomicFPOp { + template + inline __device__ at::BFloat16 operator() (at::BFloat16 *address, at::BFloat16 val, const func_t& func) { + unsigned int * address_as_ui = + (unsigned int *) ((char *)address - ((size_t)address & 2)); + unsigned int old = *address_as_ui; + unsigned int assumed; + + at::BFloat16 bsum; + do { + assumed = old; + bsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + bsum = func(bsum, val); + old = (size_t)address & 2 ? (old & 0xffff) | (bsum.x << 16) : (old & 0xffff0000) | bsum.x; + old = atomicCAS(address_as_ui, assumed, old); + } while (assumed != old); + bsum.x = (size_t)address & 2 ? (old >> 16) : (old & 0xffff); + return bsum.x; + } +}; + +template <> +struct AtomicFPOp { + template + inline __device__ double operator() (double * address, double val, const func_t& func) { + unsigned long long int* address_as_ull = (unsigned long long int*)address; + unsigned long long int old = *address_as_ull; + unsigned long long int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, func(val, assumed)); + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __longlong_as_double(old); + } +}; + +#define ATOMIC_INTEGER_IMPL(NAME) \ +template \ +struct Atomic##NAME##IntegerImpl; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + size_t offset = (size_t)address & 3; \ + uint32_t * address_as_ui = (uint32_t *)((char *)address - offset); \ + uint32_t old = *address_as_ui; \ + uint32_t shift = offset * 8; \ + uint32_t old_byte; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + old_byte = 
(old >> shift) & 0xff; \ + newval = static_cast(func(val, static_cast(old_byte))); \ + newval = (old & ~(0x000000ff << shift)) | (newval << shift); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + size_t offset = (size_t)address & 2; \ + uint32_t * address_as_ui = (uint32_t *)((char *)address - offset); \ + bool is_32_align = offset; \ + uint32_t old = *address_as_ui; \ + uint32_t old_bytes; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + old_bytes = is_32_align ? old >> 16 : old & 0xffff; \ + newval = static_cast(func(val, static_cast(old_bytes))); \ + newval = is_32_align ? (old & 0xffff) | (newval << 16) : (old & 0xffff0000) | newval; \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + uint32_t * address_as_ui = (uint32_t *) (address); \ + uint32_t old = *address_as_ui; \ + uint32_t newval; \ + uint32_t assumed; \ + \ + do { \ + assumed = old; \ + newval = static_cast(func(val, static_cast(old))); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; \ + \ +template \ +struct Atomic##NAME##IntegerImpl { \ + template \ + inline __device__ void operator()(T *address, T val, const func_t& func) { \ + unsigned long long * address_as_ui = (unsigned long long *) (address); \ + unsigned long long old = *address_as_ui; \ + unsigned long long newval; \ + unsigned long long assumed; \ + \ + do { \ + assumed = old; \ + newval = static_cast(func(val, static_cast(old))); \ + old = atomicCAS(address_as_ui, assumed, newval); \ + } while (assumed != old); \ + } \ +}; + + +# define GPU_ATOMIC_INTEGER(NAME, OP, DTYPE) \ +inline 
__device__ void gpuAtomic##NAME(DTYPE *address, DTYPE val) { \ +Atomic##NAME##IntegerImpl()(address, \ + val, \ + [](DTYPE a, DTYPE b) { \ + return OP; \ + }); \ +} \ + +ATOMIC_INTEGER_IMPL(Add) +GPU_ATOMIC_INTEGER(Add, a || b, bool) + +// Don't instantiate gpuAtomicAdd with the macro as it seems non-standard (see int32, int64) +inline __device__ void gpuAtomicAdd(uint8_t *address, uint8_t val) { + AtomicAddIntegerImpl()(address, + val, + [](uint8_t a, uint8_t b) { + return a + b; + }); +} + +inline __device__ void gpuAtomicAdd(int8_t *address, int8_t val) { + AtomicAddIntegerImpl()(address, + val, + [](int8_t a, int8_t b) { + return a + b; + }); +} + +inline __device__ void gpuAtomicAdd(int16_t *address, int16_t val) { + AtomicAddIntegerImpl()(address, + val, + [](int16_t a, int16_t b) { + return a + b; + }); +} + +inline __device__ int32_t gpuAtomicAdd(int32_t *address, int32_t val) { + return atomicAdd(address, val); +} + +inline __device__ void gpuAtomicAdd(int64_t *address, int64_t val) { +#if defined(USE_ROCM) + __atomic_fetch_add(address, val, __ATOMIC_RELAXED); +#else + static_assert(sizeof(unsigned long long int) == sizeof(int64_t), "bitwidth change is not allowed"); + atomicAdd(reinterpret_cast(address), static_cast(val)); +#endif +} + +inline __device__ at::Half gpuAtomicAdd(at::Half *address, at::Half val) { +#if defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 700))) + return AtomicFPOp()(address, val, + [](at::Half hsum, at::Half val) { + return hsum + val; + }); +#else + return atomicAdd(reinterpret_cast<__half*>(address), val); +#endif +} + +inline __device__ at::BFloat16 gpuAtomicAdd(at::BFloat16 *address, at::BFloat16 val) { +#if defined(USE_ROCM) || ((defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800))) +return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return bsum + val; + }); +#else + __nv_bfloat16 r = atomicAdd(reinterpret_cast<__nv_bfloat16*>(address), *reinterpret_cast<__nv_bfloat16*>(&val)); + 
return *reinterpret_cast(&r); +#endif +} + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 600) +// from CUDA C Programmic Guide +inline __device__ double atomicAdd(double* address, double val) +#if defined(__clang__) && defined(__CUDA__) +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wgcc-compat" + __attribute__((enable_if(true, ""))) +#pragma GCC diagnostic pop +#endif +{ + + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(val + __longlong_as_double(assumed)); + }); +} +#elif defined(USE_ROCM) || !(defined(__CUDA_ARCH__)) + +/* Note [hip-clang differences to hcc] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * The upcoming hip-clang compiler for ROCm differs from hcc in a few details. + * It exports the __HIP__ macro, we can hence differentiate between hcc and + * hip-clang. In the below, hcc only received support for atomicAdd with double + * typing after work week 18312. hip-clang had support from the first version. + * In general, the code-visible differences between hip-clang and hcc will be + * minimal. + */ + +#if defined(USE_ROCM) && __hcc_workweek__ < 18312 && !__HIP__ + // This needs to be defined for the host side pass + inline __device__ double atomicAdd(double *address, double val) { } +#endif +#endif + +inline __device__ double gpuAtomicAdd(double *address, double val) { + return atomicAdd(address, val); +} + +inline __device__ float gpuAtomicAdd(float *address, float val) { + return atomicAdd(address, val); +} + +template +inline __device__ void gpuAtomicAdd(c10::complex *address, c10::complex val) { + gpuAtomicAdd(&address->real_, val.real_); + gpuAtomicAdd(&address->imag_, val.imag_); +} + +/* Note [gpuAtomicAdd vs atomicAdd] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Some extensions such as torchvision call atomicAdd() + * directly and require non-library provided data type support. Only for these, we + * continue to provide atomicAdd overloads. 
+ */ +inline __device__ at::Half atomicAdd(at::Half *address, at::Half val) { + return gpuAtomicAdd(address, val); +} + +inline __device__ at::BFloat16 atomicAdd(at::BFloat16 *address, at::BFloat16 val) { + return gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(uint8_t *address, uint8_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int8_t *address, int8_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int16_t *address, int16_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(int64_t *address, int64_t val) { + gpuAtomicAdd(address, val); +} + +inline __device__ void atomicAdd(bool *address, bool val) { + gpuAtomicAdd(address, val); +} + +/* Note [explicitly non-returning atomics] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * AMD's MI100 (gfx908) provides an optimized fp32 atomicAdd, exposed via atomicAddNoRet(). + * Due to compiler limitations, callers must opt-in to guarantee the optimized instruction. + * This non-returning atomicAddNoRet cannot be used to implement the returning atomicAdd, + * therefore we need a new API 'gpuAtomicAddNoReturn'. 
+ */ +template +inline __device__ void gpuAtomicAddNoReturn(c10::complex *address, c10::complex val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(uint8_t *address, uint8_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int8_t *address, int8_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int16_t *address, int16_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int32_t *address, int32_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(int64_t *address, int64_t val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(bool *address, bool val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(at::Half *address, at::Half val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(at::BFloat16 *address, at::BFloat16 val) { gpuAtomicAdd(address, val); } + +/* Note [HIP unsafeAtomicAdd] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Use unsafeAtomicAdd instead of atomicAdd for fp32 and fp64. + * On HIP, atomicAdd is always correct but is a slow CAS loop. + * unsafeAtomicAdd will use HW instructions and is much faster, + * but the caller must guarantee the pointer is GPU memory. + * If the pointer is system memory, the result is a silent no-op. + * This guarantee is upheld by all PyTorch uses of unsafeAtomicAdd. + * AMD HIP atomic header file is named amd_hip_atomic.h and is + * under the LLVM compiler directory. 
+ */ +#if defined(USE_ROCM) +inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { +#if defined(__gfx908__) + atomicAddNoRet(address, val); +#else + (void)unsafeAtomicAdd(address, val); +#endif +} +inline __device__ void gpuAtomicAddNoReturn(double *address, double val) { (void)unsafeAtomicAdd(address, val); } +#else +inline __device__ void gpuAtomicAddNoReturn(float *address, float val) { gpuAtomicAdd(address, val); } +inline __device__ void gpuAtomicAddNoReturn(double *address, double val) { gpuAtomicAdd(address, val); } +#endif + +// Atomic multiplication implementation. + +ATOMIC_INTEGER_IMPL(Mul) +GPU_ATOMIC_INTEGER(Mul, a * b, uint8_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int8_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int16_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int32_t) +GPU_ATOMIC_INTEGER(Mul, a * b, int64_t) + +inline __device__ at::Half gpuAtomicMul(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return bsum * val; + }); +} + +inline __device__ at::BFloat16 gpuAtomicMul(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return bsum * val; + }); +} + +inline __device__ double gpuAtomicMul(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(val * __longlong_as_double(assumed)); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. 
+inline __device__ float gpuAtomicMul (float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(val * + __int_as_float(assumed))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +// Atomic maximum implementation. + +template +__host__ __device__ T safe_max(T a, T b) { + #if defined(__HIPCC__) + // TODO: remove this special case for HIP when issue is fixed: + // https://github.com/ROCm/hip/issues/2209 + T max = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::max(a, b)); + #else + T max = at::_isnan(b) ? b : std::max(a, b); + #endif + + return max; +} + +ATOMIC_INTEGER_IMPL(Max) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), uint8_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int8_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int16_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int32_t) +GPU_ATOMIC_INTEGER(Max, safe_max(a, b), int64_t) + +inline __device__ at::Half gpuAtomicMax(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return safe_max(bsum, val); + }); +} + +inline __device__ at::BFloat16 gpuAtomicMax(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return safe_max(bsum, val); + }); +} + +inline __device__ double gpuAtomicMax(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(safe_max(val, __longlong_as_double(assumed))); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. 
+inline __device__ float gpuAtomicMax(float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(safe_max(val, __int_as_float(assumed)))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +// Atomic minimum implementation. + +template +__host__ __device__ T safe_min(T a, T b) { + #if defined(__HIPCC__) + // TODO: remove this special case for HIP when issue is fixed: + // https://github.com/ROCm/hip/issues/2209 + T min = at::_isnan(a) ? a : (at::_isnan(b) ? b : std::min(a, b)); + #else + T min = at::_isnan(b) ? b : std::min(a, b); + #endif + + return min; +} + +ATOMIC_INTEGER_IMPL(Min) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), uint8_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int8_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int16_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int32_t) +GPU_ATOMIC_INTEGER(Min, safe_min(a, b), int64_t) + +inline __device__ at::Half gpuAtomicMin(at::Half * address, at::Half val) { + return AtomicFPOp()(address, val, + [](at::Half bsum, at::Half val) { + return safe_min(bsum, val); + }); +} + +inline __device__ at::BFloat16 gpuAtomicMin(at::BFloat16 * address, at::BFloat16 val) { + return AtomicFPOp()(address, val, + [](at::BFloat16 bsum, at::BFloat16 val) { + return safe_min(bsum, val); + }); +} + +inline __device__ double gpuAtomicMin(double * address, double val) { + return AtomicFPOp()(address, val, + [](double val, unsigned long long int assumed) { + return __double_as_longlong(safe_min(val, __longlong_as_double(assumed))); + }); +} + +// Dont use a templated function for this since the addition function defaults to the CUDA built-in. 
+inline __device__ float gpuAtomicMin(float * address, float val) { + unsigned int* address_as_ull = (unsigned int*)address; + unsigned int old = *address_as_ull; + unsigned int assumed; + + do { + assumed = old; + old = atomicCAS(address_as_ull, assumed, + __float_as_int(safe_min(val, __int_as_float(assumed)))); + + // Note: uses integer comparison to avoid hang in case of NaN (since NaN != NaN) + } while (assumed != old); + + return __int_as_float(old); +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..73ff79247312123b0cead7bd00734aa259ccd948 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAApplyUtils.cuh @@ -0,0 +1,542 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// +// This file contains pointwise operation functions and kernels that +// work on both contiguous and non-contiguous tensor arguments of +// arbitrary (up to MAX_CUTORCH_DIMS) dimensioned arguments without +// copying or temporary storage. +// + +/* + NOTE [ CUDA_tensor_applyN helpers ] + + The following CUDA_tensor_applyN (where N currently can be 1, 2, 3, or 4) + functions apply a pointwise operator to N tensor(s). + + The calling convention is + + 1. The template arguments should be, sequentially, + - First N typename args specify the scalar types of each of the N tensors. + - (Optional) `int step` arg specifies the number of elements processed + together at the same time. 
+ Default is 1. + - A usually omitted (i.e., inferred) typename arg specifies the type of the + function/functor applied on `N * step` values in each iteration of each + CUDA thread. + 2. The arguments should be, sequentially, + - N tensors + - op: a function/functor that processes `N * step` values at the same time. + - If `step == 1`, it must have signature + `void(*)(scalar1_t&, scalar2_t&, ..., scalarN_t&)`, where + `scalar*_t`s are the first N typename template args, and the inputs + are the `N` values from the `N` tensors retrieved at a common index. + - Otherwise, it must have signature + void(*)(int n, scalar1_t&, scalar1_t&, ..., scalar1_t&, // repeat `step` times + scalar2_t&, scalar2_t&, ..., scalar2_t&, // repeat `step` times + ..., + scalarN_t&, scalarN_t&, ..., scalarN_t&) // repeat `step` times + Different from `step == 1` case, it processes `N * step` values taken + from `step` common indices. Moreover, the first input `n` represents the + number of valid indices (it will always have `0 < n <= step`). It will + almost always be `step`, but at the boundary we may not have full `step` + elements and `n` can be a lesser value. + + E.g., if `step == 4` and `N == 2`, `op` could be + + [](int n, scalar1_t &u1, scalar1_t &u2, scalar1_t &u3, scalar1_t &u4, + scalar2_t &v1, scalar2_t &v2, scalar2_t &v3, scalar2_t &v4) { + // Only process u1, ..., un and v1, ..., vn. + // So if `n == 3`, `u4` and `v4` need not be considered. + } + + In both cases, the references can actually be const, but at least one of + them should be non-const in order to write the output. + - (Optional, but recommended) N TensorArgType args that specify for each + tensor whether `op` reads AND writes (i.e., TensorArgType::ReadWrite), + or only reads (i.e., TensorArgType::ReadOnly). + Default is TensorArgType::ReadWrite for the first tensor, and + TensorArgType::ReadOnly for the rest.
+ + E.g., + + to compute a = b^2 for a and b of same dtype, we can call + + CUDA_tensor_apply2( + a, b, + [] __device__ (scalar &a_val, const scalar &b_val) { a_val = b_val * b_val; } + ); + + to work on 2 values at the same time, we can call + + CUDA_tensor_apply2( + a, b, + [] __device__ (int n, scalar1 &a_val1, scalar1 &a_val2, + const scalar2 &b_val1, const scalar2 &b_val2) { + // call special vectorized op here, or just do elementwise and enjoy unrolling... + // if n == 1, only process a_val1 and b_val1 + } + ); +*/ + +namespace at::cuda { + +// TODO: combine with TensorArg? So far that's been for debugging, and this is functional... +enum class TensorArgType { ReadWrite, ReadOnly }; + +namespace { + +// Rearrange dimensions for pointwise operations so that strides are in +// decreasing order as much as possible, so that kernels have better memory +// access patterns. +// +// For example, consider a binary operation on two "transposed" 2-dim tensors: +// sizes: 256 512 +// aInfo->strides: 1 256 +// bInfo->strides: 1 256 +// +// Given this, each concurrent memory access inside kernelPointwiseApply2() is +// exactly 256 elements apart, resulting in poor performance. +// +// This function exchanges dimensions so that memory access is contiguous: +// sizes: 512 256 +// aInfo->strides: 256 1 +// bInfo->strides: 256 1 +// +// (Actually, it becomes even better because now collapseDims() can turn each +// input into one contiguous array.) +// +// In general, given M (<=4) TensorInfo's with N dimensions, we can view each +// strides[i] (0 <= i < N) as an M-tuple. Given each pair i < j, we exchange +// strides[i] and [j] if +// (1) strides[i][k] < strides[j][k] for some k (0 <= k < M) +// (exchanging them will benefit input #k), and +// (2) strides[i][k] <= strieds[j][k] for all k +// (exchanging them will not make any input worse). 
+template +inline void rearrangeDims(detail::TensorInfo* aInfo, + detail::TensorInfo* bInfo = nullptr, + detail::TensorInfo* cInfo = nullptr, + detail::TensorInfo* dInfo = nullptr) { + int numInfos = 1; + int dims = aInfo->dims; + IndexType *sizes[4] = { aInfo->sizes, }; + IndexType *strides[4] = { aInfo->strides, }; + + if (bInfo != nullptr) { + ++numInfos; + if (bInfo->dims != dims) return; + sizes[1] = bInfo->sizes; + strides[1] = bInfo->strides; + } + + if (cInfo != nullptr) { + ++numInfos; + if (cInfo->dims != dims) return; + sizes[2] = cInfo->sizes; + strides[2] = cInfo->strides; + } + + if (dInfo != nullptr) { + ++numInfos; + if (dInfo->dims != dims) return; + sizes[3] = dInfo->sizes; + strides[3] = dInfo->strides; + } + + // Bail out if sizes do not match: we are using "deprecated pointwise + // behavior" among tensors of different shapes but same number of elements. + for (int i = 1; i < numInfos; ++i) { + for (int j = 0; j < dims; ++j) { + if (sizes[i][j] != sizes[0][j]) return; + } + } + + for (int i = 0; i < dims - 1; ++i) { + // No need to consider dimensions of size 1. + if (sizes[0][i] == 1) continue; + + for (int j = i + 1; j < dims; ++j) { + if (sizes[0][j] == 1) continue; + + // Compare the relative sizes of strides between dim #i and dim #j. 
+ bool hasIncreasingStrides = false; + bool hasDecreasingStrides = false; + + for (int k = 0; k < numInfos; k++) { + IndexType stride_i = strides[k][i]; + IndexType stride_j = strides[k][j]; + if (stride_i < stride_j) { + hasIncreasingStrides = true; + } else if (stride_i > stride_j) { + hasDecreasingStrides = true; + } + } + + if (hasIncreasingStrides && !hasDecreasingStrides) { + for (int k = 0; k < numInfos; k++) { + IndexType size = sizes[k][i]; + sizes[k][i] = sizes[k][j]; + sizes[k][j] = size; + + IndexType stride = strides[k][i]; + strides[k][i] = strides[k][j]; + strides[k][j] = stride; + } + } + } + } +} + +// The `remaining_steps` argument is used to support Op that operates on +// multiple elements at the same time. Generally, the strategy of ApplyOpN is to +// 1. Initialize `remaining_steps = step`, where `step` is the template arg of +// CUDA_tensor_applyN helpers. The input arg `n` to `apply()` represents the +// number of elements in bound for this call. It will almost always equal to +// `step` except at boundaries. +// 2. If `remaining_steps > 0` convert the current linearIndex to offset (if in +// bound), and recursively call `ApplyOpN` with `remaining_steps - 1`. +// 3. At `remaining_steps = 0`, +// if `step = 1`, call `op(tensor1_val, tensor2_val, ...)`; +// if `step > 1`, call `op(n, tensor1_val1, tensor1_val2, ..., tesor1_valstep, +// tensor2_val1, tensor2_val2, ..., tesor2_valstep, +// ... +// tensorN_val1, tensorN_val2, ..., tesorN_valstep);` +// +// See NOTE [ CUDA_tensor_applyN helpers ] above for how Op may look like. + +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, int n, + IndexType linearIndex, Offsets... aOffsets) { + // Convert `linearIndex` into an offset of `a` + const IndexType aOffset = sizeof...(Offsets) < n ? 
+ detail::IndexToOffset::get(linearIndex, a) : 0; + + ApplyOp1::apply( + a, op, n, linearIndex + 1, aOffsets..., aOffset + ); +} +}; + +// Specialize `step=1` case (i.e., `remaining_steps=0` and `len(Offsets)=1`). +// We don't need to pass in how many elements need to processed in this case. +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, + int n, IndexType linearIndex, Offset offset) { + op(a.data[offset]); +} +}; + +template +struct ApplyOp1 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, const Op &op, int n, + IndexType linearIndex, Offsets... offsets) { + op(n, a.data[offsets]...); +} +}; + +template +#if __CUDA_ARCH__ >= 350 || defined(USE_ROCM) +C10_LAUNCH_BOUNDS_2(AT_APPLY_THREADS_PER_BLOCK, AT_APPLY_BLOCKS_PER_SM) +#endif +__global__ void kernelPointwiseApply1(detail::TensorInfo a, + IndexType totalElements, const Op op) { + for (IndexType linearIndex = (blockIdx.x * blockDim.x + threadIdx.x) * step; + linearIndex < totalElements; + linearIndex += gridDim.x * blockDim.x * step) { + ApplyOp1::apply( + a, op, ::min(step, static_cast(totalElements - linearIndex)), linearIndex); + } +} + + +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int64_t n, IndexType linearIndex, + Offsets... aOffsets, Offsets... bOffsets) { + // Convert `linearIndex` into an offset of `a` + const IndexType aOffset = static_cast(sizeof...(Offsets)) < n ? + detail::IndexToOffset::get(linearIndex, a) : 0; + + // Convert `linearIndex` into an offset of `b` + const IndexType bOffset = static_cast(sizeof...(Offsets)) < n ? + detail::IndexToOffset::get(linearIndex, b) : 0; + + ApplyOp2::apply( + a, b, op, n, linearIndex + 1, aOffsets..., aOffset, bOffsets..., bOffset + ); +} +}; + +// Specialize `step=1` case (i.e., `remaining_steps=0` and `len(Offsets)=1`). 
+// We don't need to pass in how many elements need to processed in this case. +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int /*n*/, IndexType /*linearIndex*/, + Offset aOffset, Offset bOffset) { + op(a.data[aOffset], b.data[bOffset]); +} +}; + +template +struct ApplyOp2 { +__device__ __forceinline__ +static void apply(detail::TensorInfo &a, + detail::TensorInfo &b, + const Op &op, int n, IndexType linearIndex, + Offsets... aOffsets, Offsets... bOffsets) { + op(n, a.data[aOffsets]..., b.data[bOffsets]...); +} +}; + +template +#if __CUDA_ARCH__ >= 350 || defined(USE_ROCM) +C10_LAUNCH_BOUNDS_2(max_threads_per_block, min_blocks_per_sm) +#endif +__global__ void +kernelPointwiseApply2(detail::TensorInfo a, + detail::TensorInfo b, + IndexType totalElements, + const Op op) { + for (IndexType linearIndex = (blockIdx.x * blockDim.x + threadIdx.x) * step; + linearIndex < totalElements; + linearIndex += gridDim.x * blockDim.x * step) { + ApplyOp2::apply( + a, b, op, ::min(step, static_cast(totalElements - linearIndex)), + linearIndex); + } +} + +} // anonymous namespace + +template +inline bool CUDA_tensor_apply2(at::TensorBase a, + at::TensorBase b, + const Op op, + TensorArgType aType = TensorArgType::ReadWrite, + TensorArgType bType = TensorArgType::ReadOnly) { + TORCH_CHECK(a.device().is_cuda() && b.device().is_cuda(), + "CUDA_tensor_apply2: Expected tensors to have CUDA DeviceType, but got " + "tensors with type ", a.device().type(), " and ", b.device().type()); + int64_t totalElements = a.numel(); + + if (totalElements != b.numel()) { + return false; + } + + if (a.dim() > MAX_TENSORINFO_DIMS || + b.dim() > MAX_TENSORINFO_DIMS) { + return false; + } + + if (a.numel() == 0) { + // Empty tensor; do nothing + return true; + } + const dim3 block = getApplyBlock(max_threads_per_block); + + dim3 grid; + auto curDevice = current_device(); + if (curDevice == -1) return false; + if 
(!getApplyGrid(totalElements, grid, curDevice, max_threads_per_block)) { + return false; + } + + /* + Expands readable/writable tensors whose indices may be "overlapped." + This ensures that each element of the tensor is operated on once and only + once. + */ + TensorBase oldA; + TensorBase oldB; + + if (aType == TensorArgType::ReadWrite && detail::maybeOverlappingIndices(a)) { + // Must perform in contiguous space + oldA = std::exchange(a, a.contiguous()); + } + if (bType == TensorArgType::ReadWrite && detail::maybeOverlappingIndices(b)) { + // Must perform in contiguous space + oldB = std::exchange(b, b.contiguous()); + } + + // It is possible that the tensor dimensions are able to be collapsed, + // and thus we can reduce the actual code complexity of the copy by + // exploiting this knowledge statically, since the div/mod is the + // most expensive part of the operation, more so than memory accesses. + // For instance, when copying a non-contiguous to a contiguous tensor + // (or vice versa), the contiguous tensor can be collapsed to one + // dimension, and the loop to translate the linear index to the array + // index can be similarly collapsed. That is what this unrolling is for. 
+ +#define HANDLE_CASE(TYPE, A, B) \ + kernelPointwiseApply2 \ + <<>>( \ + aInfo, bInfo, static_cast(totalElements), op); \ + C10_CUDA_KERNEL_LAUNCH_CHECK(); + +#define HANDLE_B_CASE(TYPE, A, B) { \ + switch (B) { \ + case 1: \ + HANDLE_CASE(TYPE, A, 1); \ + break; \ + case 2: \ + HANDLE_CASE(TYPE, A, 2); \ + break; \ + default: \ + HANDLE_CASE(TYPE, A, -1); \ + break; \ + } \ +} + +#define HANDLE_A_CASE(TYPE, A, B) { \ + switch (A) { \ + case 1: \ + HANDLE_B_CASE(TYPE, 1, B); \ + break; \ + case 2: \ + HANDLE_B_CASE(TYPE, 2, B); \ + break; \ + default: \ + HANDLE_B_CASE(TYPE, -1, B); \ + break; \ + } \ +} + + if (detail::canUse32BitIndexMath(a) && + detail::canUse32BitIndexMath(b)) { + detail::TensorInfo aInfo = + detail::getTensorInfo(a); + + detail::TensorInfo bInfo = + detail::getTensorInfo(b); + rearrangeDims(&aInfo, &bInfo); + aInfo.collapseDims(); + bInfo.collapseDims(); + + HANDLE_A_CASE(unsigned int, aInfo.dims, bInfo.dims); + } else { + detail::TensorInfo aInfo = + detail::getTensorInfo(a); + + detail::TensorInfo bInfo = + detail::getTensorInfo(b); + rearrangeDims(&aInfo, &bInfo); + aInfo.collapseDims(); + bInfo.collapseDims(); + + /* + Only instantiates the all 1D special case and the fallback all nD case for + large (64-bit indexed) tensors to reduce compilation time. + */ + if (aInfo.dims == 1 && bInfo.dims == 1) { + HANDLE_CASE(uint64_t, 1, 1); + } else { + HANDLE_CASE(uint64_t, -1, -1); + } + } +#undef HANDLE_CASE +#undef HANDLE_B_CASE +#undef HANDLE_A_CASE + + if (oldA.defined()) { + at::native::copy_ignoring_overlaps(oldA, a); + } + + if (oldB.defined()) { + at::native::copy_ignoring_overlaps(oldB, b); + } + + return true; +} + +/* Provides default step = 1 to CUDA_tensor_apply2. 
*/ +template +inline bool CUDA_tensor_apply2(const at::TensorBase &a, + const at::TensorBase &b, + const Op op, + TensorArgType aType = TensorArgType::ReadWrite, + TensorArgType bType = TensorArgType::ReadOnly) { + return CUDA_tensor_apply2(a, b, op, aType, bType); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h new file mode 100644 index 0000000000000000000000000000000000000000..4be9f1a3f05a4888597a7252d1e8d3c3a305bbe5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDABlas.h @@ -0,0 +1,398 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +/* + Provides a subset of CUDA BLAS functions as templates: + + gemm(transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, + ldc) + + gemv(transa, m, n, alpha, a, lda, x, incx, beta, y, incy) + + dot(n, x, incx, y, incy, result) + + where Dtype is double, float, at::Half or at::BFloat16 (ROCm, NOT for dot). + The functions are available in at::cuda::blas namespace. 
+ */ + +#include +#include +#include + +namespace at::cuda::blas { + +// RAII guard that sets the CuBLAS pointer mode and restores it to +// its previous value when the guard is destroyed +class PointerModeGuard { +public: + PointerModeGuard(cublasHandle_t handle, cublasPointerMode_t mode) : + handle(handle) { + TORCH_CUDABLAS_CHECK(cublasGetPointerMode(handle, &previous_mode)); + TORCH_CUDABLAS_CHECK(cublasSetPointerMode(handle, mode)); + } + + ~PointerModeGuard() { + cublasSetPointerMode(handle, previous_mode); + } + +private: + cublasHandle_t handle; + cublasPointerMode_t previous_mode{}; +}; + +/* LEVEL 3 BLAS FUNCTIONS */ + +#define CUDABLAS_GEMM_ARGTYPES(Dtype) CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype) + +#define CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \ + char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type alpha, \ + const Dtype *a, int64_t lda, const Dtype *b, int64_t ldb, at::opmath_type beta,\ + C_Dtype *c, int64_t ldc + +#define CUDABLAS_GEMM_ARGS(Dtype) transa, transb, m, n, k, alpha, a, lda, b, ldb, beta, c, ldc + +#define CUDABLAS_GEMM_DTYPE_IS_FLOAT_TYPE_AND_C_DTYPE_IS_FLOAT \ + ((std::is_same::value || std::is_same::value) && std::is_same::value) + +template ::type* = nullptr> +inline void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm: not implemented"); +} + +template ::type* = nullptr> +void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)); + +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(double)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(float)); +template <> +void gemm>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(at::Half)); +template <> +void gemm(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); +template<> +void gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void 
gemm(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +template +inline void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::gemm_internal: not implemented"); +} + +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(double)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(float)); +template <> +void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm_internal>(CUDABLAS_GEMM_ARGTYPES(c10::complex)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::Half)); +template <> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES(at::BFloat16)); +template<> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void gemm_internal(CUDABLAS_GEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +enum GEMMAndBiasActivationEpilogue { + None, + RELU, + GELU, +}; + +// NOTE: GELU activation is not supported prior to CUDA 11.4 and will +// do nothing if passed in that case. 
+template +bool gemm_and_bias( + bool transpose_mat1, + bool transpose_mat2, + int64_t m, + int64_t n, + int64_t k, + at::opmath_type alpha_val, + const Dtype* mat1_ptr, + int64_t mat1_ld, + const Dtype* mat2_ptr, + int64_t mat2_ld, + const Dtype* bias, + C_Dtype* result_ptr, + int64_t result_ld, + GEMMAndBiasActivationEpilogue activation = GEMMAndBiasActivationEpilogue::None); + +void int8_gemm( + bool transpose_mat1, + bool transpose_mat2, + int64_t m, + int64_t n, + int64_t k, + const int8_t* mat1_ptr, + int64_t mat1_ld, + const int8_t* mat2_ptr, + int64_t mat2_ld, + int32_t* result_ptr, + int64_t result_ld); + +void scaled_gemm( + char transa, + char transb, + int64_t m, + int64_t n, + int64_t k, + const void* mat1_ptr, + const void* mat1_scale_ptr, + int64_t mat1_ld, + ScalarType mat1_dtype, + ScalarType mat1_scale_dtype, + at::blas::ScalingType mat1_scaling_type, + const void* mat2_ptr, + const void* mat2_scale_ptr, + int64_t mat2_ld, + ScalarType mat2_dtype, + ScalarType mat2_scale_dtype, + at::blas::ScalingType mat2_scaling_type, + const void* bias_ptr, + ScalarType bias_dtype, + void* result_ptr, + const void* result_scale_ptr, + int64_t result_ld, + ScalarType result_dtype, + bool use_fast_accum, + const std::optional& alpha); + +#define CUDABLAS_BGEMM_ARGTYPES(Dtype) CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, Dtype) + +#define CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype) \ + char transa, char transb, int64_t m, int64_t n, int64_t k, at::opmath_type alpha, \ + const Dtype *a, int64_t lda, int64_t stridea, \ + const Dtype *b, int64_t ldb, int64_t strideb, \ + at::opmath_type beta, C_Dtype *c, int64_t ldc, int64_t stridec, int64_t num_batches + +#define CUDABLAS_BGEMM_ARGS(Dtype) \ + transa, transb, m, n, k, alpha, a, lda, stridea, b, ldb, strideb, beta, c, ldc, stridec, num_batches + +template ::type* = nullptr> +inline void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm: not 
implemented"); +} + +template ::type* = nullptr> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)); + +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(double)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(float)); +template <> +void bgemm>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::Half)); +template <> +void bgemm(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)); +template<> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void bgemm(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +template +inline void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(Dtype, C_Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::bgemm_internal: not implemented"); +} + +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(double)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(float)); +template <> +void bgemm_internal>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm_internal>(CUDABLAS_BGEMM_ARGTYPES(c10::complex)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::Half)); +template <> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES(at::BFloat16)); +template<> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::Half, float)); +template<> +void bgemm_internal(CUDABLAS_BGEMM_ARGTYPES_AND_C_DTYPE(at::BFloat16, float)); + +#define CUDABLAS_TRSM_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, \ + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, \ + const Dtype *alpha, const Dtype *A, int lda, Dtype *B, int ldb + +template +inline void trsm(CUDABLAS_TRSM_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::trsm: not implemented"); +} + +template <> +TORCH_CUDA_CU_API void trsm(CUDABLAS_TRSM_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void trsm(CUDABLAS_TRSM_ARGTYPES(double)); 
+template <> +TORCH_CUDA_CU_API void trsm>(CUDABLAS_TRSM_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void trsm>(CUDABLAS_TRSM_ARGTYPES(c10::complex)); + +#define CUDABLAS_TRSM_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasSideMode_t side, cublasFillMode_t uplo, \ + cublasOperation_t trans, cublasDiagType_t diag, int m, int n, \ + const Dtype *alpha, Dtype *A[], int lda, Dtype *B[], int ldb, \ + int batchCount + +template +inline void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::trsmBatched: not implemented"); +} + +template <> +TORCH_CUDA_CU_API void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void trsmBatched(CUDABLAS_TRSM_BATCHED_ARGTYPES(double)); +template <> +TORCH_CUDA_CU_API void trsmBatched>(CUDABLAS_TRSM_BATCHED_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void trsmBatched>(CUDABLAS_TRSM_BATCHED_ARGTYPES(c10::complex)); + +/* LEVEL 2 BLAS FUNCTIONS */ + +#define CUDABLAS_GEMV_ARGTYPES(Dtype) \ + char trans, int64_t m, int64_t n, Dtype alpha, const Dtype *a, int64_t lda, \ + const Dtype *x, int64_t incx, Dtype beta, Dtype *y, int64_t incy + +template +inline void gemv(CUDABLAS_GEMV_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::gemv: not implemented"); +} + +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(double)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(float)); +template <> +void gemv>(CUDABLAS_GEMV_ARGTYPES(c10::complex)); +template <> +void gemv>(CUDABLAS_GEMV_ARGTYPES(c10::complex)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(at::Half)); +template <> +void gemv(CUDABLAS_GEMV_ARGTYPES(at::BFloat16)); + +/* LEVEL 1 BLAS FUNCTIONS */ + +#define CUDABLAS_DOT_ARGTYPES(Dtype) \ + cublasHandle_t handle, int n, const Dtype *x, int incx, const Dtype *y, \ + int incy, Dtype *result + +template +inline void dot(CUDABLAS_DOT_ARGTYPES(Dtype)) { + 
static_assert(false&&sizeof(Dtype),"at::cuda::blas::dot: not implemented"); +} + +template <> +void dot(CUDABLAS_DOT_ARGTYPES(double)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(float)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(at::Half)); +template <> +void dot(CUDABLAS_DOT_ARGTYPES(at::BFloat16)); +template <> +void dot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); +template <> +void dot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); + +template +inline void vdot(CUDABLAS_DOT_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::vdot: not implemented"); +} + +template <> +void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); +template <> +void vdot>(CUDABLAS_DOT_ARGTYPES(c10::complex)); + +#define CUDABLAS_GETRS_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasOperation_t trans, \ + int n, int nrhs, Dtype** dA_array, int lda, int* ipiv_array, \ + Dtype** dB_array, int ldb, int* info_array, int batchsize + +#define CUDABLAS_GEQRF_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, int m, int n, Dtype **A_array, int lda, \ + Dtype **tau_array, int *info, int batchsize + +#define CUDABLAS_GETRF_ARGTYPES(Dtype) \ + int n, Dtype** dA_array, int ldda, int* ipiv_array, int* info_array, int batchsize + +#define CUDABLAS_GELS_BATCHED_ARGTYPES(Dtype) \ + cublasHandle_t handle, cublasOperation_t trans, \ + int m, int n, int nrhs, Dtype** dA_array, int ldda, \ + Dtype** dC_array, int lddc, int* info, int *devInfoArray, int batchSize + +template +void getrsBatched(CUDABLAS_GETRS_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype),"at::cuda::blas::getrsBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void getrsBatched(CUDABLAS_GETRS_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void getrsBatched(CUDABLAS_GETRS_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void getrsBatched>(CUDABLAS_GETRS_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void getrsBatched>(CUDABLAS_GETRS_ARGTYPES(c10::complex)); + +template +void 
geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::geqrfBatched: not implemented"); +} +template <> +TORCH_CUDA_CU_API void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(float)); +template <> +TORCH_CUDA_CU_API void geqrfBatched(CUDABLAS_GEQRF_BATCHED_ARGTYPES(double)); +template <> +TORCH_CUDA_CU_API void geqrfBatched>( + CUDABLAS_GEQRF_BATCHED_ARGTYPES(c10::complex)); +template <> +TORCH_CUDA_CU_API void geqrfBatched>( + CUDABLAS_GEQRF_BATCHED_ARGTYPES(c10::complex)); + +template +void getrfBatched(CUDABLAS_GETRF_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::getrfBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void getrfBatched(CUDABLAS_GETRF_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void getrfBatched(CUDABLAS_GETRF_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void getrfBatched>(CUDABLAS_GETRF_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void getrfBatched>(CUDABLAS_GETRF_ARGTYPES(c10::complex)); + +template +void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(Dtype)) { + static_assert(false&&sizeof(Dtype), "at::cuda::blas::gelsBatched: not implemented"); +} +template<> +TORCH_CUDA_CU_API void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(double)); +template<> +TORCH_CUDA_CU_API void gelsBatched(CUDABLAS_GELS_BATCHED_ARGTYPES(float)); +template<> +TORCH_CUDA_CU_API void gelsBatched>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::complex)); +template<> +TORCH_CUDA_CU_API void gelsBatched>(CUDABLAS_GELS_BATCHED_ARGTYPES(c10::complex)); + +} // namespace at::cuda::blas + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h new file mode 100644 index 0000000000000000000000000000000000000000..6542be0367efa5eb4170cfea3aebfe58ab274c8f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAConfig.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// Test these using #if AT_CUDNN_ENABLED(), not #ifdef, so that it's +// obvious if you forgot to include Config.h +// c.f. https://stackoverflow.com/questions/33759787/generating-an-error-if-checked-boolean-macro-is-not-defined +// +// NB: This header MUST NOT be included from other headers; it should +// only be included from C++ files. +#define AT_CUDNN_ENABLED() 1 +#define AT_CUSPARSELT_ENABLED() 1 +#define AT_HIPSPARSELT_ENABLED() 0 +#define AT_ROCM_ENABLED() 0 +#define AT_MAGMA_ENABLED() 1 + +// Needed for hipMAGMA to correctly identify implementation +#if (AT_ROCM_ENABLED() && AT_MAGMA_ENABLED()) +#define HAVE_HIP 1 +#endif + +#define NVCC_FLAGS_EXTRA "-gencode;arch=compute_70,code=sm_70;-gencode;arch=compute_75,code=sm_75;-gencode;arch=compute_80,code=sm_80;-gencode;arch=compute_86,code=sm_86;-gencode;arch=compute_90,code=sm_90;-gencode;arch=compute_100,code=sm_100;-gencode;arch=compute_120,code=sm_120" + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h new file mode 100644 index 0000000000000000000000000000000000000000..debe2d8ba8e0806f4db9ff25f7b2874e6b369ddb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContext.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// Preserved for BC, as many files depend on these includes +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h new file mode 100644 index 0000000000000000000000000000000000000000..20f00cf2343a6128d7f92107fd8db98eac6d65d2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAContextLight.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// Light-weight version of CUDAContext.h with fewer transitive includes + +#include +#include +#include + +#include +#include +#include + +// cublasLT was introduced in CUDA 10.1 but we enable only for 11.1 that also +// added bf16 support +#include + +#ifdef CUDART_VERSION +#include +#endif + +#if defined(USE_CUDSS) +#include +#endif + +#if defined(USE_ROCM) +#include +#endif + +#include +#include + +namespace c10 { +struct Allocator; +} + +namespace at::cuda { + +/* +A common CUDA interface for ATen. 
+ +This interface is distinct from CUDAHooks, which defines an interface that links +to both CPU-only and CUDA builds. That interface is intended for runtime +dispatch and should be used from files that are included in both CPU-only and +CUDA builds. + +CUDAContext, on the other hand, should be preferred by files only included in +CUDA builds. It is intended to expose CUDA functionality in a consistent +manner. + +This means there is some overlap between the CUDAContext and CUDAHooks, but +the choice of which to use is simple: use CUDAContext when in a CUDA-only file, +use CUDAHooks otherwise. + +Note that CUDAContext simply defines an interface with no associated class. +It is expected that the modules whose functions compose this interface will +manage their own state. There is only a single CUDA context/state. +*/ + +/** + * DEPRECATED: use device_count() instead + */ +inline int64_t getNumGPUs() { + return c10::cuda::device_count(); +} + +/** + * CUDA is available if we compiled with CUDA, and there are one or more + * devices. If we compiled with CUDA but there is a driver problem, etc., + * this function will report CUDA is not available (rather than raise an error.) 
+ */ +inline bool is_available() { + return c10::cuda::device_count() > 0; +} + +TORCH_CUDA_CPP_API cudaDeviceProp* getCurrentDeviceProperties(); + +TORCH_CUDA_CPP_API int warp_size(); + +TORCH_CUDA_CPP_API cudaDeviceProp* getDeviceProperties(c10::DeviceIndex device); + +TORCH_CUDA_CPP_API bool canDeviceAccessPeer( + c10::DeviceIndex device, + c10::DeviceIndex peer_device); + +TORCH_CUDA_CPP_API c10::Allocator* getCUDADeviceAllocator(); + +/* Handles */ +TORCH_CUDA_CPP_API cusparseHandle_t getCurrentCUDASparseHandle(); +TORCH_CUDA_CPP_API cublasHandle_t getCurrentCUDABlasHandle(); +TORCH_CUDA_CPP_API cublasLtHandle_t getCurrentCUDABlasLtHandle(); + +TORCH_CUDA_CPP_API void clearCublasWorkspaces(); +struct WorkspaceMapWithMutex { + std::map, at::DataPtr> map; + std::shared_mutex mutex; +}; + +TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublas_handle_stream_to_workspace(); +TORCH_CUDA_CPP_API WorkspaceMapWithMutex& cublaslt_handle_stream_to_workspace(); +TORCH_CUDA_CPP_API size_t getChosenWorkspaceSize(); +TORCH_CUDA_CPP_API size_t getCUDABlasLtWorkspaceSize(); +TORCH_CUDA_CPP_API void* getCUDABlasLtWorkspace(); + +#if defined(CUDART_VERSION) || defined(USE_ROCM) +TORCH_CUDA_CPP_API cusolverDnHandle_t getCurrentCUDASolverDnHandle(); +#endif + +#if defined(USE_CUDSS) +TORCH_CUDA_CPP_API cudssHandle_t getCurrentCudssHandle(); +#endif + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h new file mode 100644 index 0000000000000000000000000000000000000000..874979b76c7b95c86e8e69166239e4d3ae7375d1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADataType.h @@ -0,0 +1,107 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at::cuda { + +template +cudaDataType getCudaDataType() { + static_assert(false && sizeof(scalar_t), "Cannot convert type to cudaDataType."); + return {}; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16F; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_32F; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_64F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_16F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_32F; +} +template<> inline cudaDataType getCudaDataType>() { + return CUDA_C_64F; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_8U; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_8I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_32I; +} + +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_64I; +} +template<> inline cudaDataType getCudaDataType() { + return CUDA_R_16BF; +} + +inline cudaDataType ScalarTypeToCudaDataType(const c10::ScalarType& scalar_type) { + switch (scalar_type) { + case c10::ScalarType::Byte: + return CUDA_R_8U; + case c10::ScalarType::Char: + return CUDA_R_8I; + case c10::ScalarType::Int: + return CUDA_R_32I; + 
case c10::ScalarType::Half: + return CUDA_R_16F; + case c10::ScalarType::Float: + return CUDA_R_32F; + case c10::ScalarType::Double: + return CUDA_R_64F; + case c10::ScalarType::ComplexHalf: + return CUDA_C_16F; + case c10::ScalarType::ComplexFloat: + return CUDA_C_32F; + case c10::ScalarType::ComplexDouble: + return CUDA_C_64F; + case c10::ScalarType::Short: + return CUDA_R_16I; + case c10::ScalarType::Long: + return CUDA_R_64I; + case c10::ScalarType::BFloat16: + return CUDA_R_16BF; +#if !defined(USE_ROCM) || ROCM_VERSION >= 60300 + case c10::ScalarType::Float8_e4m3fn: + return CUDA_R_8F_E4M3; + case c10::ScalarType::Float8_e5m2: + return CUDA_R_8F_E5M2; +#endif +#if defined(USE_ROCM) + case c10::ScalarType::Float8_e4m3fnuz: + return HIP_R_8F_E4M3_FNUZ; + case c10::ScalarType::Float8_e5m2fnuz: + return HIP_R_8F_E5M2_FNUZ; +#endif +#if (defined(CUDA_VERSION) && CUDA_VERSION >= 12080) || (defined(USE_ROCM) && ROCM_VERSION >= 70000) + case c10::ScalarType::Float4_e2m1fn_x2: + return CUDA_R_4F_E2M1; +#endif + default: + TORCH_INTERNAL_ASSERT(false, "Cannot convert ScalarType ", scalar_type, " to cudaDataType.") + } +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h new file mode 100644 index 0000000000000000000000000000000000000000..ef242a68e056e8feb2d30963ab975d61bf5eb756 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDADevice.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace at::cuda { + +inline Device getDeviceFromPtr(void* ptr) { + cudaPointerAttributes attr{}; + + AT_CUDA_CHECK(cudaPointerGetAttributes(&attr, ptr)); + +#if !defined(USE_ROCM) + TORCH_CHECK(attr.type != cudaMemoryTypeUnregistered, + "The specified pointer resides on host memory and is not registered with any CUDA device."); +#endif + + return {c10::DeviceType::CUDA, static_cast(attr.device)}; +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h new file mode 100644 index 0000000000000000000000000000000000000000..4d8645caf3b69a06b33771beaea217bfb9e56db5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAEvent.h @@ -0,0 +1,336 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +/* +* `cudaEventExternal` is a torch-specific flag that is used to +* indicate that the CUDAEvent will be used only for synchronization +* with work outside of the cuda graph, rather than creation of +* cross-stream dependencies within a cuda graph. Resources: +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-c-programming-guide/index.html#cross-stream-dependencies-and-events +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g3457b81d1d32c6a00f6132fbc2693d47 +* https://docs.nvidia.com/cuda/archive/12.9.0/cuda-runtime-api/group__CUDART__TYPES.html#group__CUDART__TYPES_1g0c23426b7252eaa9cef695859991304e +*/ +#define cudaEventExternal 0x08 + +namespace at::cuda { + +/* +* CUDAEvents are movable not copyable wrappers around CUDA's events. +* +* CUDAEvents are constructed lazily when first recorded unless it is +* reconstructed from a cudaIpcEventHandle_t. The event has a device, and this +* device is acquired from the first recording stream. However, if reconstructed +* from a handle, the device should be explicitly specified; or if ipc_handle() is +* called before the event is ever recorded, it will use the current device. +* Later streams that record the event must match this device. 
+*/ +struct TORCH_CUDA_CPP_API CUDAEvent { + // Constructors + // Default value for `flags` is specified below - it's cudaEventDisableTiming + CUDAEvent() noexcept = default; + CUDAEvent(unsigned int flags) noexcept : flags_{flags} {} + + CUDAEvent( + DeviceIndex device_index, const cudaIpcEventHandle_t* handle) : device_index_(device_index) { + CUDAGuard guard(device_index_); + + AT_CUDA_CHECK(cudaIpcOpenEventHandle(&event_, *handle)); + is_created_ = true; + } + + // Note: event destruction done on creating device to avoid creating a + // CUDA context on other devices. + ~CUDAEvent() { + try { + if (is_created_) { + CUDAGuard guard(device_index_); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_deletion(at::kCUDA, reinterpret_cast(event_)); + } + AT_CUDA_CHECK(cudaEventDestroy(event_)); + } + } catch (...) { /* No throw */ } + } + + CUDAEvent(const CUDAEvent&) = delete; + CUDAEvent& operator=(const CUDAEvent&) = delete; + + CUDAEvent(CUDAEvent&& other) noexcept { moveHelper(std::move(other)); } + CUDAEvent& operator=(CUDAEvent&& other) noexcept { + if (this != &other) { + moveHelper(std::move(other)); + } + return *this; + } + + operator cudaEvent_t() const { return event(); } + + // Less than operator (to allow use in sets) + friend bool operator<(const CUDAEvent& left, const CUDAEvent& right) { + return left.event_ < right.event_; + } + + std::optional device() const { + if (is_created_) { + return at::Device(at::kCUDA, device_index_); + } else { + return {}; + } + } + + bool isCreated() const { return is_created_; } + DeviceIndex device_index() const {return device_index_;} + cudaEvent_t event() const { return event_; } + + // Note: cudaEventQuery can be safely called from any device + bool query() const { + if (!is_created_) { + return true; + } + + cudaError_t err = cudaEventQuery(event_); + if (err == cudaSuccess) { + return true; + } else if (err != cudaErrorNotReady) { 
+ C10_CUDA_CHECK(err); + } else { + // ignore and clear the error if not ready + (void)cudaGetLastError(); + } + + return false; + } + + void record() { record(getCurrentCUDAStream()); } + + void recordOnce(const CUDAStream& stream) { + if (!was_recorded_) record(stream); + } + + // Note: cudaEventRecord must be called on the same device as the event. + void record(const CUDAStream& stream) { + if (!is_created_) { + createEvent(stream.device_index()); + } + + TORCH_CHECK(device_index_ == stream.device_index(), "Event device ", device_index_, + " does not match recording stream's device ", stream.device_index(), "."); + CUDAGuard guard(device_index_); + +#ifndef USE_ROCM + // it is an error to use cudaEventRecordExternal when not doing stream capture + unsigned int flags = (c10::cuda::currentStreamCaptureStatusMayInitCtx() != c10::cuda::CaptureStatus::None && external_) ? cudaEventRecordExternal : cudaEventRecordDefault; + AT_CUDA_CHECK(cudaEventRecordWithFlags(event_, stream, flags)); +#else + AT_CUDA_CHECK(cudaEventRecord(event_, stream)); +#endif + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_record(at::kCUDA, + reinterpret_cast(event_), + reinterpret_cast(stream.stream()) + ); + } + was_recorded_ = true; + } + + // Note: cudaStreamWaitEvent must be called on the same device as the stream. + // The event has no actual GPU resources associated with it. + void block(const CUDAStream& stream) { + if (is_created_) { + CUDAGuard guard(stream.device_index()); +#ifndef USE_ROCM + // it is an error to use cudaEventWaitExternal when not doing stream capture + unsigned int flags = (c10::cuda::currentStreamCaptureStatusMayInitCtx() != c10::cuda::CaptureStatus::None && external_) ? 
cudaEventWaitExternal : cudaEventWaitDefault; + AT_CUDA_CHECK(cudaStreamWaitEvent(stream, event_, flags)); +#else + AT_CUDA_CHECK(cudaStreamWaitEvent(stream, event_)); +#endif + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_wait(at::kCUDA, + reinterpret_cast(event_), + reinterpret_cast(stream.stream()) + ); + } + } + } + + // Note: cudaEventElapsedTime can be safely called from any device + float elapsed_time(const CUDAEvent& other) const { + TORCH_CHECK_VALUE( + !(flags_ & cudaEventDisableTiming) && !(other.flags_ & cudaEventDisableTiming), + "Both events must be created with argument 'enable_timing=True'."); + TORCH_CHECK_VALUE( + is_created_ && other.isCreated(), + "Both events must be recorded before calculating elapsed time."); + TORCH_CHECK( + query() && other.query(), + "Both events must be completed before calculating elapsed time."); + + float time_ms = 0; + // We do not strictly have to set the device index to the same as our event, + // but if we don't and the current device is not initialized, it will + // create a new cuda context, which will consume a lot of memory. 
+ CUDAGuard guard(device_index_); + // raise cudaErrorNotReady if either event is recorded but not yet completed + AT_CUDA_CHECK(cudaEventElapsedTime(&time_ms, event_, other.event_)); + return time_ms; + } + + // Note: cudaEventSynchronize can be safely called from any device + void synchronize() const { + if (is_created_) { + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_synchronization(at::kCUDA, reinterpret_cast(event_)); + } + AT_CUDA_CHECK(cudaEventSynchronize(event_)); + } + } + + // Note: cudaIpcGetEventHandle must be called on the same device as the event + void ipc_handle(cudaIpcEventHandle_t * handle) { + if (!is_created_) { + // this CUDAEvent object was initially constructed from flags but event_ + // is not created yet. + createEvent(getCurrentCUDAStream().device_index()); + } + CUDAGuard guard(device_index_); + AT_CUDA_CHECK(cudaIpcGetEventHandle(handle, event_)); + } + +private: + unsigned int flags_ = cudaEventDisableTiming; + bool is_created_ = false; + bool was_recorded_ = false; + bool external_ = false; + DeviceIndex device_index_ = -1; + cudaEvent_t event_{}; + + void createEvent(DeviceIndex device_index) { + external_ = (flags_ & cudaEventExternal) != 0; +#ifdef USE_ROCM + TORCH_CHECK(!external_, "External events are disallowed in rocm"); +#endif + flags_ &= ~cudaEventExternal; + device_index_ = device_index; + CUDAGuard guard(device_index_); + AT_CUDA_CHECK(cudaEventCreateWithFlags(&event_, flags_)); + const c10::impl::PyInterpreter* interp = c10::impl::GPUTrace::get_trace(); + if (C10_UNLIKELY(interp)) { + (*interp)->trace_gpu_event_creation(at::kCUDA, reinterpret_cast(event_)); + } + is_created_ = true; + } + + void moveHelper(CUDAEvent&& other) { + // Transfer ownership of all state from other to this + flags_ = other.flags_; + is_created_ = other.is_created_; + was_recorded_ = other.was_recorded_; + external_ = other.external_; + device_index_ = 
other.device_index_; + event_ = other.event_; + + // Reset other to a valid empty state to prevent double-free + // The moved-from object must not attempt to destroy the event + other.is_created_ = false; + other.event_ = cudaEvent_t{}; + } +}; + +// EventPool - Thread-safe pool of CUDA events to avoid expensive cudaEventCreate +// calls. cudaEventCreate when concurrently invoked from multiple threads can be +// very expensive (especially on certain device/driver combinations). +using CUDAEventPtr = + std::unique_ptr>; + +class EventPool { + public: + EventPool() : pools_(at::cuda::device_count()) {} + + CUDAEventPtr get(const DeviceIndex device) { + // If the device is invalid, return a default event and no pooling + if (device < 0 || device >= (DeviceIndex)pools_.size()) { + auto deleter = [](CUDAEvent* event) { + delete event; + }; + return CUDAEventPtr( + std::make_unique(cudaEventDisableTiming).release(), deleter); + } + + auto& pool = pools_[device]; + + // Create a destructor that returns the event to the appropriate device pool + auto destructor = [&pool](CUDAEvent* event) noexcept { + if (event != nullptr) { + std::lock_guard lock(pool.mutex_); + pool.event_pool_.emplace_back(event); + } + }; + + { + std::lock_guard lock(pool.mutex_); + if (!pool.event_pool_.empty()) { + auto event = std::move(pool.event_pool_.back()); + pool.event_pool_.pop_back(); + return CUDAEventPtr(event.release(), destructor); + } + } + + return CUDAEventPtr( + std::make_unique(cudaEventDisableTiming).release(), + destructor); + } + + void empty_cache() { + for (auto& pool : pools_) { + std::lock_guard lock(pool.mutex_); + pool.event_pool_.clear(); + } + } + + void init_num_events(const size_t num_events) { + for (DeviceIndex device_idx = 0; device_idx < at::cuda::device_count(); ++device_idx) { + CUDAGuard device_guard(device_idx); + std::vector temp_events; + temp_events.reserve(num_events); + for (size_t i = 0; i < num_events; ++i) { + auto event = get(device_idx); + // Record 
the event to ensure it's properly initialized + event->record(); + temp_events.emplace_back(std::move(event)); + } + // Events will be returned to pool when temp_events is destroyed + } + } + + private: + struct alignas(64) PerDevicePool { + alignas(64) std::mutex mutex_; + std::vector> event_pool_; + }; + + std::vector pools_; +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..2955f6b8a5f3eb667b47dccdc1ae2e47e791cead --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGeneratorImpl.h @@ -0,0 +1,185 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +namespace at { + +namespace cuda { +struct CUDAGraph; +} + +/** + * Note [CUDA Graph-safe RNG states] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * + * Strategy: + * ~~~~~~~~~ + * (It helps to look at + * cuda/detail/PhiloxCudaStateRaw.cuh and + * cuda/detail/UnpackRaw.cuh + * while you read this.) + * + * A CUDA graph containing multiple RNG ops behaves like a + * single giant kernel from the perspective of ops external + * to the graph. During graph capture, logic in CUDAGeneratorImpl + * records the total of all offset increments that occur in the + * graphed region, and records the final total as the offset for + * the entire graph. + * + * When the graph reruns, the logic that reruns it + * increments this device's CUDA generator's offset + * by that total. 
+ * + * Meanwhile, within the graph, at capture time, instead of + * populating PhiloxCudaStates with the uint64_t offset pulled + * directly from the global state, PhiloxCudaState uses a pointer + * to a one-element stream-local int64_t device tensor + * holding an initial offset value, and a uint64_t holding an + * intra-graph offset. (The intra-graph offset starts from zero + * when capture begins.) In each consumer kernel, + * at::cuda::philox::unpack computes the offset to use for this kernel + * as intra-graph offset + *initial offset. + * + * When the graph reruns, the logic that reruns it first + * fill_s the initial offset tensor with this device's + * CUDA generator's current offset. + * + * The control flow above ensures graphed execution is bitwise + * identical to eager execution as long as RNG ops are enqueued + * from a single thread, even if RNG ops and graphs containing + * RNG ops are enqueued and run simultaneously on multiple streams. + * + * Usage: + * ~~~~~~ + * PhiloxCudaState in this file, and unpack() in + * cuda/CUDAGraphsUtils.cuh allow non-divergent use of + * CUDAGeneratorImpl whether graph capture is underway or not. + * + * Each PhiloxCudaState instance should be used for one and only one + * consumer kernel. + * + * Example (see e.g. native/cuda/Dropout.cu): + * + * #include + * #include + * + * __global__ void kernel(..., PhiloxCudaState philox_args) { + * auto seeds = at::cuda::philox::unpack(philox_args); + * IndexType idx = blockIdx.x * blockDim.x + threadIdx.x; + * curandStatePhilox4_32_10_t state; + * curand_init(std::get<0>(seeds), // seed + * idx, // per-thread subsequence + * std::get<1>(seeds), // offset in subsequence + * &state); + * ... + * } + * + * host_caller(...) { + * PhiloxCudaState rng_engine_inputs; + * { + * // See Note [Acquire lock when using random generators] + * std::lock_guard lock(gen->mutex_); + * + * // gen could be HostState or DevState here! No divergent code needed! 
+ * rng_engine_inputs = gen->philox_cuda_state(offset_increment); + * } + * kernel<<<...>>>(..., rng_engine_inputs); + * } + * + */ + +struct CUDAGeneratorState : public c10::intrusive_ptr_target { + uint64_t seed_; + uint64_t philox_offset_per_thread_; + uint64_t offset_intragraph_; + bool capturing_{}; + std::unordered_set registered_graphs_; + at::TensorBase seed_extragraph_; + at::TensorBase offset_extragraph_; + + CUDAGeneratorState( + uint64_t seed = default_rng_seed_val, + uint64_t philox_offset_per_thread = 0, + uint64_t offset_intragraph = 0) + : seed_(seed), + philox_offset_per_thread_(philox_offset_per_thread), + offset_intragraph_(offset_intragraph) {} + + void increase(uint64_t increment); + + void register_graph(cuda::CUDAGraph* graph); + void unregister_graph(cuda::CUDAGraph* graph); + + void capture_prologue(); + // capture_epilogue returns the wholegraph_increment + uint64_t capture_epilogue(); + void replay_prologue(uint64_t wholegraph_increment); + c10::intrusive_ptr clone(); +}; + +struct TORCH_CUDA_CPP_API CUDAGeneratorImpl : public c10::GeneratorImpl { + // Constructors + CUDAGeneratorImpl(DeviceIndex device_index = -1); + CUDAGeneratorImpl( + DeviceIndex device_index, + c10::intrusive_ptr state_); + ~CUDAGeneratorImpl() override = default; + + // CUDAGeneratorImpl methods + std::shared_ptr clone() const; + void set_current_seed(uint64_t seed) override; + void set_offset(uint64_t offset) override; + uint64_t get_offset() const override; + uint64_t current_seed() const override; + uint64_t seed() override; + void set_state(const c10::TensorImpl& new_state) override; + c10::intrusive_ptr get_state() const override; + void graphsafe_set_state( + const c10::intrusive_ptr& state) override; + c10::intrusive_ptr graphsafe_get_state() const override; + + void set_philox_offset_per_thread(uint64_t offset); + uint64_t philox_offset_per_thread() const; + + void register_graph(cuda::CUDAGraph* graph); + void unregister_graph(cuda::CUDAGraph* graph); + + 
// Generates a PhiloxCudaState with a specified increment, and increment + // current state + PhiloxCudaState philox_cuda_state(uint64_t increment); + + bool reset_rnn_state() { + return !no_reset_rnn_state_.test_and_set(); + } + + // Temporarily accommodates call sites that use philox_engine_inputs. + // Allows incremental refactor of call sites to use philox_cuda_state. + std::pair philox_engine_inputs(uint64_t increment); + + static c10::DeviceType device_type(); + + private: + CUDAGeneratorImpl* clone_impl() const override; + + c10::intrusive_ptr state_; + std::atomic_flag no_reset_rnn_state_; +}; + +namespace cuda::detail { + +TORCH_CUDA_CPP_API const Generator& getDefaultCUDAGenerator( + DeviceIndex device_index = -1); +TORCH_CUDA_CPP_API Generator createCUDAGenerator(DeviceIndex device_index = -1); + +} // namespace cuda::detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h new file mode 100644 index 0000000000000000000000000000000000000000..9687b067d571c42f3ea5d6a417ec700b7dc122d0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraph.h @@ -0,0 +1,100 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace at { + +struct Generator; +struct CUDAGeneratorImpl; +struct CUDAGeneratorState; + +namespace cuda { + +// Standalone way to get a unique mempool id usable as a pool=... 
argument +// to CUDAGraph::capture_begin +TORCH_CUDA_CPP_API MempoolId_t graph_pool_handle(); + +struct TORCH_CUDA_CPP_API CUDAGraph { + CUDAGraph(bool keep_graph=false); + ~CUDAGraph(); + + // See Note [Explicit Registration of Generators to the CUDA Graph] + void register_generator_state(c10::intrusive_ptr state); + void register_generator_state(const at::Generator& generator); + void capture_begin( + MempoolId_t pool = {0, 0}, + cudaStreamCaptureMode capture_mode = cudaStreamCaptureModeGlobal); + void capture_end(); + void instantiate(); + void replay(); + void reset(); + MempoolId_t pool(); + void enable_debug_mode(); + void debug_dump(const std::string& debug_path); + cudaGraph_t raw_cuda_graph(); + cudaGraphExec_t raw_cuda_graph_exec(); + + protected: + cudaGraph_t graph_ = nullptr; + cudaGraphExec_t graph_exec_ = nullptr; + + // internal states so reset() can do its best cleaning up + + // Set to true in capture_end if cudaStreamEndCapture succeeded + // Set back to false after instantiate() unless keep_graph=True or + // enable_debug_mode() was called on any CUDAGraph instance. + bool has_graph_ = false; + // Set to true in capture_end if cudaStreamEndCapture succeeded + bool capture_ended_ = false; + // Set to true in capture_end if cudaGraphInstantiate succeeded + bool has_graph_exec_ = false; + + // the ID assigned by cuda during graph capture, + // used to identify when a stream is participating in capture + CaptureId_t capture_id_ = 0; + + // uuid used to request a particular private mempool from CUDACachingAllocator. + // By default, this will be set to {id_, 0}. + // + // If capture_begin is called with "pool=other_graph.pool()", this graph's mempool_id_ + // will be set to the other graph's mempool_id_, and therefore share a mempool with the + // other graph. + // + // If capture_begin is called with "pool=handle" where "handle" came from graph_pool_handle(), + // it will share a mempool with any other captures that used "pool=handle". 
+ // + // Sharing a mempool across graphs saves memory, and it's safe if you + // know you'll replay those graphs in the same order you captured them. + MempoolId_t mempool_id_; + + // Stream on which capture began + at::cuda::CUDAStream capture_stream_; + + // multiple generator states and their wholegraph_increments in this graph + // that are managed by the CUDA Graph + ska::flat_hash_map, uint64_t> + captured_generator_states_; + + // Device where capture occurred. Right now, for simplicity, we require all ops + // in a capture to run on the same device, but this is a limitation of CUDAGraph, + // not CUDA itself. We can straightforwardly modify CUDAGraph to support multi-device + // captures if needed. + // init capture_dev_ as UNDEFINED_DEVICE to check that it stores the real device id in the destructor + static constexpr c10::DeviceIndex UNDEFINED_DEVICE = -1; + c10::DeviceIndex capture_dev_{UNDEFINED_DEVICE}; + + bool keep_graph_; +}; + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..5ddebd32b16f3e0324563cc5ad25f59c493323bf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGraphsUtils.cuh @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +// c10/cuda/CUDAGraphsC10Utils.h has utils used by both c10 and aten. +// This file adds utils used by aten only. 
+ +namespace at::cuda { + +using CaptureId_t = c10::cuda::CaptureId_t; +using CaptureStatus = c10::cuda::CaptureStatus; + +// Use this version where you don't want to create a CUDA context if none exists. +inline CaptureStatus currentStreamCaptureStatus() { + // don't create a context if we don't have to + if (c10::cuda::hasPrimaryContext(c10::cuda::current_device())) { + return c10::cuda::currentStreamCaptureStatusMayInitCtx(); + } else { + return CaptureStatus::None; + } +} + +inline void assertNotCapturing(const std::string& attempt) { + auto status = currentStreamCaptureStatus(); + TORCH_CHECK(status == CaptureStatus::None, + attempt, + " during CUDA graph capture. If you need this call to be captured, " + "please file an issue. " + "Current cudaStreamCaptureStatus: ", + status); +} + +inline void errorIfCapturingCudnnBenchmark(const std::string& version_specific) { + auto status = currentStreamCaptureStatus(); + TORCH_CHECK(status == CaptureStatus::None, + "Current cudaStreamCaptureStatus: ", + status, + "\nCapturing ", + version_specific, + "is prohibited. Possible causes of this error:\n" + "1. No warmup iterations occurred before capture.\n" + "2. The convolutions you're trying to capture use dynamic shapes, " + "in which case capturing them is generally prohibited."); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h new file mode 100644 index 0000000000000000000000000000000000000000..3777a1938f43d5bcf5598d7c08e98f361b60d2b6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAGreenContext.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +// Forward declare green context as opaque ptr +typedef struct CUgreenCtx_st* CUgreenCtx; + +namespace at::cuda { + +class TORCH_CUDA_CPP_API GreenContext { + public: + // Green context creation + static std::unique_ptr create( + uint32_t num_sms, + std::optional device_id); + ~GreenContext() noexcept; + + // Delete copy constructor and assignment + GreenContext(const GreenContext&) = delete; + GreenContext& operator=(const GreenContext&) = delete; + + // Make this context current + void setContext(); + + void popContext(); + + private: + GreenContext(uint32_t device_id, uint32_t num_sms); + // Implement move operations + GreenContext(GreenContext&& other) noexcept; + GreenContext& operator=(GreenContext&& other) noexcept; + + int32_t device_id_ = -1; + CUgreenCtx green_ctx_ = nullptr; + CUcontext context_ = nullptr; + cudaStream_t parent_stream_ = nullptr; +}; +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h new file mode 100644 index 0000000000000000000000000000000000000000..9e4cfcb3602e14f31d8355d447361f1487c4e1dc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAScaledBlas.h @@ -0,0 +1,179 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include +#include +#include +#include +#include +#define TORCH_ASSERT_ONLY_METHOD_OPERATORS +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef USE_FBGEMM_GENAI +#include +#endif + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#include +#else +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#endif + +using at::blas::ScalingType; +using at::blas::SwizzleType; + +namespace at::cuda::scaled { + +static bool _scaled_mm_allowed_device(bool sm90_only=false, bool sm100_only=false) { +#ifdef USE_ROCM + static const std::vector archs = { + "gfx942", +#if ROCM_VERSION >= 60300 + "gfx1200", "gfx1201", +#endif +#if ROCM_VERSION >= 60500 + "gfx950" +#endif + }; + return at::detail::getCUDAHooks().isGPUArch(archs); +#else + auto dprops = at::cuda::getCurrentDeviceProperties(); + + if (sm90_only || sm100_only) { + return (sm90_only && dprops->major == 9) || (sm100_only && dprops->major == 10); + } else { + return dprops->major >= 9 || (dprops->major == 8 && dprops->minor == 9); + } +#endif +} + +#ifdef USE_ROCM +static bool _scaled_mm_is_fnuz() { + return 
at::detail::getCUDAHooks().isGPUArch({"gfx942"}); +} +#endif +/** + * Track concrete implementations available + */ +enum class ScaledGemmImplementation { + NONE = 0, + TENSORWISE_TENSORWISE = 1, + ROWWISE_ROWWISE = 2, + BLOCK_128x128_1x128 = 3, + BLOCK_1x128_128x128 = 4, + BLOCK_1x128_1x128 = 5, + MXFP8_MXFP8 = 6, + NVFP4_NVFP4 = 7, + NVFP4_NVFP4_SINGLE_SCALE = 8, + MXFP4_MXFP4 = 9, +}; + +/** + * Convert passed int (enum) from python back into a + * strictly-typed enum + */ +template +std::vector convert_int_to_enum(ArrayType& v) { + std::vector converted; + converted.reserve(v.size()); + + for (auto vi : v) { + converted.push_back(static_cast(vi)); + } + return converted; +} + +bool check_tensorwise_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + + +bool check_rowwise_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_nvfp4_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_nvfp4_recipe_single_scale + (c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_deepseek_recipe(ScalingType, + ScalingType, + c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_mxfp8_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +bool check_mxfp4_recipe(c10::ScalarType, + std::vector&, + ArrayRef&, + c10::ScalarType, + std::vector&, + ArrayRef&); + +} // namespace at::native::cuda::blas::scaled + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h new file mode 100644 index 0000000000000000000000000000000000000000..0559bd1ec2aaf8133c695488538825021fa83a35 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparse.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#if defined(USE_ROCM) +#include +#define HIPSPARSE_VERSION ((hipsparseVersionMajor*100000) + (hipsparseVersionMinor*100) + hipsparseVersionPatch) +#endif + + +// cuSparse Generic API spsv function was added in CUDA 11.3.0 +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11500) +#define AT_USE_CUSPARSE_GENERIC_SPSV() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SPSV() 0 +#endif + +// cuSparse Generic API spsm function was added in CUDA 11.3.1 +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11600) +#define AT_USE_CUSPARSE_GENERIC_SPSM() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SPSM() 0 +#endif + +// cuSparse Generic API sddmm function was added in CUDA 11.2.1 (cuSparse version 11400) +#if defined(CUDART_VERSION) && defined(CUSPARSE_VERSION) && (CUSPARSE_VERSION >= 11400) +#define AT_USE_CUSPARSE_GENERIC_SDDMM() 1 +#else +#define AT_USE_CUSPARSE_GENERIC_SDDMM() 0 +#endif + +// BSR triangular solve functions were added in hipSPARSE 1.11.2 (ROCm 4.5.0) +#if defined(CUDART_VERSION) || defined(USE_ROCM) +#define AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() 1 +#else +#define AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() 0 +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h new file mode 100644 index 0000000000000000000000000000000000000000..8ffcdad0b58aa52b696a7ec0b1928e4b86c3865a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseBlas.h @@ -0,0 +1,325 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +/* + Provides a subset of cuSPARSE functions as templates: + + csrgeam2(...) + + where scalar_t is double, float, c10::complex or c10::complex. + The functions are available in at::cuda::sparse namespace. +*/ + +#include +#include + +// NOLINTBEGIN(misc-misplaced-const) +namespace at::cuda::sparse { + +#define CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, int m, int n, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, int nnzA, \ + const scalar_t *csrSortedValA, const int *csrSortedRowPtrA, \ + const int *csrSortedColIndA, const scalar_t *beta, \ + const cusparseMatDescr_t descrB, int nnzB, \ + const scalar_t *csrSortedValB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + const scalar_t *csrSortedValC, const int *csrSortedRowPtrC, \ + const int *csrSortedColIndC, size_t *pBufferSizeInBytes + +template +inline void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::csrgeam2_bufferSizeExt: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(float)); +template <> +void csrgeam2_bufferSizeExt( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(double)); +template <> +void csrgeam2_bufferSizeExt>( + 
CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(c10::complex)); +template <> +void csrgeam2_bufferSizeExt>( + CUSPARSE_CSRGEAM2_BUFFERSIZE_ARGTYPES(c10::complex)); + +#define CUSPARSE_CSRGEAM2_NNZ_ARGTYPES() \ + cusparseHandle_t handle, int m, int n, const cusparseMatDescr_t descrA, \ + int nnzA, const int *csrSortedRowPtrA, const int *csrSortedColIndA, \ + const cusparseMatDescr_t descrB, int nnzB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + int *csrSortedRowPtrC, int *nnzTotalDevHostPtr, void *workspace + +template +inline void csrgeam2Nnz(CUSPARSE_CSRGEAM2_NNZ_ARGTYPES()) { + TORCH_CUDASPARSE_CHECK(cusparseXcsrgeam2Nnz( + handle, + m, + n, + descrA, + nnzA, + csrSortedRowPtrA, + csrSortedColIndA, + descrB, + nnzB, + csrSortedRowPtrB, + csrSortedColIndB, + descrC, + csrSortedRowPtrC, + nnzTotalDevHostPtr, + workspace)); +} + +#define CUSPARSE_CSRGEAM2_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, int m, int n, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, int nnzA, \ + const scalar_t *csrSortedValA, const int *csrSortedRowPtrA, \ + const int *csrSortedColIndA, const scalar_t *beta, \ + const cusparseMatDescr_t descrB, int nnzB, \ + const scalar_t *csrSortedValB, const int *csrSortedRowPtrB, \ + const int *csrSortedColIndB, const cusparseMatDescr_t descrC, \ + scalar_t *csrSortedValC, int *csrSortedRowPtrC, int *csrSortedColIndC, \ + void *pBuffer + +template +inline void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::csrgeam2: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(float)); +template <> +void csrgeam2(CUSPARSE_CSRGEAM2_ARGTYPES(double)); +template <> +void csrgeam2>( + CUSPARSE_CSRGEAM2_ARGTYPES(c10::complex)); +template <> +void csrgeam2>( + CUSPARSE_CSRGEAM2_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRMM_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t 
dirA, \ + cusparseOperation_t transA, cusparseOperation_t transB, int mb, int n, \ + int kb, int nnzb, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + const scalar_t *B, int ldb, const scalar_t *beta, scalar_t *C, int ldc + +template +inline void bsrmm(CUSPARSE_BSRMM_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrmm: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrmm(CUSPARSE_BSRMM_ARGTYPES(float)); +template <> +void bsrmm(CUSPARSE_BSRMM_ARGTYPES(double)); +template <> +void bsrmm>(CUSPARSE_BSRMM_ARGTYPES(c10::complex)); +template <> +void bsrmm>(CUSPARSE_BSRMM_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRMV_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nb, int nnzb, \ + const scalar_t *alpha, const cusparseMatDescr_t descrA, \ + const scalar_t *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, \ + int blockDim, const scalar_t *x, const scalar_t *beta, scalar_t *y + +template +inline void bsrmv(CUSPARSE_BSRMV_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrmv: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrmv(CUSPARSE_BSRMV_ARGTYPES(float)); +template <> +void bsrmv(CUSPARSE_BSRMV_ARGTYPES(double)); +template <> +void bsrmv>(CUSPARSE_BSRMV_ARGTYPES(c10::complex)); +template <> +void bsrmv>(CUSPARSE_BSRMV_ARGTYPES(c10::complex)); + +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() + +#define CUSPARSE_BSRSV2_BUFFER_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, \ + const cusparseMatDescr_t descrA, scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, int *pBufferSizeInBytes + +template +inline void 
bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_bufferSize: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(float)); +template <> +void bsrsv2_bufferSize(CUSPARSE_BSRSV2_BUFFER_ARGTYPES(double)); +template <> +void bsrsv2_bufferSize>( + CUSPARSE_BSRSV2_BUFFER_ARGTYPES(c10::complex)); +template <> +void bsrsv2_bufferSize>( + CUSPARSE_BSRSV2_BUFFER_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_analysis: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(float)); +template <> +void bsrsv2_analysis(CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(double)); +template <> +void bsrsv2_analysis>( + CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(c10::complex)); +template <> +void bsrsv2_analysis>( + CUSPARSE_BSRSV2_ANALYSIS_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSV2_SOLVE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, int mb, int nnzb, const scalar_t *alpha, \ + const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsv2Info_t info, const scalar_t *x, scalar_t *y, \ + cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsv2_solve: not 
implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(float)); +template <> +void bsrsv2_solve(CUSPARSE_BSRSV2_SOLVE_ARGTYPES(double)); +template <> +void bsrsv2_solve>( + CUSPARSE_BSRSV2_SOLVE_ARGTYPES(c10::complex)); +template <> +void bsrsv2_solve>( + CUSPARSE_BSRSV2_SOLVE_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_BUFFER_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const cusparseMatDescr_t descrA, scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsm2Info_t info, int *pBufferSizeInBytes + +template +inline void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_bufferSize: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(float)); +template <> +void bsrsm2_bufferSize(CUSPARSE_BSRSM2_BUFFER_ARGTYPES(double)); +template <> +void bsrsm2_bufferSize>( + CUSPARSE_BSRSM2_BUFFER_ARGTYPES(c10::complex)); +template <> +void bsrsm2_bufferSize>( + CUSPARSE_BSRSM2_BUFFER_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const cusparseMatDescr_t descrA, const scalar_t *bsrValA, \ + const int *bsrRowPtrA, const int *bsrColIndA, int blockDim, \ + bsrsm2Info_t info, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_analysis: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(float)); +template <> +void 
bsrsm2_analysis(CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(double)); +template <> +void bsrsm2_analysis>( + CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(c10::complex)); +template <> +void bsrsm2_analysis>( + CUSPARSE_BSRSM2_ANALYSIS_ARGTYPES(c10::complex)); + +#define CUSPARSE_BSRSM2_SOLVE_ARGTYPES(scalar_t) \ + cusparseHandle_t handle, cusparseDirection_t dirA, \ + cusparseOperation_t transA, cusparseOperation_t transX, int mb, int n, \ + int nnzb, const scalar_t *alpha, const cusparseMatDescr_t descrA, \ + const scalar_t *bsrValA, const int *bsrRowPtrA, const int *bsrColIndA, \ + int blockDim, bsrsm2Info_t info, const scalar_t *B, int ldb, \ + scalar_t *X, int ldx, cusparseSolvePolicy_t policy, void *pBuffer + +template +inline void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(scalar_t)) { + TORCH_INTERNAL_ASSERT( + false, + "at::cuda::sparse::bsrsm2_solve: not implemented for ", + typeid(scalar_t).name()); +} + +template <> +void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(float)); +template <> +void bsrsm2_solve(CUSPARSE_BSRSM2_SOLVE_ARGTYPES(double)); +template <> +void bsrsm2_solve>( + CUSPARSE_BSRSM2_SOLVE_ARGTYPES(c10::complex)); +template <> +void bsrsm2_solve>( + CUSPARSE_BSRSM2_SOLVE_ARGTYPES(c10::complex)); + +#endif // AT_USE_HIPSPARSE_TRIANGULAR_SOLVE + +} // namespace at::cuda::sparse +// NOLINTEND(misc-misplaced-const) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h new file mode 100644 index 0000000000000000000000000000000000000000..e70e503fa8fc95cac7592f48bc965040d50fbf67 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDASparseDescriptors.h @@ -0,0 +1,257 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#if defined(USE_ROCM) +#include +#endif + +namespace at::cuda::sparse { + +template +struct CuSparseDescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + TORCH_CUDASPARSE_CHECK(destructor(x)); + } + } +}; + +template +class CuSparseDescriptor { + public: + T* descriptor() const { + return descriptor_.get(); + } + T* descriptor() { + return descriptor_.get(); + } + + protected: + std::unique_ptr> descriptor_; +}; + +template +struct ConstCuSparseDescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + TORCH_CUDASPARSE_CHECK(destructor(x)); + } + } +}; + +template +class ConstCuSparseDescriptor { + public: + T* descriptor() const { + return descriptor_.get(); + } + T* descriptor() { + return descriptor_.get(); + } + + protected: + std::unique_ptr> descriptor_; +}; + +#if defined(USE_ROCM) +using cusparseMatDescr = std::remove_pointer_t; +using cusparseDnMatDescr = std::remove_pointer_t; +using cusparseDnVecDescr = std::remove_pointer_t; +using cusparseSpMatDescr = std::remove_pointer_t; +using cusparseSpMatDescr = std::remove_pointer_t; +using cusparseSpGEMMDescr = std::remove_pointer_t; +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() +using bsrsv2Info = std::remove_pointer_t; +using bsrsm2Info = std::remove_pointer_t; +#endif +#endif + +// NOTE: This is only needed for 
CUDA 11 and earlier, since CUDA 12 introduced +// API for const descriptors +cusparseStatus_t destroyConstDnMat(const cusparseDnMatDescr* dnMatDescr); + +class TORCH_CUDA_CPP_API CuSparseMatDescriptor + : public CuSparseDescriptor { + public: + CuSparseMatDescriptor() { + cusparseMatDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateMatDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } + + CuSparseMatDescriptor(bool upper, bool unit) { + cusparseFillMode_t fill_mode = + upper ? CUSPARSE_FILL_MODE_UPPER : CUSPARSE_FILL_MODE_LOWER; + cusparseDiagType_t diag_type = + unit ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT; + cusparseMatDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateMatDescr(&raw_descriptor)); + TORCH_CUDASPARSE_CHECK(cusparseSetMatFillMode(raw_descriptor, fill_mode)); + TORCH_CUDASPARSE_CHECK(cusparseSetMatDiagType(raw_descriptor, diag_type)); + descriptor_.reset(raw_descriptor); + } +}; + +#if AT_USE_HIPSPARSE_TRIANGULAR_SOLVE() + +class TORCH_CUDA_CPP_API CuSparseBsrsv2Info + : public CuSparseDescriptor { + public: + CuSparseBsrsv2Info() { + bsrsv2Info_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateBsrsv2Info(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +class TORCH_CUDA_CPP_API CuSparseBsrsm2Info + : public CuSparseDescriptor { + public: + CuSparseBsrsm2Info() { + bsrsm2Info_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseCreateBsrsm2Info(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +#endif // AT_USE_HIPSPARSE_TRIANGULAR_SOLVE + +cusparseIndexType_t getCuSparseIndexType(const c10::ScalarType& scalar_type); + + class TORCH_CUDA_CPP_API CuSparseDnMatDescriptor + : public ConstCuSparseDescriptor< + cusparseDnMatDescr, + &cusparseDestroyDnMat> { + public: + explicit CuSparseDnMatDescriptor( + const Tensor& input, + int64_t batch_offset = -1); + }; + + class TORCH_CUDA_CPP_API CuSparseConstDnMatDescriptor + : 
public ConstCuSparseDescriptor< + const cusparseDnMatDescr, + &destroyConstDnMat> { + public: + explicit CuSparseConstDnMatDescriptor( + const Tensor& input, + int64_t batch_offset = -1); + cusparseDnMatDescr* unsafe_mutable_descriptor() const { + return const_cast(descriptor()); + } + cusparseDnMatDescr* unsafe_mutable_descriptor() { + return const_cast(descriptor()); + } + }; + + class TORCH_CUDA_CPP_API CuSparseDnVecDescriptor + : public ConstCuSparseDescriptor< + cusparseDnVecDescr, + &cusparseDestroyDnVec> { + public: + explicit CuSparseDnVecDescriptor(const Tensor& input); + }; + + class TORCH_CUDA_CPP_API CuSparseSpMatDescriptor + : public ConstCuSparseDescriptor< + cusparseSpMatDescr, + &cusparseDestroySpMat> {}; + +class TORCH_CUDA_CPP_API CuSparseSpMatCsrDescriptor + : public CuSparseSpMatDescriptor { + public: + explicit CuSparseSpMatCsrDescriptor(const Tensor& input, int64_t batch_offset = -1); + + std::tuple get_size() { + int64_t rows = 0, cols = 0, nnz = 0; + TORCH_CUDASPARSE_CHECK(cusparseSpMatGetSize( + this->descriptor(), + &rows, + &cols, + &nnz)); + return std::make_tuple(rows, cols, nnz); + } + + void set_tensor(const Tensor& input) { + auto crow_indices = input.crow_indices(); + auto col_indices = input.col_indices(); + auto values = input.values(); + + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(crow_indices.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(col_indices.is_contiguous()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(values.is_contiguous()); + TORCH_CUDASPARSE_CHECK(cusparseCsrSetPointers( + this->descriptor(), + crow_indices.data_ptr(), + col_indices.data_ptr(), + values.data_ptr())); + } + +#if AT_USE_CUSPARSE_GENERIC_SPSV() + void set_mat_fill_mode(bool upper) { + cusparseFillMode_t fill_mode = + upper ? 
CUSPARSE_FILL_MODE_UPPER : CUSPARSE_FILL_MODE_LOWER; + TORCH_CUDASPARSE_CHECK(cusparseSpMatSetAttribute( + this->descriptor(), + CUSPARSE_SPMAT_FILL_MODE, + &fill_mode, + sizeof(fill_mode))); + } + + void set_mat_diag_type(bool unit) { + cusparseDiagType_t diag_type = + unit ? CUSPARSE_DIAG_TYPE_UNIT : CUSPARSE_DIAG_TYPE_NON_UNIT; + TORCH_CUDASPARSE_CHECK(cusparseSpMatSetAttribute( + this->descriptor(), + CUSPARSE_SPMAT_DIAG_TYPE, + &diag_type, + sizeof(diag_type))); + } +#endif +}; + +#if AT_USE_CUSPARSE_GENERIC_SPSV() +class TORCH_CUDA_CPP_API CuSparseSpSVDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpSVDescriptor() { + cusparseSpSVDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpSV_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; +#endif + +#if AT_USE_CUSPARSE_GENERIC_SPSM() +class TORCH_CUDA_CPP_API CuSparseSpSMDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpSMDescriptor() { + cusparseSpSMDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpSM_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; +#endif + +class TORCH_CUDA_CPP_API CuSparseSpGEMMDescriptor + : public CuSparseDescriptor { + public: + CuSparseSpGEMMDescriptor() { + cusparseSpGEMMDescr_t raw_descriptor = nullptr; + TORCH_CUDASPARSE_CHECK(cusparseSpGEMM_createDescr(&raw_descriptor)); + descriptor_.reset(raw_descriptor); + } +}; + +} // namespace at::cuda::sparse + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh new file mode 100644 index 0000000000000000000000000000000000000000..41d847081a8b1459d00c2a4b0ef42bfa8de25b4c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDATensorMethods.cuh @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace at { +template <> +inline __half* Tensor::data() const { + return reinterpret_cast<__half*>(data()); +} +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..47a464883e6aa9d8486c87b1e4219cf870b2cbdd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CUDAUtils.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at::cuda { + +// Check if every tensor in a list of tensors matches the current +// device. 
+inline bool check_device(ArrayRef ts) { + if (ts.empty()) { + return true; + } + Device curDevice = Device(kCUDA, current_device()); + for (const Tensor& t : ts) { + if (t.device() != curDevice) return false; + } + return true; +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..770d4730bc5c790ea853985060d5f2d9bd05bc80 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/CachingHostAllocator.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at::cuda { + +// +// A caching allocator for CUDA host allocations (pinned memory). +// +// This provides a drop-in replacement for THCudaHostAllocator, which reuses +// freed pinned (page-locked) memory allocations. This avoids device +// synchronizations due to cudaFreeHost calls. +// +// To ensure correct behavior, THCCachingHostAllocator_recordEvent must be +// called anytime a pointer from this allocator is used in a cudaMemcpyAsync +// call between host and device, and passed the corresponding context from the +// allocation. This is currently invoked by at::native::copy_kernel_cuda. +// +C10_DEPRECATED_MESSAGE( + "at::cuda::getCachingHostAllocator() is deprecated. Please use at::getHostAllocator(at::kCUDA) instead.") +inline TORCH_CUDA_CPP_API at::HostAllocator* getCachingHostAllocator() { + return at::getHostAllocator(at::kCUDA); +} + +// Records an event in the specified stream. 
The allocation corresponding to the +// input `ptr`/`ctx` will not be reused until the event has occurred. +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_recordEvent(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->record_event(...) instead.") +inline TORCH_CUDA_CPP_API bool CachingHostAllocator_recordEvent( + void* ptr, + void* ctx, + c10::cuda::CUDAStream stream) { + return getHostAllocator(at::kCUDA)->record_event(ptr, ctx, stream.unwrap()); +} + +// Releases cached pinned memory allocations via cudaHostFree +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_emptyCache() is deprecated. Please use at::getHostAllocator(at::kCUDA)->empty_cache() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_emptyCache() { + getHostAllocator(at::kCUDA)->empty_cache(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::HostAlloc(...) is deprecated. Please use at::getHostAllocator(at::kCUDA)->allocate(...) instead.") +inline TORCH_CUDA_CPP_API at::DataPtr HostAlloc(size_t size) { + return getHostAllocator(at::kCUDA)->allocate(size); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_getStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->get_stats() instead.") +inline TORCH_CUDA_CPP_API at::HostStats CachingHostAllocator_getStats() { + return getHostAllocator(at::kCUDA)->get_stats(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_resetAccumulatedStats() is deprecated. Please use at::getHostAllocator(at::kCUDA)->reset_accumulated_stats() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetAccumulatedStats() { + getHostAllocator(at::kCUDA)->reset_accumulated_stats(); +} + +C10_DEPRECATED_MESSAGE( + "at::cuda::CachingHostAllocator_resetPeakStats() is deprecated. 
Please use at::getHostAllocator(at::kCUDA)->reset_peak_stats() instead.") +inline TORCH_CUDA_CPP_API void CachingHostAllocator_resetPeakStats() { + getHostAllocator(at::kCUDA)->reset_peak_stats(); +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..70b745c77b783f9fa2ab6e977b9f9b3f47464158 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/DeviceUtils.cuh @@ -0,0 +1,126 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +__device__ __forceinline__ unsigned int ACTIVE_MASK() +{ +#if !defined(USE_ROCM) + return __activemask(); +#else +// will be ignored anyway + return 0xffffffff; +#endif +} + +__device__ __forceinline__ void WARP_SYNC(unsigned mask = 0xffffffff) { +#if !defined(USE_ROCM) + return __syncwarp(mask); +#endif +} + +#if defined(USE_ROCM) +__device__ __forceinline__ unsigned long long int WARP_BALLOT(int predicate) +{ +return __ballot(predicate); +} +#else +__device__ __forceinline__ unsigned int WARP_BALLOT(int predicate, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __ballot_sync(mask, predicate); +#else + return __ballot(predicate); +#endif +} +#endif + +template +__device__ __forceinline__ T WARP_SHFL_XOR(T value, int laneMask, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_xor_sync(mask, value, laneMask, width); +#else + return __shfl_xor(value, laneMask, width); +#endif +} + +template +__device__ __forceinline__ T 
WARP_SHFL(T value, int srcLane, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_sync(mask, value, srcLane, width); +#else + return __shfl(value, srcLane, width); +#endif +} + +template +__device__ __forceinline__ T WARP_SHFL_UP(T value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_up_sync(mask, value, delta, width); +#else + return __shfl_up(value, delta, width); +#endif +} + +template +__device__ __forceinline__ T WARP_SHFL_DOWN(T value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return __shfl_down_sync(mask, value, delta, width); +#else + return __shfl_down(value, delta, width); +#endif +} + +#if defined(USE_ROCM) +template<> +__device__ __forceinline__ int64_t WARP_SHFL_DOWN(int64_t value, unsigned int delta, int width , unsigned int mask) +{ + //(HIP doesn't support int64_t). Trick from https://devblogs.nvidia.com/faster-parallel-reductions-kepler/ + int2 a = *reinterpret_cast(&value); + a.x = __shfl_down(a.x, delta); + a.y = __shfl_down(a.y, delta); + return *reinterpret_cast(&a); +} +#endif + +template<> +__device__ __forceinline__ c10::Half WARP_SHFL_DOWN(c10::Half value, unsigned int delta, int width, unsigned int mask) +{ + return c10::Half(WARP_SHFL_DOWN(value.x, delta, width, mask), c10::Half::from_bits_t{}); +} + +template +__device__ __forceinline__ c10::complex WARP_SHFL_DOWN(c10::complex value, unsigned int delta, int width = warpSize, unsigned int mask = 0xffffffff) +{ +#if !defined(USE_ROCM) + return c10::complex( + __shfl_down_sync(mask, value.real_, delta, width), + __shfl_down_sync(mask, value.imag_, delta, width)); +#else + return c10::complex( + __shfl_down(value.real_, delta, width), + __shfl_down(value.imag_, delta, width)); +#endif +} + +/** + * For CC 3.5+, perform a load using __ldg + */ +template +__device__ __forceinline__ T doLdg(const T* p) { +#if 
__CUDA_ARCH__ >= 350 && !defined(USE_ROCM) + return __ldg(p); +#else + return *p; +#endif +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h new file mode 100644 index 0000000000000000000000000000000000000000..c7d3e0fce43c029615ddd8a63c7269ccb014989b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/EmptyTensor.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace at::detail { + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + ScalarType dtype, + std::optional device_opt, + std::optional memory_format_opt); + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt, + std::optional memory_format_opt); + +TORCH_CUDA_CPP_API TensorBase empty_cuda( + IntArrayRef size, + const TensorOptions &options); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + ScalarType dtype, + std::optional device_opt); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + std::optional dtype_opt, + std::optional layout_opt, + std::optional device_opt, + std::optional pin_memory_opt); + +TORCH_CUDA_CPP_API TensorBase empty_strided_cuda( + IntArrayRef size, + IntArrayRef stride, + const TensorOptions &options); + + +} // namespace at::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h new file mode 100644 index 0000000000000000000000000000000000000000..f70102e9320176c04ffadf127cc46fcb55246d03 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Exceptions.h @@ -0,0 +1,235 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#if !defined(USE_ROCM) +#include +#else +#include +#endif + +#if defined(USE_CUDSS) +#include +#endif + +#include +#include +#include + + +namespace c10 { + +class CuDNNError : public c10::Error { + using Error::Error; +}; + +} // namespace c10 + +#define AT_CUDNN_FRONTEND_CHECK(EXPR, ...) \ + do { \ + auto error_object = EXPR; \ + if (!error_object.is_good()) { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN Frontend error: ", error_object.get_message()); \ + } \ + } while (0) \ + +#define AT_CUDNN_CHECK_WITH_SHAPES(EXPR, ...) AT_CUDNN_CHECK(EXPR, "\n", ##__VA_ARGS__) + +// See Note [CHECK macro] +#define AT_CUDNN_CHECK(EXPR, ...) \ + do { \ + cudnnStatus_t status = EXPR; \ + if (status != CUDNN_STATUS_SUCCESS) { \ + if (status == CUDNN_STATUS_NOT_SUPPORTED) { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN error: ", \ + cudnnGetErrorString(status), \ + ". 
This error may appear if you passed in a non-contiguous input.", ##__VA_ARGS__); \ + } else { \ + TORCH_CHECK_WITH(CuDNNError, false, \ + "cuDNN error: ", cudnnGetErrorString(status), ##__VA_ARGS__); \ + } \ + } \ + } while (0) + +namespace at::cuda::blas { +C10_EXPORT const char* _cublasGetErrorEnum(cublasStatus_t error); +} // namespace at::cuda::blas + +#define TORCH_CUDABLAS_CHECK(EXPR) \ + do { \ + cublasStatus_t __err = EXPR; \ + TORCH_CHECK(__err == CUBLAS_STATUS_SUCCESS, \ + "CUDA error: ", \ + at::cuda::blas::_cublasGetErrorEnum(__err), \ + " when calling `" #EXPR "`"); \ + } while (0) + +const char *cusparseGetErrorString(cusparseStatus_t status); + +#define TORCH_CUDASPARSE_CHECK(EXPR) \ + do { \ + cusparseStatus_t __err = EXPR; \ + TORCH_CHECK(__err == CUSPARSE_STATUS_SUCCESS, \ + "CUDA error: ", \ + cusparseGetErrorString(__err), \ + " when calling `" #EXPR "`"); \ + } while (0) + +#if defined(USE_CUDSS) +namespace at::cuda::cudss { +C10_EXPORT const char* cudssGetErrorMessage(cudssStatus_t error); +} // namespace at::cuda::solver + +#define TORCH_CUDSS_CHECK(EXPR) \ + do { \ + cudssStatus_t __err = EXPR; \ + if (__err == CUDSS_STATUS_EXECUTION_FAILED) { \ + TORCH_CHECK_LINALG( \ + false, \ + "cudss error: ", \ + at::cuda::cudss::cudssGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ");\ + } else { \ + TORCH_CHECK( \ + __err == CUDSS_STATUS_SUCCESS, \ + "cudss error: ", \ + at::cuda::cudss::cudssGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. 
"); \ + } \ + } while (0) +#else +#define TORCH_CUDSS_CHECK(EXPR) EXPR +#endif + +namespace at::cuda::solver { +#if !defined(USE_ROCM) + +C10_EXPORT const char* cusolverGetErrorMessage(cusolverStatus_t status); + +constexpr const char* _cusolver_backend_suggestion = \ + "If you keep seeing this error, you may use " \ + "`torch.backends.cuda.preferred_linalg_library()` to try " \ + "linear algebra operators with other supported backends. " \ + "See https://pytorch.org/docs/stable/backends.html#torch.backends.cuda.preferred_linalg_library"; + +// When cuda >= 11.5, cusolver normally finishes execution and sets info array indicating convergence issue. +#define TORCH_CUSOLVER_CHECK(EXPR) \ + do { \ + cusolverStatus_t __err = EXPR; \ + if (__err == CUSOLVER_STATUS_INVALID_VALUE) { \ + TORCH_CHECK_LINALG( \ + false, \ + "cusolver error: ", \ + at::cuda::solver::cusolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ", \ + at::cuda::solver::_cusolver_backend_suggestion); \ + } else { \ + TORCH_CHECK( \ + __err == CUSOLVER_STATUS_SUCCESS, \ + "cusolver error: ", \ + at::cuda::solver::cusolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. ", \ + at::cuda::solver::_cusolver_backend_suggestion); \ + } \ + } while (0) + +#else // defined(USE_ROCM) + +C10_EXPORT const char* hipsolverGetErrorMessage(hipsolverStatus_t status); + +constexpr const char* _hipsolver_backend_suggestion = \ + "If you keep seeing this error, you may use " \ + "`torch.backends.cuda.preferred_linalg_library()` to try " \ + "linear algebra operators with other supported backends. 
" \ + "See https://pytorch.org/docs/stable/backends.html#torch.backends.cuda.preferred_linalg_library"; + +#define TORCH_CUSOLVER_CHECK(EXPR) \ + do { \ + hipsolverStatus_t __err = EXPR; \ + if (__err == HIPSOLVER_STATUS_INVALID_VALUE) { \ + TORCH_CHECK_LINALG( \ + false, \ + "hipsolver error: ", \ + at::cuda::solver::hipsolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`", \ + ". This error may appear if the input matrix contains NaN. ", \ + at::cuda::solver::_hipsolver_backend_suggestion); \ + } else { \ + TORCH_CHECK( \ + __err == HIPSOLVER_STATUS_SUCCESS, \ + "hipsolver error: ", \ + at::cuda::solver::hipsolverGetErrorMessage(__err), \ + ", when calling `" #EXPR "`. ", \ + at::cuda::solver::_hipsolver_backend_suggestion); \ + } \ + } while (0) +#endif +} // namespace at::cuda::solver + +#define AT_CUDA_CHECK(EXPR) C10_CUDA_CHECK(EXPR) + +// For CUDA Driver API +// +// This is here instead of in c10 because NVRTC is loaded dynamically via a stub +// in ATen, and we need to use its nvrtcGetErrorString. +// See NOTE [ USE OF NVRTC AND DRIVER API ]. +#if !defined(USE_ROCM) + +#define AT_CUDA_DRIVER_CHECK(EXPR) \ + do { \ + CUresult __err = EXPR; \ + if (__err != CUDA_SUCCESS) { \ + const char* err_str; \ + [[maybe_unused]] CUresult get_error_str_err = \ + at::globalContext().getNVRTC().cuGetErrorString(__err, &err_str); \ + if (get_error_str_err != CUDA_SUCCESS) { \ + TORCH_CHECK(false, "CUDA driver error: unknown error"); \ + } else { \ + TORCH_CHECK(false, "CUDA driver error: ", err_str); \ + } \ + } \ + } while (0) + +#else + +#define AT_CUDA_DRIVER_CHECK(EXPR) \ + do { \ + CUresult __err = EXPR; \ + if (__err != CUDA_SUCCESS) { \ + TORCH_CHECK(false, "CUDA driver error: ", static_cast(__err)); \ + } \ + } while (0) + +#endif + +// For CUDA NVRTC +// +// Note: As of CUDA 10, nvrtc error code 7, NVRTC_ERROR_BUILTIN_OPERATION_FAILURE, +// incorrectly produces the error string "NVRTC unknown error." +// The following maps it correctly. 
+// +// This is here instead of in c10 because NVRTC is loaded dynamically via a stub +// in ATen, and we need to use its nvrtcGetErrorString. +// See NOTE [ USE OF NVRTC AND DRIVER API ]. +#define AT_CUDA_NVRTC_CHECK(EXPR) \ + do { \ + nvrtcResult __err = EXPR; \ + if (__err != NVRTC_SUCCESS) { \ + if (static_cast(__err) != 7) { \ + TORCH_CHECK(false, "CUDA NVRTC error: ", at::globalContext().getNVRTC().nvrtcGetErrorString(__err)); \ + } else { \ + TORCH_CHECK(false, "CUDA NVRTC error: NVRTC_ERROR_BUILTIN_OPERATION_FAILURE"); \ + } \ + } \ + } while (0) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h new file mode 100644 index 0000000000000000000000000000000000000000..d7af173fa75a5cbdb8f565569be40f4035a02e5f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/MemPool.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +// Keep BC only +using c10::CaptureId_t; +using c10::MempoolId_t; + +// MemPool represents a pool of memory in a caching allocator. Currently, +// it's just the ID of the pool object maintained in the CUDACachingAllocator. +// +// An allocator pointer can be passed to the MemPool to define how the +// allocations should be done in the pool. For example: using a different +// system allocator such as ncclMemAlloc. 
+struct TORCH_CUDA_CPP_API MemPool { + MemPool( + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator = nullptr, + bool is_user_created = true, + bool use_on_oom = false, + bool no_split = false); + MemPool(const MemPool&) = delete; + MemPool(MemPool&&) = default; + MemPool& operator=(const MemPool&) = delete; + MemPool& operator=(MemPool&&) = default; + ~MemPool(); + + MempoolId_t id(); + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator(); + int use_count(); + c10::DeviceIndex device(); + static MempoolId_t graph_pool_handle(bool is_user_created = true); + + private: + static std::atomic uid_; + static std::atomic uuid_; + c10::cuda::CUDACachingAllocator::CUDAAllocator* allocator_; + bool is_user_created_; + MempoolId_t id_; + c10::DeviceIndex device_; +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh new file mode 100644 index 0000000000000000000000000000000000000000..9fb5ed34ead66a74a14993429b0d63585d60bf35 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/NumericLimits.cuh @@ -0,0 +1,126 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +// NumericLimits.cuh is a holder for numeric limits definitions of commonly used +// types. This header is very specific to ROCm HIP and may be removed in the future. +// This header is derived from the legacy THCNumerics.cuh. + +// The lower_bound and upper_bound constants are same as lowest and max for +// integral types, but are -inf and +inf for floating point types. 
They are +// useful in implementing min, max, etc. + +namespace at { + +template +struct numeric_limits { +}; + +// WARNING: the following at::numeric_limits definitions are there only to support +// HIP compilation for the moment. Use std::numeric_limits if you are not +// compiling for ROCm. +// from @colesbury: "The functions on numeric_limits aren't marked with +// __device__ which is why they don't work with ROCm. CUDA allows them +// because they're constexpr." + +namespace { + // ROCm doesn't like INFINITY too. + constexpr double inf = INFINITY; +} + +template <> +struct numeric_limits { + static inline __host__ __device__ bool lowest() { return false; } + static inline __host__ __device__ bool max() { return true; } + static inline __host__ __device__ bool lower_bound() { return false; } + static inline __host__ __device__ bool upper_bound() { return true; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ uint8_t lowest() { return 0; } + static inline __host__ __device__ uint8_t max() { return UINT8_MAX; } + static inline __host__ __device__ uint8_t lower_bound() { return 0; } + static inline __host__ __device__ uint8_t upper_bound() { return UINT8_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int8_t lowest() { return INT8_MIN; } + static inline __host__ __device__ int8_t max() { return INT8_MAX; } + static inline __host__ __device__ int8_t lower_bound() { return INT8_MIN; } + static inline __host__ __device__ int8_t upper_bound() { return INT8_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int16_t lowest() { return INT16_MIN; } + static inline __host__ __device__ int16_t max() { return INT16_MAX; } + static inline __host__ __device__ int16_t lower_bound() { return INT16_MIN; } + static inline __host__ __device__ int16_t upper_bound() { return INT16_MAX; } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ int32_t 
lowest() { return INT32_MIN; } + static inline __host__ __device__ int32_t max() { return INT32_MAX; } + static inline __host__ __device__ int32_t lower_bound() { return INT32_MIN; } + static inline __host__ __device__ int32_t upper_bound() { return INT32_MAX; } +}; + +template <> +struct numeric_limits { +#ifdef _MSC_VER + static inline __host__ __device__ int64_t lowest() { return _I64_MIN; } + static inline __host__ __device__ int64_t max() { return _I64_MAX; } + static inline __host__ __device__ int64_t lower_bound() { return _I64_MIN; } + static inline __host__ __device__ int64_t upper_bound() { return _I64_MAX; } +#else + static inline __host__ __device__ int64_t lowest() { return INT64_MIN; } + static inline __host__ __device__ int64_t max() { return INT64_MAX; } + static inline __host__ __device__ int64_t lower_bound() { return INT64_MIN; } + static inline __host__ __device__ int64_t upper_bound() { return INT64_MAX; } +#endif +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ at::Half lowest() { return at::Half(0xFBFF, at::Half::from_bits()); } + static inline __host__ __device__ at::Half max() { return at::Half(0x7BFF, at::Half::from_bits()); } + static inline __host__ __device__ at::Half lower_bound() { return at::Half(0xFC00, at::Half::from_bits()); } + static inline __host__ __device__ at::Half upper_bound() { return at::Half(0x7C00, at::Half::from_bits()); } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ at::BFloat16 lowest() { return at::BFloat16(0xFF7F, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 max() { return at::BFloat16(0x7F7F, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 lower_bound() { return at::BFloat16(0xFF80, at::BFloat16::from_bits()); } + static inline __host__ __device__ at::BFloat16 upper_bound() { return at::BFloat16(0x7F80, at::BFloat16::from_bits()); } +}; + +template <> +struct numeric_limits { + static 
inline __host__ __device__ float lowest() { return -FLT_MAX; } + static inline __host__ __device__ float max() { return FLT_MAX; } + static inline __host__ __device__ float lower_bound() { return -static_cast(inf); } + static inline __host__ __device__ float upper_bound() { return static_cast(inf); } +}; + +template <> +struct numeric_limits { + static inline __host__ __device__ double lowest() { return -DBL_MAX; } + static inline __host__ __device__ double max() { return DBL_MAX; } + static inline __host__ __device__ double lower_bound() { return -inf; } + static inline __host__ __device__ double upper_bound() { return inf; } +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h new file mode 100644 index 0000000000000000000000000000000000000000..4d2af0e230f8c9c0323a40822c503004611497a4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PeerToPeerAccess.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include + +namespace at::cuda { +namespace detail { +void init_p2p_access_cache(int64_t num_devices); +} + +TORCH_CUDA_CPP_API bool get_p2p_access(c10::DeviceIndex source_dev, c10::DeviceIndex dest_dev); +TORCH_CUDA_CPP_API bool get_fabric_access(c10::DeviceIndex device); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h new file mode 100644 index 0000000000000000000000000000000000000000..df0077e7b20d58738646b22b058a6d9474e8686c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxCudaState.h @@ -0,0 +1,10 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..4a7824426e76d22826d8489d8f75557a7a65876d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PhiloxUtils.cuh @@ -0,0 +1,9 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..5fac9a3a6b3f472650eee44b5f18dd5f1795a5e0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/PinnedMemoryAllocator.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace at::cuda { + +inline TORCH_CUDA_CPP_API at::HostAllocator* getPinnedMemoryAllocator() { + return at::getHostAllocator(at::kCUDA); +} +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh new file mode 100644 index 0000000000000000000000000000000000000000..233c307bc5a8668b1728441c645bcf550159140c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ScanUtils.cuh @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +// Collection of in-kernel scan / prefix sum utilities + +namespace at::cuda { + +// Inclusive prefix sum for binary vars using intra-warp voting + +// shared memory +template +__device__ void inclusiveBinaryPrefixScan(T* smem, bool in, T* out, BinaryFunction binop) { + // Within-warp, we use warp voting. 
+#if defined (USE_ROCM) + unsigned long long int vote = WARP_BALLOT(in); + T index = __popcll(getLaneMaskLe() & vote); + T carry = __popcll(vote); +#else + T vote = WARP_BALLOT(in); + T index = __popc(getLaneMaskLe() & vote); + T carry = __popc(vote); +#endif + + int warp = threadIdx.x / C10_WARP_SIZE; + + // Per each warp, write out a value + if (getLaneId() == 0) { + smem[warp] = carry; + } + + __syncthreads(); + + // Sum across warps in one thread. This appears to be faster than a + // warp shuffle scan for CC 3.0+ + if (threadIdx.x == 0) { + int current = 0; + for (int i = 0; i < blockDim.x / C10_WARP_SIZE; ++i) { + T v = smem[i]; + smem[i] = binop(smem[i], current); + current = binop(current, v); + } + } + + __syncthreads(); + + // load the carry from the preceding warp + if (warp >= 1) { + index = binop(index, smem[warp - 1]); + } + + *out = index; + + if (KillWARDependency) { + __syncthreads(); + } +} + +// Exclusive prefix sum for binary vars using intra-warp voting + +// shared memory +template +__device__ void exclusiveBinaryPrefixScan(T* smem, bool in, T* out, T* carry, BinaryFunction binop) { + inclusiveBinaryPrefixScan(smem, in, out, binop); + + // Inclusive to exclusive + *out -= (T) in; + + // The outgoing carry for all threads is the last warp's sum + *carry = smem[at::ceil_div(blockDim.x, C10_WARP_SIZE) - 1]; + + if (KillWARDependency) { + __syncthreads(); + } +} + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h new file mode 100644 index 0000000000000000000000000000000000000000..04564bd165e0732756dbb79394ee6db08a40ed5f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/Sleep.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace at::cuda { + +// enqueues a kernel that spins for the specified number of cycles +TORCH_CUDA_CU_API void sleep(int64_t cycles); + +// enqueues a kernel that spins until a flag is cleared by a +// corresponding call to clear_flag() +TORCH_CUDA_CU_API void busy_wait_for_flag(); +TORCH_CUDA_CU_API void clear_flag(); + +// flushes instruction cache for ROCm; no-op for CUDA +TORCH_CUDA_CU_API void flush_icache(); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h new file mode 100644 index 0000000000000000000000000000000000000000..083c2dd84d49ec0882b00753378a046d26be43fc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/ThrustAllocator.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +/// Allocator for Thrust to re-route its internal device allocations +/// to the THC allocator +class ThrustAllocator { +public: + typedef char value_type; + + char* allocate(std::ptrdiff_t size) { + return static_cast(c10::cuda::CUDACachingAllocator::raw_alloc(size)); + } + + void deallocate(char* p, size_t size) { + c10::cuda::CUDACachingAllocator::raw_delete(p); + } +}; + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh new file mode 100644 index 0000000000000000000000000000000000000000..0f231c3fbda755cc7b2d2dc4ee4e758a42e1a505 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub-RadixSortPairs.cuh @@ -0,0 +1,79 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#define TORCH_ASSERT_NO_OPERATORS +#include +#include + +namespace at::cuda::cub::detail { + +template +void radix_sort_pairs_impl( + const key_t* keys_in, + key_t* keys_out, + const OpaqueType* values_in, + OpaqueType* values_out, + int64_t n, + bool descending, + int64_t begin_bit, + int64_t end_bit) { + TORCH_CHECK( + n <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + using key_t_ = typename detail::cuda_type::type; + + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + + if (keys_out == nullptr) { + keys_out_owner = allocator->allocate(n * sizeof(key_t)); + keys_out = reinterpret_cast(keys_out_owner.get()); + } + + const key_t_* keys_in_ = reinterpret_cast(keys_in); + key_t_* keys_out_ = reinterpret_cast(keys_out); + + if (descending) { + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRadixSort::SortPairsDescending, + keys_in_, + keys_out_, + values_in, + values_out, + n, + begin_bit, + end_bit, + c10::cuda::getCurrentCUDAStream()); + } else { + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRadixSort::SortPairs, + keys_in_, + keys_out_, + values_in, + values_out, + n, + begin_bit, + end_bit, + c10::cuda::getCurrentCUDAStream()); + } +} + +#define AT_INSTANTIATE_SORT_PAIRS(key_t, value_size) \ + template void radix_sort_pairs_impl( \ + 
const key_t* keys_in, \ + key_t* keys_out, \ + const OpaqueType* values_in, \ + OpaqueType* values_out, \ + int64_t n, \ + bool descending, \ + int64_t begin_bit, \ + int64_t end_bit); + +#define AT_INSTANTIATE_SORT_PAIRS_8(scalar_t, ScalarType) \ + AT_INSTANTIATE_SORT_PAIRS(scalar_t, 8) + +} // namespace at::cuda::cub::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh new file mode 100644 index 0000000000000000000000000000000000000000..a6666f74796d226ce13b8162c249b3458dee1c93 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.cuh @@ -0,0 +1,576 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include +#include +#include +#include + +#ifndef USE_ROCM +#include +#endif + +#include +#include + +#if USE_GLOBAL_CUB_WRAPPED_NAMESPACE() + +#include + +#else + +// include cub in a safe manner, see: +// https://github.com/pytorch/pytorch/pull/55292 +#undef CUB_NS_POSTFIX //undef to avoid redefinition warnings +#undef CUB_NS_PREFIX +#undef CUB_NS_QUALIFIER +#define CUB_NS_PREFIX namespace at_cuda_detail { +#define CUB_NS_POSTFIX } +#define CUB_NS_QUALIFIER ::at_cuda_detail::cub +#include +#undef CUB_NS_POSTFIX +#undef CUB_NS_PREFIX +#undef CUB_NS_QUALIFIER + +#endif + +#include +#include +#include + +// handle the temporary storage and 'twice' calls for cub API +#define CUB_WRAPPER(func, ...) 
do { \ + size_t temp_storage_bytes = 0; \ + AT_CUDA_CHECK(func(nullptr, temp_storage_bytes, __VA_ARGS__)); \ + auto& caching_allocator = *::c10::cuda::CUDACachingAllocator::get(); \ + auto temp_storage = caching_allocator.allocate(temp_storage_bytes); \ + AT_CUDA_CHECK(func(temp_storage.get(), temp_storage_bytes, __VA_ARGS__));\ +} while (false) + +#ifdef USE_ROCM +#define NO_ROCM(x) +#define ROCM_HIPCUB(x) ::hipcub +#else +#define NO_ROCM(x) x +#define ROCM_HIPCUB(x) x +#endif + +#if CUB_V3_PLUS() +#include +#include +#include +#define ATEN_CUB_TRANSFORM_ITERATOR(ValueType, ...) ::thrust::transform_iterator<__VA_ARGS__> +#define ATEN_CUB_COUNTING_ITERATOR(...) ::thrust::counting_iterator<__VA_ARGS__> +#define ATEN_CUB_CONSTANT_ITERATOR(...) ::thrust::constant_iterator<__VA_ARGS__> +#define ATEN_CUB_MAXIMUM() ::cuda::maximum<>() +#else +#define ATEN_CUB_TRANSFORM_ITERATOR(...) NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::TransformInputIterator<__VA_ARGS__> +#define ATEN_CUB_COUNTING_ITERATOR(...) NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::CountingInputIterator<__VA_ARGS__> +#define ATEN_CUB_CONSTANT_ITERATOR(...) 
NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::ConstantInputIterator<__VA_ARGS__> +#define ATEN_CUB_MAXIMUM() NO_ROCM(at_cuda_detail)ROCM_HIPCUB(::cub)::Max() +#endif + +#if defined(USE_ROCM) + +// backport https://github.com/NVIDIA/cub/pull/306 for c10::BFloat16 + +template <> +struct ROCM_HIPCUB(cub)::FpLimits +{ + static __host__ __device__ __forceinline__ c10::BFloat16 Max() { + unsigned short max_word = 0x7F7F; + return reinterpret_cast(max_word); + } + + static __host__ __device__ __forceinline__ c10::BFloat16 Lowest() { + unsigned short lowest_word = 0xFF7F; + return reinterpret_cast(lowest_word); + } +}; + +template <> +struct ROCM_HIPCUB(cub)::NumericTraits: + ROCM_HIPCUB(cub)::BaseTraits {}; + +#endif + +#if !defined(USE_ROCM) +namespace at::native { +namespace cub = ::at_cuda_detail::cub; +} // namespace at::native +#endif + +namespace at::cuda::cub { + +namespace detail { + +template +struct cuda_type { + using type = T; +}; +template<> +struct cuda_type { + using type = __half; +}; + +#if !defined(USE_ROCM) + +template<> +struct cuda_type { + using type = __nv_bfloat16; +}; + +#elif defined(USE_ROCM) + +template<> +struct cuda_type { + using type = hip_bfloat16; +}; + +#endif + +} // namespace detail + +template +inline void segmented_sort_pairs( + const key_t *keys_in, key_t *keys_out, + const value_t *values_in, value_t *values_out, + int64_t num_elements, int64_t num_segments, + OffsetIteratorT begin_offsets, OffsetIteratorT end_offsets, + bool descending=false, int64_t begin_bit=0, int64_t end_bit=sizeof(key_t)*8 +) { + TORCH_CHECK(num_elements <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + TORCH_CHECK(num_segments <= std::numeric_limits::max(), + "cub sort does not support sorting more than INT_MAX elements"); + using key_t_ = typename detail::cuda_type::type; + + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + + if (keys_out == nullptr) { + keys_out_owner = 
allocator->allocate(num_elements * sizeof(key_t)); + keys_out = reinterpret_cast(keys_out_owner.get()); + } + + const key_t_ *keys_in_ = reinterpret_cast(keys_in); + key_t_ *keys_out_ = reinterpret_cast(keys_out); + + if (descending) { + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSegmentedRadixSort::SortPairsDescending, + keys_in_, keys_out_, values_in, values_out, + num_elements, num_segments, begin_offsets, end_offsets, + begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); + } else { + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSegmentedRadixSort::SortPairs, + keys_in_, keys_out_, values_in, values_out, + num_elements, num_segments, begin_offsets, end_offsets, + begin_bit, end_bit, c10::cuda::getCurrentCUDAStream()); + } +} + +template +inline void unique_by_key( + KeysInputIteratorT keys_in, ValuesInputIteratorT values_in, + ValuesOutputIteratorT values_out, + NumSelectedIteratorT num_selected, int64_t num_input_items) +{ + // TODO: use thrust::discard_iterator to handle null keys_out when https://github.com/NVIDIA/cub/issues/406 is fixed. + using KeyT = typename std::iterator_traits::value_type; + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr keys_out_owner; + keys_out_owner = allocator->allocate(num_input_items * sizeof(KeyT)); + auto keys_out_ = static_cast(keys_out_owner.get()); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSelect::UniqueByKey, + keys_in, values_in, keys_out_, values_out, num_selected, num_input_items, c10::cuda::getCurrentCUDAStream()); +} + +namespace impl { + +template +C10_LAUNCH_BOUNDS_1(1) +__global__ void transform_vals(InputIteratorT1 a, InputIteratorT2 b, OutputIteratorT out, ScanOpT scan_op){ + // NOTE: out here not the final scan output, but an intermediate of the accumulation type. 
+ using acc_t = typename std::iterator_traits::value_type; + *out = scan_op(static_cast(*a), static_cast(*b)); +} + +// even though cub is supposed to support tensors with int_max elements, in reality it doesn't, +// so split at int_max/2 +constexpr int max_cub_size = std::numeric_limits::max() / 2 + 1; // 2**30 +} + +// non synchronizing cub call +// even though cub is supposed to support tensors with int_max elements, in reality it doesn't, +// so split at int_max/2 +template +inline void inclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT scan_op, int64_t num_items) { +#if defined(USE_ROCM) + //For ROCm, use hipCUB chained iterators + CUB_WRAPPER(NO_ROCM(detail)::hipcub::DeviceScan::InclusiveScan, + input, + output, + scan_op, + num_items, + at::cuda::getCurrentCUDAStream()); + C10_HIP_KERNEL_LAUNCH_CHECK(); +#else + // non synchronizing cub call + // even though cub is supposed to support tensors with int_max elements, in reality it doesn't, + // so split at int_max/2 + int size_cub = std::min(num_items, max_cub_size); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::InclusiveScan, + input, + output, + scan_op, + size_cub, + at::cuda::getCurrentCUDAStream()); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + using input_t = typename std::iterator_traits::value_type; + for (int64_t i = max_cub_size; i < num_items; i += max_cub_size) { + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr first_elem = allocator->allocate(sizeof(input_t)); + auto first_elem_ptr = reinterpret_cast(first_elem.get()); + + size_cub = std::min(num_items - i, max_cub_size); + impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( + output + i - 1, + input + i, + first_elem_ptr, + scan_op); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input + i + 1, + output + i, + scan_op, + ::at_cuda_detail::cub::FutureValue(first_elem_ptr), + size_cub, + at::cuda::getCurrentCUDAStream()); + } 
+#endif +} + +# if defined(CUDA_VERSION) || defined(USE_ROCM) + +template +struct BlockPrefixCallbackOp +{ + public: + T running_total; + + __host__ __device__ BlockPrefixCallbackOp(T running_total) : running_total(running_total) {} + + // Callback operator to be entered by the first warp of threads in the block. + // Thread-0 is responsible for returning a value for seeding the block-wide scan. + __host__ __device__ T operator()(T block_aggregate) + { + T old_prefix = running_total; + running_total += block_aggregate; + return old_prefix; + } +}; + +template +__global__ void final_scan_kernel(const T* d_in, T* d_out, T* agg, int64_t nelem, int iters_per_cta) { + int64_t offset = BLOCK_THREADS * ITEMS_PER_THREAD * iters_per_cta * (int64_t)blockIdx.x; + int64_t remaining = nelem - offset; + if (remaining <= 0) { + return; + } + + d_in += offset; + d_out += offset; + + using BlockLoadT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockLoad; + + // Specialize BlockStore type for our thread block (uses warp-striped loads for coalescing, then transposes in shared + // memory to a blocked arrangement) + using BlockStoreT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockStore; + + // Specialize BlockScan type for our thread block + using BlockScanT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockScan; + using BlockReduceT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockReduce; + + + // Shared memory + __shared__ union TempStorage + { + typename BlockLoadT::TempStorage load; + typename BlockStoreT::TempStorage store; + typename BlockScanT::TempStorage scan; + typename BlockReduceT::TempStorage reduce; + } temp_storage; + + // load agg and reduce my starting value + T agg_data; + agg_data = threadIdx.x >= blockIdx.x ? 
T(0) : agg[threadIdx.x]; + // if there are fewer threads than previous values to be read, + // read another value + if (threadIdx.x + blockDim.x < blockIdx.x) { + agg_data += agg[threadIdx.x + blockDim.x]; + } + T aggregate = BlockReduceT(temp_storage.reduce).Sum(agg_data); + __syncthreads(); + BlockPrefixCallbackOp prefix_op(aggregate); + + + // Per-thread tile data + T data[ITEMS_PER_THREAD]; + + for (int i=0; i= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockLoadT(temp_storage.load).Load(d_in, data); + } else { + #pragma unroll + for (int j=0; j= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockStoreT(temp_storage.store).Store(d_out, data); + } else { + BlockStoreT(temp_storage.store).Store(d_out, data, remaining); + } + d_in += BLOCK_THREADS * ITEMS_PER_THREAD; + d_out += BLOCK_THREADS * ITEMS_PER_THREAD; + remaining -= BLOCK_THREADS * ITEMS_PER_THREAD; + if (remaining <= 0) return; + __syncthreads(); + } + +} + +template +struct TransformFunctor { + __device__ aggT operator()(T value) const { + if constexpr (!nonzero) { + return value; + } else { + return (value != T(0)) ? 
1 : 0; + } + } +}; + +template +__global__ void calc_block_sums(const T * d_in, aggT * agg, int64_t nelem, int iters_per_cta){ + int64_t offset = BLOCK_THREADS * ITEMS_PER_THREAD * iters_per_cta * (int64_t)blockIdx.x; + int64_t remaining = nelem - offset; + if (remaining <= 0) { + return; + } + d_in += offset; + + using BlockLoadT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockLoad; + using BlockReduceT = ROCM_HIPCUB(at_cuda_detail::cub)::BlockReduce; + // Shared memory + __shared__ union TempStorage + { + typename BlockLoadT::TempStorage load; + typename BlockReduceT::TempStorage reduce; + } temp_storage; + aggT data[ITEMS_PER_THREAD]; + aggT agg_val = 0; + TransformFunctor transform_functor; + auto iter_in = ATEN_CUB_TRANSFORM_ITERATOR(aggT, TransformFunctor, const T*)(d_in, transform_functor); + for (int i=0; i= BLOCK_THREADS * ITEMS_PER_THREAD) { + BlockLoadT(temp_storage.load).Load(iter_in, data); + __syncthreads(); + agg_val += BlockReduceT(temp_storage.reduce).Sum(data); + + } else { + BlockLoadT(temp_storage.load).Load(iter_in, data, remaining, aggT(0)); + __syncthreads(); + agg_val += BlockReduceT(temp_storage.reduce).Sum(data); + } + iter_in += BLOCK_THREADS * ITEMS_PER_THREAD; + remaining -= BLOCK_THREADS * ITEMS_PER_THREAD; + if (remaining <= 0) { + // for nonzeros we need to write out last blocks + // accumulated value to be able to compute + // total number of nonzeros + if (nonzero && threadIdx.x == 0) { + agg[blockIdx.x] = agg_val; + } + return; + } + __syncthreads(); + + } + if (threadIdx.x == 0) { + agg[blockIdx.x] = agg_val; + } + +} + +template +struct NonZeroOp { + __host__ __device__ __forceinline__ int operator()(const T& a) const { + return (a != T(0)); + } +}; + +template +constexpr int block_threads(){ + if constexpr (size >=16) { + return 128; + } else if constexpr (size >=8) { + return 256; + } else { + return 512; + } +} + +template +inline void inclusive_deterministic_scan(const scalar_t * input, scalar_t * output, ScanOpT scan_op, int64_t 
num_items) { + static_assert(std::is_same_v>, ""); + constexpr int BLOCK_THREADS = block_threads(); + constexpr int ITEMS_PER_THREAD = 16; + auto grid_size = (num_items + BLOCK_THREADS * ITEMS_PER_THREAD - 1) / (BLOCK_THREADS * ITEMS_PER_THREAD); + const int64_t num_sms = at::cuda::getCurrentDeviceProperties()->multiProcessorCount; + + const int iters_per_cta = (grid_size + num_sms - 1)/num_sms; + grid_size = std::min(num_sms, grid_size); + // simple reduction in scan kernel handles at most 2 items per thread + TORCH_INTERNAL_ASSERT(2 * BLOCK_THREADS >= grid_size); + auto& allocator = *c10::cuda::CUDACachingAllocator::get(); + auto agg = allocator.allocate(grid_size * sizeof(scalar_t)); + calc_block_sums + <<>>( + input, (scalar_t*)agg.get(), num_items, iters_per_cta); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + final_scan_kernel + <<>>( + input, output, (scalar_t*)agg.get(), num_items, iters_per_cta); + C10_CUDA_KERNEL_LAUNCH_CHECK(); +} + +#endif + +template +inline void exclusive_scan(InputIteratorT input, OutputIteratorT output, ScanOpT scan_op, InitValueT init_value, int64_t num_items) { +#if defined(USE_ROCM) + //For ROCm, use hipCUB chained iterators + CUB_WRAPPER(NO_ROCM(detail)::hipcub::DeviceScan::ExclusiveScan, + input, + output, + scan_op, + init_value, + num_items, + at::cuda::getCurrentCUDAStream()); + C10_HIP_KERNEL_LAUNCH_CHECK(); +#else + // non synchronizing cub call + // even though cub is supposed to support tensors with int_max elements, in reality it doesn't, + // so split at int_max/2 + int size_cub = std::min(num_items, max_cub_size); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input, + output, + scan_op, + init_value, + size_cub, + at::cuda::getCurrentCUDAStream()); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + for (int64_t i = max_cub_size; i < num_items; i += max_cub_size) { + auto allocator = c10::cuda::CUDACachingAllocator::get(); + c10::DataPtr first_elem = allocator->allocate(sizeof(InitValueT)); + auto first_elem_ptr = 
reinterpret_cast(first_elem.get()); + + size_cub = std::min(num_items - i, max_cub_size); + impl::transform_vals<<<1, 1, 0, at::cuda::getCurrentCUDAStream()>>>( + output + i - 1, + input + i - 1, + first_elem_ptr, + scan_op); + C10_CUDA_KERNEL_LAUNCH_CHECK(); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceScan::ExclusiveScan, + input + i, + output + i, + scan_op, + ::at_cuda_detail::cub::FutureValue(first_elem_ptr), + size_cub, + at::cuda::getCurrentCUDAStream()); + } +#endif +} + + +template +inline void inclusive_sum_by_key(KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub InclusiveSumByKey does not support more than INT_MAX elements"); +#if !defined(USE_ROCM) + CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveSumByKey, + keys, input, output, num_items, NO_ROCM(::cuda)::std::equal_to<>(), at::cuda::getCurrentCUDAStream()); +#else + CUB_WRAPPER(cub::DeviceScan::InclusiveSumByKey, + keys, input, output, num_items, hipcub::Equality(), at::cuda::getCurrentCUDAStream()); +#endif +} + +template +inline void inclusive_scan_by_key(KeysInputIteratorT keys, ValuesInputIteratorT input, ValuesOutputIteratorT output, ScanOpT scan_op, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub InclusiveSumByKey does not support more than INT_MAX elements"); +#if !defined(USE_ROCM) + CUB_WRAPPER(at_cuda_detail::cub::DeviceScan::InclusiveScanByKey, + keys, input, output, scan_op, num_items, NO_ROCM(::cuda)::std::equal_to<>(), at::cuda::getCurrentCUDAStream()); +#else + CUB_WRAPPER(cub::DeviceScan::InclusiveScanByKey, + keys, input, output, scan_op, num_items, hipcub::Equality(), at::cuda::getCurrentCUDAStream()); +#endif +} + + +template +void unique(InputIteratorT input, OutputIteratorT output, + NumSelectedIteratorT num_selected_out, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub unique does not 
support more than INT_MAX elements"); + CUB_WRAPPER(NO_ROCM(at_cuda_detail)::cub::DeviceSelect::Unique, + input, output, num_selected_out, num_items, at::cuda::getCurrentCUDAStream()); +} + +template +void run_length_encode(InputIteratorT input, OutputIteratorT output, CountsOutputIteratorT counts_out, + LengthOutputIteratorT length_out, int64_t num_items) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub run_length_encode does not support more than INT_MAX elements"); + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceRunLengthEncode::Encode, + input, output, counts_out, length_out, num_items, + at::cuda::getCurrentCUDAStream()); +} + +template +void reduce(InputIteratorT input, OutputIteratorT output, int64_t num_items, ReductionOpT op, T init) { + TORCH_CHECK(num_items <= std::numeric_limits::max(), + "cub reduce does not support more than INT_MAX elements"); + CUB_WRAPPER( + NO_ROCM(at_cuda_detail)::cub::DeviceReduce::Reduce, + input, output, num_items, op, init, + at::cuda::getCurrentCUDAStream()); + +} + +} // namespace at::cuda::cub + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h new file mode 100644 index 0000000000000000000000000000000000000000..199e46b7464eed369a63b66a8e1026529218b724 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub.h @@ -0,0 +1,98 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +// NOTE: These templates are intentionally not defined in this header, +// which avoids re-compiling them for each translation unit. 
// Number of bits needed to represent `max_key`: 1 for the values 0 and 1,
// otherwise the position of the highest set bit plus one.
inline int get_num_bits(uint64_t max_key) {
  int bit_count = 1;
  // Every bit that remains above the lowest one adds one to the width.
  for (uint64_t higher_bits = max_key >> 1; higher_bits != 0; higher_bits >>= 1) {
    ++bit_count;
  }
  return bit_count;
}
// Declared here and defined out of line.
void mask_exclusive_sum(const uint8_t *mask, int64_t *output_idx, int64_t n);

// Convenience overload for bool masks: reinterprets the mask as bytes and
// forwards to the uint8_t overload above.
// NOTE(review): this relies on bool being stored as one byte -- confirm for
// the supported toolchains.
inline void mask_exclusive_sum(const bool *mask, int64_t *output_idx, int64_t n) {
  return mask_exclusive_sum(
      reinterpret_cast<const uint8_t*>(mask), output_idx, n);
}
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh new file mode 100644 index 0000000000000000000000000000000000000000..563bd82bebb7375081ee35f5021edc5bb5e17e78 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/cub_definitions.cuh @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#if !defined(USE_ROCM) +#include // for CUDA_VERSION +#endif + +#if !defined(USE_ROCM) +#include +#else +#define CUB_VERSION 200001 +#endif + +// cub support for CUB_WRAPPED_NAMESPACE is added to cub 1.13.1 in: +// https://github.com/NVIDIA/cub/pull/326 +// CUB_WRAPPED_NAMESPACE is defined globally in cmake/Dependencies.cmake +// starting from CUDA 11.5 +#if defined(CUB_WRAPPED_NAMESPACE) || defined(THRUST_CUB_WRAPPED_NAMESPACE) +#define USE_GLOBAL_CUB_WRAPPED_NAMESPACE() true +#else +#define USE_GLOBAL_CUB_WRAPPED_NAMESPACE() false +#endif + +// There were many bc-breaking changes in major version release of CCCL v3.0.0 +// Please see https://nvidia.github.io/cccl/cccl/3.0_migration_guide.html +#if CUB_VERSION >= 200800 +#define CUB_V3_PLUS() true +#else +#define CUB_V3_PLUS() false +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h new file mode 100644 index 0000000000000000000000000000000000000000..7cb786a3c1ab5bfb8150f3211cbdac61bd3cc9c1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#if AT_USE_JITERATOR() + +#include +#include +#include + +#include +#include + +namespace at::cuda { + +TORCH_CUDA_CPP_API c10::SmallVector CompileAndLaunchKernel( + const std::string& code_string, + const std::string& kernel_name, + const int num_outputs, + const c10::SmallVector& tensors, + const c10::SmallVector& extra_args, + bool return_by_ref); + +} // namespace at::cuda + +#else + +namespace at::cuda { + +TORCH_CUDA_CPP_API c10::SmallVector CompileAndLaunchKernel( + const std::string& code_string, + const std::string& kernel_name, + const int num_outputs, + const c10::SmallVector& tensors, + const c10::SmallVector& extra_args, + bool return_by_ref) { + TORCH_CHECK(false, "Jiterator is not supported"); + } +} // namespace at::cuda + +#endif // AT_USE_JITERATOR() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..acdcba30dc6a5f64f5b066c2f0795f251ca53f86 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/jiterator_impl.h @@ -0,0 +1,255 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#if AT_USE_JITERATOR() + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace at::native { + + +#define AT_FOR_8_CASES(_) \ + _(1) \ + _(2) \ + _(3) \ + _(4) \ + _(5) \ + _(6) \ + _(7) \ + _(8) + +#define AT_FOR_8_CASES_WITH_COMMA(_) \ + _(1) , \ + _(2) , \ + _(3) , \ + _(4) , \ + _(5) , \ + _(6) , \ + _(7) , \ + _(8) + +c10::SmallVector get_extra_args_typenames(const c10::SmallVector& extra_args) { + c10::SmallVector args_typenames(extra_args.size()); + for (const auto i : c10::irange(extra_args.size())) { + args_typenames[i] = at::cuda::jit::typeName(extra_args[i].type()); + } + return args_typenames; +} + +int can_vectorize_up_to(at::ScalarType type, char* pointer) { + switch(type) { +#define DEFINE_CASE(ctype, scalartype) \ + case ScalarType::scalartype : return memory::can_vectorize_up_to(pointer); + + AT_FORALL_SCALAR_TYPES_WITH_COMPLEX(DEFINE_CASE) +#undef DEFINE_CASE + + default: TORCH_INTERNAL_ASSERT(false, "Unrecognized ScalarType: ", type); + } +} + +// jitted version of the above +// See Note [Jiterator], this relies on the assumptions enumerated there +int jitted_can_vectorize_up_to(const TensorIteratorBase& iter) { + const at::ScalarType common_dtype = iter.common_dtype(); + const at::ScalarType result_dtype = common_dtype; + + // Deals with output + int result = 
can_vectorize_up_to(result_dtype, static_cast(iter.data_ptr(0))); + + // Incorporates input(s) + for (auto i = 1; i < iter.ntensors(); ++i) { + result = std::min(result, can_vectorize_up_to(common_dtype, static_cast(iter.data_ptr(i)))); + } + + return result; +} + +template +static std::unique_ptr> make_unique_offset_calculator( + const TensorIteratorBase& iter) { + // array size can not be 0, this happens when N == 0 + constexpr int array_size = std::max(N, 1); + TORCH_INTERNAL_ASSERT(N == (IS_INPUT ? iter.ninputs() : iter.noutputs())); + + std::array strides; + int64_t element_sizes[array_size]; + for (int i = 0; i < N; i++) { + int index = IS_INPUT ? i + iter.noutputs() : i; + strides[i] = iter.strides(index).data(); + element_sizes[i] = iter.element_size(index); + } + return std::make_unique>(iter.ndim(), iter.shape().data(), strides.data(), element_sizes); +} + +template +struct OffsetCalculatorVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using OffsetCalculatorTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + OffsetCalculatorVariant(const TensorIteratorBase& iter) { + int num = IS_INPUT ? 
iter.ninputs() : iter.noutputs(); + + switch(num) { +#define DEFINE_CASE(index) \ + case index : v = make_unique_offset_calculator(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + default: + TORCH_CHECK(false, "OffsetCalculatorVariant is not implemented for num_tensor = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + + private: + OffsetCalculatorTypes v{}; +}; + +struct ArrayVariant { +// works for up to 8 input + 8 outputs +#define DEFINE_CASE(index) std::array, std::array + using ArrayTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + ArrayVariant(const TensorIteratorBase& iter) { + int ntensors = iter.ntensors(); + switch(ntensors) { +#define DEFINE_CASE(index) \ + case index: array = std::array{}; break; \ + case index+8: array = std::array{}; break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "ArrayVariant is not implemented for ntensors = ", ntensors); + } + + std::visit([&](auto& a) { + for (auto i = 0; i < ntensors; ++i) { + a[i] = (char*)iter.data_ptr(i); + } + }, array); + } + + void* data_ptr() { + return std::visit([](auto & a){ return static_cast(&a); }, array); + } + +private: + ArrayTypes array; +}; + +struct TrivialOffsetCalculatorVariant { +#define DEFINE_CASE(index) TrivialOffsetCalculator + using TrivialOffsetCalculatorTypes = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + TrivialOffsetCalculatorVariant(int num) { + switch(num) { +#define DEFINE_CASE(index) \ + case index: v = TrivialOffsetCalculator(); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "TrivialOffsetCalculatorVariant is not implemented for num_tensors = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(&v); }, v); + } + +private: + TrivialOffsetCalculatorTypes v{}; +}; + +struct 
LoadWithCastVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using LoadWithCastPtr = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + LoadWithCastVariant(const TensorIteratorBase& iter) { + int arity = iter.ninputs(); + switch(arity) { +#define DEFINE_CASE(index) \ + case index: v = std::make_unique>(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "LoadWithCastVariant is not implemented for ninputs = ", arity); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + +private: + LoadWithCastPtr v{}; +}; + +struct StoreWithCastVariant { +#define DEFINE_CASE(index) std::unique_ptr> + using StoreWithCastPtr = std::variant< + AT_FOR_8_CASES_WITH_COMMA(DEFINE_CASE) + >; +#undef DEFINE_CASE + + StoreWithCastVariant(const TensorIteratorBase& iter) { + int num = iter.noutputs(); + switch(num) { +#define DEFINE_CASE(index) \ + case index: v = std::make_unique>(iter); break; + + AT_FOR_8_CASES(DEFINE_CASE) +#undef DEFINE_CASE + + default: + TORCH_CHECK(false, "StoreWithCastVariant is not implemented for noutputs = ", num); + } + } + + void* data_ptr() { + return std::visit([](auto & v){ return static_cast(v.get()); }, v); + } + +private: + StoreWithCastPtr v{}; +}; + +} // namespace at::native + + +#endif // AT_USE_JITERATOR() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..fb802616b8a8aeb8bd4b398e665e24470c2b674b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cuda/llvm_jit_strings.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::cuda { + +TORCH_CUDA_CPP_API const std::string &get_traits_string(); +TORCH_CUDA_CPP_API const std::string &get_cmath_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_body_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_half_body_string(); +TORCH_CUDA_CPP_API const std::string &get_complex_math_string(); + +} // namespace at::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h new file mode 100644 index 0000000000000000000000000000000000000000..ab53d20eee3d9cb4e942f86ac4d26a3c8ab309b7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Descriptors.h @@ -0,0 +1,415 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#endif + +#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 8907 +#define USE_CUDNN_RNN_V8_API +#endif + +namespace at::native { + +std::string cudnnTypeToString(cudnnDataType_t dtype); + +// TODO: Add constructors for all of the descriptors + +inline int dataSize(cudnnDataType_t dataType) +{ + switch (dataType) { + case CUDNN_DATA_BFLOAT16: + case CUDNN_DATA_HALF: return 2; + case CUDNN_DATA_FLOAT: return 4; + default: return 8; + } +} + +// NOTE [ cudnn fixSizeOneDimStride ] +// The stride for a size-1 dimensions is not uniquely determined; in +// fact, it can be anything you want, because the fact that the +// tensor is size 1 at this dimension means that you will never actually +// try advancing your pointer by this stride. +// +// However, CuDNN has a much more stringent requirement on strides: +// if you are passing a contiguous input, it better be the case +// that the stride for dim i is the product of the sizes of dims +// i+1 to the end. This stride is indeed uniquely determined. This +// function modifies 'stride' in place so this invariant holds. 
+template +static inline void fixSizeOneDimStride(int dim, const T *size, T *stride, bool nhwc) { + int64_t z = 1; + int index = 0; + std::vector permutation(dim); + + if (nhwc) { + permutation[index++] = 1; + } + for (int d = dim-1; d > 1; d--) { + permutation[index++] = d; + } + if (!nhwc) { + permutation[index++] = 1; + } + permutation[index++] = 0; + for (int d : permutation) { + if (size[d] == 1) { + stride[d] = z; + } else { + z *= size[d]; + } + } +} + +template +struct DescriptorDeleter { + void operator()(T* x) { + if (x != nullptr) { + AT_CUDNN_CHECK(dtor(x)); + } + } +}; + +// A generic class for wrapping cuDNN descriptor types. All you need +// is to give the underlying type the Descriptor_t points to (usually, +// if it's cudnnTensorDescriptor_t it points to cudnnTensorStruct), +// the constructor and the destructor. Subclasses are responsible +// for defining a set() function to actually set the descriptor. +// +// Descriptors default construct to a nullptr, and have a descriptor +// initialized the first time you call set() or any other initializing +// function. +template +// NOLINTNEXTLINE(bugprone-exception-escape) +class TORCH_CUDA_CPP_API Descriptor { + public: + // TODO: Figure out why const-correctness doesn't work here + + // Use desc() to access the underlying descriptor pointer in + // a read-only fashion. Most client code should use this. + // If the descriptor was never initialized, this will return + // nullptr. + T* desc() const { return desc_.get(); } + T* desc() { return desc_.get(); } + + // Use mut_desc() to access the underlying descriptor pointer + // if you intend to modify what it points to (e.g., using + // cudnnSetFooDescriptor). This will ensure that the descriptor + // is initialized. Code in this file will use this function. 
+ T* mut_desc() { init(); return desc_.get(); } +protected: + void init() { + if (desc_ == nullptr) { + T* raw_desc = nullptr; + AT_CUDNN_CHECK(ctor(&raw_desc)); + desc_.reset(raw_desc); + } + } +private: + std::unique_ptr> desc_; +}; + +class TORCH_CUDA_CPP_API RNNDataDescriptor : public Descriptor< + cudnnRNNDataStruct, + &cudnnCreateRNNDataDescriptor, + &cudnnDestroyRNNDataDescriptor> { +public: + void set(const at::Tensor &t, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray); +private: + void set(cudnnDataType_t dataType, cudnnRNNDataLayout_t layout, int maxSeqLength, int batchSize, int vectorSize, const int* seqLengthArray) { + AT_CUDNN_CHECK(cudnnSetRNNDataDescriptor(mut_desc(), dataType, layout, maxSeqLength, batchSize, vectorSize, seqLengthArray, nullptr)); + } +}; + +class TORCH_CUDA_CPP_API TensorDescriptor : public Descriptor< + cudnnTensorStruct, + &cudnnCreateTensorDescriptor, + &cudnnDestroyTensorDescriptor> { + public: + TensorDescriptor() = default; + explicit TensorDescriptor(const at::Tensor &t, size_t pad = 0) { + set(t, pad); + } + + // Note [CuDNN broadcast padding] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + // pad specifies the minimum dimensionality of the tensor descriptor + // we produce (it doesn't have anything to do with, e.g., convolution + // padding). If 't' is lower-dimensional than 'pad', the remaining + // dimensions (on the right) are padded with ones. This doesn't + // affect the underlying data layout. This is particularly useful for + // dealing with a peculiarity of the CuDNN API, which is that broadcasting in CuDNN is + // done in two steps: first, the client code is expected to pad out + // (the dimensions) input tensors to be the same dimension as the + // target broadcast, and then second, CuDNN takes of actually + // broadcasting size 1 dimensions. 
+ + void set(const at::Tensor &t, size_t pad = 0); + void set(const at::Tensor &t, at::MemoryFormat memory_format, size_t pad = 0); + void set(cudnnDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad = 0); + + void print(); + +private: + void set(cudnnDataType_t dataType, IntArrayRef sizes, IntArrayRef strides, size_t pad, bool nhwc); + + void set(cudnnDataType_t dataType, int dim, int* size, int* stride, bool nhwc) { + std::vector strides_copy(stride, stride + dim); + fixSizeOneDimStride(dim, size, strides_copy.data(), nhwc); + AT_CUDNN_CHECK(cudnnSetTensorNdDescriptor(mut_desc(), dataType, dim, size, strides_copy.data())); + } +}; + +std::ostream& operator<<(std::ostream & out, const TensorDescriptor& d); + +class TORCH_CUDA_CPP_API FilterDescriptor : public Descriptor< + cudnnFilterStruct, + &cudnnCreateFilterDescriptor, + &cudnnDestroyFilterDescriptor> { + public: + void set(const at::Tensor &t, int64_t pad = 0) { + set(t, at::MemoryFormat::Contiguous, pad); + } + + void set(const at::Tensor &t, const at::MemoryFormat memory_format, int64_t pad = 0); + + void print(); +private: + void set(cudnnDataType_t dataType, int dim, int* size, cudnnTensorFormat_t filter_format) { + AT_CUDNN_CHECK(cudnnSetFilterNdDescriptor(mut_desc(), dataType, filter_format, dim, size)); + } +}; + +std::ostream& operator<<(std::ostream & out, const FilterDescriptor& d); + +struct TORCH_CUDA_CPP_API ConvolutionDescriptor + : public Descriptor< + cudnnConvolutionStruct, + &cudnnCreateConvolutionDescriptor, + &cudnnDestroyConvolutionDescriptor> { + void set(cudnnDataType_t dataType, int dim, int* pad, int* stride, int * upscale /* aka dilation */, int groups, bool allow_tf32) { + cudnnDataType_t mathType = dataType; + if (dataType == CUDNN_DATA_HALF) mathType = CUDNN_DATA_FLOAT; + AT_CUDNN_CHECK(cudnnSetConvolutionNdDescriptor(mut_desc(), dim, pad, stride, upscale, + CUDNN_CROSS_CORRELATION, mathType)); + AT_CUDNN_CHECK(cudnnSetConvolutionGroupCount(mut_desc(), 
groups)); + // See Note [behavior of cudnnFind and cudnnGet] + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_DEFAULT_MATH)); + if(dataType == CUDNN_DATA_HALF) { + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_TENSOR_OP_MATH)); + } else if (dataType == CUDNN_DATA_FLOAT && !allow_tf32) { + AT_CUDNN_CHECK(cudnnSetConvolutionMathType(mut_desc(), CUDNN_FMA_MATH)); + } + } +}; + +struct TORCH_CUDA_CPP_API SpatialTransformerDescriptor + : public Descriptor< + cudnnSpatialTransformerStruct, + &cudnnCreateSpatialTransformerDescriptor, + &cudnnDestroySpatialTransformerDescriptor> { + void set(cudnnDataType_t dataType, int dim, int* size) { + AT_CUDNN_CHECK(cudnnSetSpatialTransformerNdDescriptor(mut_desc(), CUDNN_SAMPLER_BILINEAR, dataType, dim, size)); + } +}; + +// NOLINTNEXTLINE(bugprone-exception-escape) +struct TORCH_CUDA_CPP_API DropoutDescriptor + : public Descriptor< + cudnnDropoutStruct, + &cudnnCreateDropoutDescriptor, + &cudnnDestroyDropoutDescriptor> { + at::Tensor state; + + // Initialize a dropout descriptor's RNG state. + // WARNING: This function is very expensive, avoid calling this function! + void initialize_rng(cudnnHandle_t handle, float dropout, long long int seed, const TensorOptions& options) { + TORCH_INTERNAL_ASSERT(dropout > 0, "dropout must be nonzero; otherwise call set_no_dropout"); + size_t state_size = 0; + AT_CUDNN_CHECK(cudnnDropoutGetStatesSize(handle, &state_size)); + AT_ASSERT(options.device().type() == kCUDA); + AT_ASSERT(options.dtype() == kByte); + state = at::empty({static_cast(state_size)}, options); + AT_CUDNN_CHECK(cudnnSetDropoutDescriptor(mut_desc(), handle, dropout, state.data_ptr(), state_size, seed)); + } + + // Restore a dropout descriptor given a dropout probability and existing RNG state. 
+ void set(cudnnHandle_t handle, float dropout, const at::Tensor& state) { + TORCH_INTERNAL_ASSERT(dropout > 0, "dropout must be nonzero; otherwise call set_no_dropout"); + void *state_ptr = state.data_ptr(); + size_t state_size = state.size(0); + // NB: The seed doesn't actually matter, so we give a dummy value + AT_CUDNN_CHECK(cudnnRestoreDropoutDescriptor(mut_desc(), handle, dropout, state_ptr, state_size, 0 /* seed */)); + } + + // Restore a dropout descriptor corresponding to no dropout + void set_no_dropout(cudnnHandle_t handle) { + // NB: seed doesn't matter when dropout = 0, because no random number + // initialization actually takes place when there is no dropout. + // NB: Empirically, cudnnSetDropoutDescriptor is cheap when + // dropout == 0 + AT_CUDNN_CHECK(cudnnSetDropoutDescriptor(mut_desc(), handle, 0 /* dropout */, nullptr, 0 /* state_size */, 0 /* seed */)); + } +}; + +struct TORCH_CUDA_CPP_API RNNDescriptor : public Descriptor< + cudnnRNNStruct, + &cudnnCreateRNNDescriptor, + &cudnnDestroyRNNDescriptor> { + DropoutDescriptor dropout_desc_; + void set(cudnnHandle_t handle, +#ifdef USE_CUDNN_RNN_V8_API + int input_size, + bool packed, +#endif + int hidden_size, int proj_size, int num_layers, DropoutDescriptor&& dropout_desc, + cudnnRNNInputMode_t input_mode, cudnnDirectionMode_t bidirectional, + cudnnRNNMode_t mode, cudnnDataType_t datatype, cudnnDataType_t input_type, cudnnRNNAlgo_t algo, bool allow_tf32) { + dropout_desc_ = std::move(dropout_desc); +#ifndef USE_CUDNN_RNN_V8_API + AT_CUDNN_CHECK(cudnnSetRNNDescriptor_v6( + handle, + mut_desc(), + hidden_size, + num_layers, + dropout_desc_.desc(), + input_mode, + bidirectional, + mode, + algo, + datatype)); + if (proj_size != 0) { + AT_CUDNN_CHECK(cudnnSetRNNProjectionLayers( + handle, + /*rnnDesc=*/mut_desc(), + /*recProjSize=*/proj_size, + /*outProjSize=*/0)); + } + cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); + if (prop->major >= 7) { + if (input_type == CUDNN_DATA_HALF) { + 
cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_TENSOR_OP_MATH); + } + else if (input_type == CUDNN_DATA_FLOAT && !allow_tf32) { + cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_FMA_MATH); + } + else { + // Technically, as the default it's not necessary to explicitly + // set this. + cudnnSetRNNMatrixMathType(mut_desc(), CUDNN_DEFAULT_MATH); + } + } +#else + cudaDeviceProp* prop = at::cuda::getCurrentDeviceProperties(); + auto math_type = CUDNN_DEFAULT_MATH; + if (prop->major >= 7) { + if (input_type == CUDNN_DATA_HALF) { + math_type = CUDNN_TENSOR_OP_MATH; + } else if (!allow_tf32) { + math_type = CUDNN_FMA_MATH; + } + } + AT_CUDNN_CHECK(cudnnSetRNNDescriptor_v8( + mut_desc(), + algo, + mode, + CUDNN_RNN_DOUBLE_BIAS, + bidirectional, + input_mode, + input_type, + datatype, + math_type, + input_size, + hidden_size, + proj_size ? proj_size : hidden_size, + num_layers, + dropout_desc_.desc(), + packed ? CUDNN_RNN_PADDED_IO_DISABLED : CUDNN_RNN_PADDED_IO_ENABLED)); +#endif + } +}; + +struct TORCH_CUDA_CPP_API CTCLossDescriptor + : public Descriptor< + cudnnCTCLossStruct, + &cudnnCreateCTCLossDescriptor, + &cudnnDestroyCTCLossDescriptor> { + void set(cudnnDataType_t datatype) { + AT_CUDNN_CHECK(cudnnSetCTCLossDescriptor(mut_desc(), datatype)); + } + void setEx( + cudnnDataType_t datatype, + cudnnLossNormalizationMode_t normMode, + cudnnNanPropagation_t gradMode) { + AT_CUDNN_CHECK( + cudnnSetCTCLossDescriptorEx(mut_desc(), datatype, normMode, gradMode)); + } + void set_v8_v9( + cudnnDataType_t datatype, + cudnnLossNormalizationMode_t normMode, + cudnnNanPropagation_t gradMode, + int maxLabelLength) { +#if defined(CUDNN_VERSION) && CUDNN_VERSION >= 90000 + auto gradModev9 = CUDNN_CTC_ZERO_OOB_GRADIENTS; + if (gradMode == cudnnNanPropagation_t::CUDNN_PROPAGATE_NAN) { + gradModev9 = CUDNN_CTC_SKIP_OOB_GRADIENTS; + } + AT_CUDNN_CHECK( + cudnnSetCTCLossDescriptor_v9(mut_desc(), datatype, normMode, gradModev9, maxLabelLength)); +#else + AT_CUDNN_CHECK( + 
cudnnSetCTCLossDescriptor_v8(mut_desc(), datatype, normMode, gradMode, maxLabelLength)); +#endif + } + +}; + +struct TORCH_CUDA_CPP_API ActivationDescriptor + : public Descriptor< + cudnnActivationStruct, + &cudnnCreateActivationDescriptor, + &cudnnDestroyActivationDescriptor> { + void set(cudnnActivationMode_t mode) { + AT_ASSERT( + mode == CUDNN_ACTIVATION_RELU, + "TODO: support more cuDNN activation modes"); + AT_CUDNN_CHECK(cudnnSetActivationDescriptor( + mut_desc(), + mode, + cudnnNanPropagation_t::CUDNN_NOT_PROPAGATE_NAN, + std::numeric_limits::max())); + } +}; + +union Constant +{ + float f; + double d; + Constant(cudnnDataType_t dataType, double value) { + if (dataType == CUDNN_DATA_HALF || dataType == CUDNN_DATA_FLOAT) { + f = static_cast(value); + } else { + d = value; + } + } +}; + +} // namespace + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h new file mode 100644 index 0000000000000000000000000000000000000000..e9a63546fc601d365fafc562b73af83e721ebd09 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handle.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::native { + +TORCH_CUDA_CPP_API cudnnHandle_t getCudnnHandle(); +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h new file mode 100644 index 0000000000000000000000000000000000000000..af02865d70cd1a87c8e15dc27c13a62075cf9c08 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Handles.h @@ -0,0 +1,7 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h new file mode 100644 index 0000000000000000000000000000000000000000..c02eb65e04c0daa19c51b7ed217bd9684f042047 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Types.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace at::native { + +TORCH_CUDA_CPP_API cudnnDataType_t +getCudnnDataTypeFromScalarType(const at::ScalarType dtype); +cudnnDataType_t getCudnnDataType(const at::Tensor& tensor); + +int64_t cudnn_version(); + +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h new file mode 100644 index 0000000000000000000000000000000000000000..25f70509eb5289ec8bfc4cf5545565dd5dda9d48 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/Utils.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace at::native { + +// cuDNN has a buggy check for tensor being contiguous (that is, it does +// not ignore stride for dimension that is equal to 0). This function +// makes tensors which have zero stride contiguous, by setting the +// strides to 1 as cuDNN likes. +inline Tensor contiguousIfZeroInStrides(const Tensor& t) { + for (auto s : t.strides()) { + if (s == 0) + return t.contiguous(); + } + return t; +} + +} // namespace at::native + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..d291f5a778a53b678406ffaff59b86b15543b4e8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/cudnn/cudnn-wrapper.h @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#define STRINGIFY(x) #x +#define STRING(x) STRINGIFY(x) + +#if CUDNN_MAJOR < 8 || (CUDNN_MAJOR == 8 && CUDNN_MINOR < 5) +#pragma message("CuDNN v" STRING( \ + CUDNN_MAJOR) " found, but need at least CuDNN v8. You can get the latest version of CuDNN from https://developer.nvidia.com/cudnn or disable CuDNN with USE_CUDNN=0") +#pragma message "We strongly encourage you to move to 8.5 and above." +#pragma message "This message is intended to annoy you enough to update." +#endif + +#undef STRINGIFY +#undef STRING + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..6eb86b68bb755d4016e68aac8c801c72c27d72ab --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/AcceleratorHooksInterface.h @@ -0,0 +1,101 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +// AcceleratorHooksInterface is a shared interface provided by all +// accelerators to allow generic code. +// This interface is hook-based as it corresponds to all the functions +// that are going to be called in a generic way from the CPU code. + +struct TORCH_API AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + virtual ~AcceleratorHooksInterface() = default; + + // Whether this backend was enabled at compilation time. + // This function should NEVER throw. + virtual bool isBuilt() const { + return false; + } + + // Whether this backend can be used at runtime, meaning it was built, + // its runtime dependencies are available (driver) and at least one + // supported device can be used. + // This function should NEVER throw. This function should NOT initialize the context + // on any device (result of hasPrimaryContext below should not change). + // While it is acceptable for this function to poison fork, it is + // recommended to avoid doing so whenever possible. + virtual bool isAvailable() const { + return false; + } + + // Whether the device at device_index is fully initialized or not. 
+ virtual bool hasPrimaryContext(DeviceIndex device_index) const = 0; + + virtual void init() const { + TORCH_CHECK(false, "Backend doesn`t support init()"); + } + + virtual DeviceIndex deviceCount() const { + return 0; + } + + virtual void setCurrentDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support setCurrentDevice()"); + } + + virtual DeviceIndex getCurrentDevice() const { + TORCH_CHECK(false, "Backend doesn't support getCurrentDevice()"); + return -1; + } + + virtual DeviceIndex exchangeDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support exchangeDevice()"); + return -1; + } + + virtual DeviceIndex maybeExchangeDevice(DeviceIndex device) const { + TORCH_CHECK(false, "Backend doesn't support maybeExchangeDevice()"); + return -1; + } + + virtual bool isPinnedPtr(const void* data) const { + return false; + } + + virtual Allocator* getPinnedMemoryAllocator() const { + TORCH_CHECK(false, "Backend doesn't support getPinnedMemoryAllocator()"); + return nullptr; + } + + virtual Device getDeviceFromPtr(void* data) const { + TORCH_CHECK(false, "Backend doesn't support getDeviceFromPtr()"); + } + + virtual const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const { + TORCH_CHECK(false, "Backend doesn`t support getDefaultGenerator()"); + } + + virtual Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const { + TORCH_CHECK(false, "Backend doesn`t support getNewGenerator()"); + } +}; + +} // namespace at + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..4a1dd2fb1b0839e8424735a8c312a83a7ef10f24 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/CUDAHooksInterface.h @@ -0,0 +1,249 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. +namespace at { + +// Forward-declares at::cuda::NVRTC +namespace cuda { +struct NVRTC; +} // namespace cuda + +#ifdef _MSC_VER +constexpr const char* CUDA_HELP = + "PyTorch splits its backend into two shared libraries: a CPU library " + "and a CUDA library; this error has occurred because you are trying " + "to use some CUDA functionality, but the CUDA library has not been " + "loaded by the dynamic linker for some reason. The CUDA library MUST " + "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! " + "One common culprit is a lack of -INCLUDE:?warp_size@cuda@at@@YAHXZ " + "in your link arguments; many dynamic linkers will delete dynamic library " + "dependencies if you don't depend on any of their symbols. You can check " + "if this has occurred by using link on your binary to see if there is a " + "dependency on *_cuda.dll library."; +#else +constexpr const char* CUDA_HELP = + "PyTorch splits its backend into two shared libraries: a CPU library " + "and a CUDA library; this error has occurred because you are trying " + "to use some CUDA functionality, but the CUDA library has not been " + "loaded by the dynamic linker for some reason. 
The CUDA library MUST " + "be loaded, EVEN IF you don't directly use any symbols from the CUDA library! " + "One common culprit is a lack of -Wl,--no-as-needed in your link arguments; many " + "dynamic linkers will delete dynamic library dependencies if you don't " + "depend on any of their symbols. You can check if this has occurred by " + "using ldd on your binary to see if there is a dependency on *_cuda.so " + "library."; +#endif + +// The CUDAHooksInterface is an omnibus interface for any CUDA functionality +// which we may want to call into from CPU code (and thus must be dynamically +// dispatched, to allow for separate compilation of CUDA code). How do I +// decide if a function should live in this class? There are two tests: +// +// 1. Does the *implementation* of this function require linking against +// CUDA libraries? +// +// 2. Is this function *called* from non-CUDA ATen code? +// +// (2) should filter out many ostensible use-cases, since many times a CUDA +// function provided by ATen is only really ever used by actual CUDA code. +// +// TODO: Consider putting the stub definitions in another class, so that one +// never forgets to implement each virtual function in the real implementation +// in CUDAHooks. This probably doesn't buy us much though. +struct TORCH_API CUDAHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~CUDAHooksInterface() override = default; + + // Initialize THCState and, transitively, the CUDA state + void init() const override { + TORCH_CHECK(false, "Cannot initialize CUDA without ATen_cuda library. ", CUDA_HELP); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, + "Cannot get default CUDA generator without ATen_cuda library. 
", + CUDA_HELP); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, + "Cannot get CUDA generator without ATen_cuda library. ", + CUDA_HELP); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, "Cannot get device of pointer on CUDA without ATen_cuda library. ", CUDA_HELP); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + virtual bool hasCUDA() const { + return false; + } + + virtual bool hasCUDART() const { + return false; + } + + virtual bool hasMAGMA() const { + return false; + } + + virtual bool hasCuDNN() const { + return false; + } + + virtual bool hasCuSOLVER() const { + return false; + } + + virtual bool hasCuBLASLt() const { + return false; + } + + virtual bool hasROCM() const { + return false; + } + + virtual bool hasCKSDPA() const { + return false; + } + + virtual bool hasCKGEMM() const { + return false; + } + + virtual const at::cuda::NVRTC& nvrtc() const { + TORCH_CHECK(false, "NVRTC requires CUDA. ", CUDA_HELP); + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot call hasPrimaryContext(", device_index, ") without ATen_cuda library. ", CUDA_HELP); + } + + virtual DeviceIndex current_device() const { + return -1; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Pinned memory requires CUDA. ", CUDA_HELP); + } + + virtual Allocator* getCUDADeviceAllocator() const { + TORCH_CHECK(false, "CUDADeviceAllocator requires CUDA. 
", CUDA_HELP); + } + + virtual bool compiledWithCuDNN() const { + return false; + } + + virtual bool compiledWithMIOpen() const { + return false; + } + + virtual bool supportsDilatedConvolutionWithCuDNN() const { + return false; + } + + virtual bool supportsDepthwiseConvolutionWithCuDNN() const { + return false; + } + + virtual bool supportsBFloat16ConvolutionWithCuDNNv8() const { + return false; + } + + virtual bool supportsBFloat16RNNWithCuDNN() const { + return false; + } + + virtual long versionCuDNN() const { + TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionRuntimeCuDNN() const { + TORCH_CHECK(false, "Cannot query cuDNN version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionCuDNNFrontend() const { + TORCH_CHECK(false, "Cannot query cuDNN Frontend version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionMIOpen() const { + TORCH_CHECK(false, "Cannot query MIOpen version without ATen_cuda library. ", CUDA_HELP); + } + + virtual long versionCUDART() const { + TORCH_CHECK(false, "Cannot query CUDART version without ATen_cuda library. ", CUDA_HELP); + } + + virtual std::string showConfig() const { + TORCH_CHECK(false, "Cannot query detailed CUDA version without ATen_cuda library. ", CUDA_HELP); + } + + virtual double batchnormMinEpsilonCuDNN() const { + TORCH_CHECK(false, + "Cannot query batchnormMinEpsilonCuDNN() without ATen_cuda library. ", CUDA_HELP); + } + + virtual int64_t cuFFTGetPlanCacheMaxSize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual void cuFFTSetPlanCacheMaxSize(DeviceIndex /*device_index*/, int64_t /*max_size*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. 
", CUDA_HELP); + } + + virtual int64_t cuFFTGetPlanCacheSize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual void cuFFTClearPlanCache(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot access cuFFT plan cache without ATen_cuda library. ", CUDA_HELP); + } + + virtual int getNumGPUs() const { + return 0; + } + +#ifdef USE_ROCM + virtual bool isGPUArch(const std::vector& /*archs*/, DeviceIndex = -1 /*device_index*/) const { + TORCH_CHECK(false, "Cannot check GPU arch without ATen_cuda library. ", CUDA_HELP); + } +#endif + + virtual void deviceSynchronize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot synchronize CUDA device without ATen_cuda library. ", CUDA_HELP); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API CUDAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(CUDAHooksRegistry, CUDAHooksInterface, CUDAHooksArgs); +#define REGISTER_CUDA_HOOKS(clsname) \ + C10_REGISTER_CLASS(CUDAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const CUDAHooksInterface& getCUDAHooks(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h new file mode 100644 index 0000000000000000000000000000000000000000..50f8d2bea0c3f921cbd304ad0116d64d29fa0db1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/FunctionTraits.h @@ -0,0 +1,108 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +// Modified from https://stackoverflow.com/questions/7943525/is-it-possible-to-figure-out-the-parameter-type-and-return-type-of-a-lambda + +// Fallback, anything with an operator() +template +struct function_traits : public function_traits { +}; + +// Pointers to class members that are themselves functors. +// For example, in the following code: +// template +// struct S { +// func_t f; +// }; +// template +// S make_s(func_t f) { +// return S { .f = f }; +// } +// +// auto s = make_s([] (int, float) -> double { /* ... */ }); +// +// function_traits traits; +template +struct function_traits : public function_traits { +}; + +// Const class member functions +template +struct function_traits : public function_traits { +}; + +// Reference types +template +struct function_traits : public function_traits {}; +template +struct function_traits : public function_traits {}; + +// Free functions +template +struct function_traits { + // arity is the number of arguments. + enum { arity = sizeof...(Args) }; + + using ArgsTuple = std::tuple; + using result_type = ReturnType; + + template + struct arg + { + using type = typename std::tuple_element>::type; + // the i-th argument is equivalent to the i-th tuple element of a tuple + // composed of those arguments. 
+ }; +}; + +template +struct nullary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; +}; + +template +struct unary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; + using arg1_t = typename traits::template arg<0>::type; +}; + +template +struct binary_function_traits { + using traits = function_traits; + using result_type = typename traits::result_type; + using arg1_t = typename traits::template arg<0>::type; + using arg2_t = typename traits::template arg<1>::type; +}; + + +// Traits for calling with c10::guts::invoke, where member_functions have a first argument of ClassType +template +struct invoke_traits : public function_traits{ +}; + +template +struct invoke_traits : public invoke_traits{ +}; + +template +struct invoke_traits : public invoke_traits{ +}; + +template +struct invoke_traits : + public function_traits { +}; + +template +struct invoke_traits : + public function_traits { +}; + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..6fa8e1cd02db86b14ff315ec8daf8fed38097f31 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HIPHooksInterface.h @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. 
+namespace at { + +// The HIPHooksInterface is an omnibus interface for any HIP functionality +// which we may want to call into from CPU code (and thus must be dynamically +// dispatched, to allow for separate compilation of HIP code). See +// CUDAHooksInterface for more detailed motivation. +struct TORCH_API HIPHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~HIPHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library."); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot initialize HIP without ATen_hip library."); + } + + virtual bool hasHIP() const { + return false; + } + + virtual c10::DeviceIndex current_device() const { + return -1; + } + + bool isPinnedPtr(const void* /*data*/ ) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Pinned memory requires HIP."); + } + + virtual int getNumGPUs() const { + return 0; + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/ ) const override { + TORCH_CHECK(false, "Cannot check primary context without ATen_hip library."); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API HIPHooksArgs {}; + +TORCH_DECLARE_REGISTRY(HIPHooksRegistry, HIPHooksInterface, HIPHooksArgs); +#define REGISTER_HIP_HOOKS(clsname) \ + C10_REGISTER_CLASS(HIPHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const HIPHooksInterface& getHIPHooks(); + +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..77f69ffb6ac4cf4faeb5ad7f6890433459ea1235 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/HPUHooksInterface.h @@ -0,0 +1,62 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace at { + +struct TORCH_API HPUHooksInterface : AcceleratorHooksInterface { + ~HPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize HPU without HPU backend"); + } + + virtual bool hasHPU() const { + return false; + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK( + false, "Cannot get device of pointer on HPU without HPU backend"); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK( + false, + "You should register `HPUHooksInterface` for HPU before call `getPinnedMemoryAllocator`."); + } + + bool hasPrimaryContext( + [[maybe_unused]] DeviceIndex device_index) const override { + TORCH_CHECK( + false, + "You should register `HPUHooksInterface` for HPU before call `hasPrimaryContext`."); + } +}; + +struct TORCH_API HPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(HPUHooksRegistry, HPUHooksInterface, HPUHooksArgs); +#define REGISTER_HPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(HPUHooksRegistry, clsname, clsname) + +namespace detail { + +TORCH_API const at::HPUHooksInterface& getHPUHooks(); + +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either 
TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..1c8d81d3081519c9b55cbdf8b21b13535b2c4e05 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/IPUHooksInterface.h @@ -0,0 +1,48 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace at { + +struct TORCH_API IPUHooksInterface : AcceleratorHooksInterface { + ~IPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + return false; + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } + + Generator getNewGenerator( + DeviceIndex /*device_index*/ = -1) const override { + TORCH_CHECK(false, "Cannot initialize IPU without ATen_ipu library."); + } +}; + +struct TORCH_API IPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(IPUHooksRegistry, IPUHooksInterface, IPUHooksArgs); +#define REGISTER_IPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(IPUHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const IPUHooksInterface& getIPUHooks(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..a214c0ef37db323cc536cebdcee3ccd6187ceebf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MAIAHooksInterface.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +// NB: Class must live in `at` due to limitations of Registry.h. +namespace at { + +struct TORCH_API MAIAHooksInterface : AcceleratorHooksInterface { + // This should never actually be implemented, but it is used to + // squelch -Werror=non-virtual-dtor + ~MAIAHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library."); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + TORCH_CHECK(false, "Cannot initialize MAIA without ATen_maia library."); + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK(false, "Cannot query detailed MAIA version information."); + } +}; + +// NB: dummy argument to suppress "ISO C++11 requires at least one argument +// for the "..." in a variadic macro" +struct TORCH_API MAIAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MAIAHooksRegistry, MAIAHooksInterface, MAIAHooksArgs); +#define REGISTER_MAIA_HOOKS(clsname) \ + C10_REGISTER_CLASS(MAIAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MAIAHooksInterface& getMAIAHooks(); +} // namespace detail + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..7ebeceb095ac3861f62777fbe11a299064233f07 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MPSHooksInterface.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright © 2022 Apple Inc. + +#pragma once + +#include + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") +namespace at { + +struct TORCH_API MPSHooksInterface : AcceleratorHooksInterface { + // this fails the implementation if MPSHooks functions are called, but + // MPS backend is not present. + #define FAIL_MPSHOOKS_FUNC(func) \ + TORCH_CHECK(false, "Cannot execute ", func, "() without MPS backend."); + + ~MPSHooksInterface() override = default; + + // Initialize the MPS library state + void init() const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual bool hasMPS() const { + return false; + } + virtual bool isOnMacOSorNewer(unsigned major = 13, unsigned minor = 0) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual Allocator* getMPSDeviceAllocator() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void deviceSynchronize() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void commitStream() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void* getCommandBuffer() const { + FAIL_MPSHOOKS_FUNC(__func__); 
+ } + virtual void* getDispatchQueue() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void emptyCache() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getCurrentAllocatedMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getDriverAllocatedMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual size_t getRecommendedMaxMemory() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void setMemoryFraction(double /*ratio*/) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void profilerStartTrace(const std::string& mode, bool waitUntilCompleted) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void profilerStopTrace() const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual uint32_t acquireEvent(bool enable_timing) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + Device getDeviceFromPtr(void* data) const override { + TORCH_CHECK(false, "Cannot get device of pointer on MPS without ATen_mps library. "); + } + virtual void releaseEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void recordEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void waitForEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual void synchronizeEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual bool queryEvent(uint32_t event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + virtual double elapsedTimeOfEvents(uint32_t start_event_id, uint32_t end_event_id) const { + FAIL_MPSHOOKS_FUNC(__func__); + } + bool hasPrimaryContext(DeviceIndex device_index) const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + bool isPinnedPtr(const void* data) const override { + return false; + } + Allocator* getPinnedMemoryAllocator() const override { + FAIL_MPSHOOKS_FUNC(__func__); + } + #undef FAIL_MPSHOOKS_FUNC +}; + +struct TORCH_API MPSHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MPSHooksRegistry, MPSHooksInterface, MPSHooksArgs); +#define 
REGISTER_MPS_HOOKS(clsname) \ + C10_REGISTER_CLASS(MPSHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MPSHooksInterface& getMPSHooks(); + +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..830e9ac171602a7507b7b007a2c4059bbb57316c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/MTIAHooksInterface.h @@ -0,0 +1,218 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +#include + +#include +#include + +#include +namespace at { +class Context; +} + +namespace at { +constexpr const char* MTIA_HELP = + "The MTIA backend requires MTIA extension for PyTorch;" + "this error has occurred because you are trying " + "to use some MTIA's functionality without MTIA extension included."; + +struct TORCH_API MTIAHooksInterface : AcceleratorHooksInterface { +// this fails the implementation if MTIAHooks functions are called, but +// MTIA backend is not present. +#define FAIL_MTIAHOOKS_FUNC(func) TORCH_CHECK(false, "Cannot execute ", func, "() without MTIA backend."); + + ~MTIAHooksInterface() override = default; + + void init() const override { + // Avoid logging here, since MTIA needs init devices first then it will know + // how many devices are available. Make it as no-op if mtia extension is not + // dynamically loaded. 
+ return; + } + + virtual bool hasMTIA() const { + return false; + } + + DeviceIndex deviceCount() const override { + return 0; + } + + virtual void deviceSynchronize(c10::DeviceIndex /*device_index*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual std::string showConfig() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + bool hasPrimaryContext(DeviceIndex /*device_index*/) const override { + return false; + } + + void setCurrentDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + DeviceIndex getCurrentDevice() const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + DeviceIndex exchangeDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + DeviceIndex maybeExchangeDevice(DeviceIndex /*device*/) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual c10::Stream getCurrentStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA); + } + + virtual int64_t getCurrentRawStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual c10::Stream getDefaultStream(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return c10::Stream::unpack3(-1, 0, c10::DeviceType::MTIA); + } + + virtual void setCurrentStream(const c10::Stream& /*stream*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + bool isPinnedPtr(const void* /*data*/) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* memoryStats(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* getDeviceCapability(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual PyObject* getDeviceProperties(DeviceIndex device) const { + FAIL_MTIAHOOKS_FUNC(__func__); 
+ return nullptr; + } + + virtual void emptyCache() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void recordMemoryHistory(const std::optional& /*enabled*/, + const std::string& /*stacks*/, + size_t /*max_entries*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual PyObject* memorySnapshot(const std::optional& local_path) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return nullptr; + } + + virtual DeviceIndex getDeviceCount() const { + FAIL_MTIAHOOKS_FUNC(__func__); + return 0; + } + + virtual void resetPeakMemoryStats(DeviceIndex /*device*/) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void attachOutOfMemoryObserver(PyObject* observer) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return; + } + + virtual bool isAvailable() const override; + + /* MTIAGraph related APIs */ + virtual int64_t mtiagraphCreate(bool keep_graph = false) const { + FAIL_MTIAHOOKS_FUNC(__func__); + return -1; + } + + virtual void mtiagraphDestroy(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphCaptureBegin(int64_t handle, MempoolId_t pool) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphCaptureEnd(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphInstantiate(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphReplay(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual void mtiagraphReset(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual MempoolId_t mtiagraphPool(int64_t handle) const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual MempoolId_t graphPoolHandle() const { + FAIL_MTIAHOOKS_FUNC(__func__); + } + + virtual const Generator& getDefaultGenerator(DeviceIndex) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + static Generator dummy_generator; + return dummy_generator; + } + + virtual Generator getNewGenerator(DeviceIndex) const override { + FAIL_MTIAHOOKS_FUNC(__func__); + static 
Generator dummy_generator; + return dummy_generator; + } +}; + +struct TORCH_API MTIAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(MTIAHooksRegistry, MTIAHooksInterface, MTIAHooksArgs); +#define REGISTER_MTIA_HOOKS(clsname) C10_REGISTER_CLASS(MTIAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const MTIAHooksInterface& getMTIAHooks(); +TORCH_API bool isMTIAHooksBuilt(); +} // namespace detail +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..3f0dcc93a0cdcaf1f0837eb9b79237eceec67016 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/PrivateUse1HooksInterface.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +struct TORCH_API PrivateUse1HooksInterface : AcceleratorHooksInterface { +#define FAIL_PRIVATEUSE1HOOKS_FUNC(func) \ + TORCH_CHECK_NOT_IMPLEMENTED( \ + false, \ + "You should register `PrivateUse1HooksInterface`", \ + "by `RegisterPrivateUse1HooksInterface` and implement `", \ + func, \ + "` at the same time for PrivateUse1."); + + ~PrivateUse1HooksInterface() override = default; + + bool isBuilt() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool isAvailable() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + const at::Generator& getDefaultGenerator( + c10::DeviceIndex device_index) const override { + 
FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + // TODO(FFFrog): Preserved for BC and will be removed in the future. + if (at::GetGeneratorPrivate().has_value()) + return at::GetGeneratorForPrivateuse1(device_index); + + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + at::Device getDeviceFromPtr(void* data) const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + Allocator* getPinnedMemoryAllocator() const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + + void init() const override {} + virtual void resizePrivateUse1Bytes( + const c10::Storage& storage, + size_t newsize) const { + FAIL_PRIVATEUSE1HOOKS_FUNC(__func__); + } + +#undef FAIL_PRIVATEUSE1HOOKS_FUNC +}; + +struct TORCH_API PrivateUse1HooksArgs {}; + +TORCH_API void RegisterPrivateUse1HooksInterface( + at::PrivateUse1HooksInterface* hook_); + +TORCH_API bool isPrivateUse1HooksRegistered(); + +namespace detail { + +TORCH_API const at::PrivateUse1HooksInterface& getPrivateUse1Hooks(); + +} // namespace detail + +} // namespace at + +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..35498352e43964c0280f4ab760ee86968a118e71 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XLAHooksInterface.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +constexpr const char* XLA_HELP = + "This error has occurred because you are trying " + "to use some XLA functionality, but the XLA library has not been " + "loaded by the dynamic linker. You must load xla libraries by `import torch_xla`"; + +struct TORCH_API XLAHooksInterface : AcceleratorHooksInterface { + ~XLAHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize XLA without torch_xla library. ", XLA_HELP); + } + + virtual bool hasXLA() const { + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK( + false, + "Cannot query detailed XLA version without torch_xla library. ", + XLA_HELP); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, "Cannot get default XLA generator without torch_xla library. ", XLA_HELP); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot get XLA generator without torch_xla library. ", XLA_HELP); + } + + virtual DeviceIndex getCurrentDevice() const override { + TORCH_CHECK(false, "Cannot get current XLA device without torch_xla library. 
", XLA_HELP); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, "Cannot get device of pointer on XLA without torch_xla library. ", XLA_HELP); + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Cannot get XLA pinned memory allocator without torch_xla library. ", XLA_HELP); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot query primary context without torch_xla library. ", XLA_HELP); + } + +}; + +struct TORCH_API XLAHooksArgs {}; + +TORCH_DECLARE_REGISTRY(XLAHooksRegistry, XLAHooksInterface, XLAHooksArgs); +#define REGISTER_XLA_HOOKS(clsname) \ + C10_REGISTER_CLASS(XLAHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const XLAHooksInterface& getXLAHooks(); +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h new file mode 100644 index 0000000000000000000000000000000000000000..51106c50a569568bab82336031e24d648cc31d27 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/detail/XPUHooksInterface.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wunused-parameter") + +namespace at { + +struct TORCH_API XPUHooksInterface : AcceleratorHooksInterface{ + ~XPUHooksInterface() override = default; + + void init() const override { + TORCH_CHECK(false, "Cannot initialize XPU without ATen_xpu library."); + } + + virtual bool hasXPU() const { + return false; + } + + virtual std::string showConfig() const { + TORCH_CHECK( + false, + "Cannot query detailed XPU version without ATen_xpu library."); + } + + virtual int32_t getGlobalIdxFromDevice(const Device& device) const { + TORCH_CHECK(false, "Cannot get XPU global device index without ATen_xpu library."); + } + + const Generator& getDefaultGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK( + false, "Cannot get default XPU generator without ATen_xpu library."); + } + + Generator getNewGenerator( + [[maybe_unused]] DeviceIndex device_index = -1) const override { + TORCH_CHECK(false, "Cannot get XPU generator without ATen_xpu library."); + } + + virtual DeviceIndex getNumGPUs() const { + return 0; + } + + virtual DeviceIndex current_device() const { + TORCH_CHECK(false, "Cannot get current device on XPU without ATen_xpu library."); + } + + Device getDeviceFromPtr(void* /*data*/) const override { + TORCH_CHECK(false, 
"Cannot get device of pointer on XPU without ATen_xpu library."); + } + + virtual void deviceSynchronize(DeviceIndex /*device_index*/) const { + TORCH_CHECK(false, "Cannot synchronize XPU device without ATen_xpu library."); + } + + Allocator* getPinnedMemoryAllocator() const override { + TORCH_CHECK(false, "Cannot get XPU pinned memory allocator without ATen_xpu library."); + } + + bool isPinnedPtr(const void* data) const override { + return false; + } + + bool hasPrimaryContext(DeviceIndex device_index) const override { + TORCH_CHECK(false, "Cannot query primary context without ATen_xpu library."); + } +}; + +struct TORCH_API XPUHooksArgs {}; + +TORCH_DECLARE_REGISTRY(XPUHooksRegistry, XPUHooksInterface, XPUHooksArgs); +#define REGISTER_XPU_HOOKS(clsname) \ + C10_REGISTER_CLASS(XPUHooksRegistry, clsname, clsname) + +namespace detail { +TORCH_API const XPUHooksInterface& getXPUHooks(); +} // namespace detail +} // namespace at +C10_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h new file mode 100644 index 0000000000000000000000000000000000000000..c36e999b1e107a1f2ea1e4538eed7c0a9da16564 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/metal/Context.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#ifndef MetalContext_h +#define MetalContext_h + +#include + +#include + +namespace at::metal { + +struct MetalInterface { + virtual ~MetalInterface() = default; + virtual bool is_metal_available() const = 0; + virtual at::Tensor& metal_copy_(at::Tensor& self, const at::Tensor& src) + const = 0; +}; + +extern std::atomic g_metal_impl_registry; + +class MetalImplRegistrar { + public: + explicit MetalImplRegistrar(MetalInterface* /*impl*/); +}; + +at::Tensor& metal_copy_(at::Tensor& self, const at::Tensor& src); + +} // namespace at::metal + +namespace at::native { +bool is_metal_available(); +} // namespace at::native + +#endif /* MetalContext_h */ + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h new file mode 100644 index 0000000000000000000000000000000000000000..49a169367ec1071385aaf583c9665c83cc0324b7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_add_relu.h @@ -0,0 +1,69 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_add_relu.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor +inline at::Tensor _add_relu(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Tensor::call(self, other, alpha); +} + +// aten::_add_relu_.Tensor(Tensor(a!) self, Tensor other, *, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & _add_relu_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu__Tensor::call(self, other, alpha); +} + +// aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_out::call(self, other, alpha, out); +} +// aten::_add_relu.out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _add_relu_outf(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::_add_relu_out::call(self, other, alpha, out); +} + +// aten::_add_relu.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor +inline at::Tensor _add_relu(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Scalar::call(self, other, alpha); +} + +// aten::_add_relu_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!) +inline at::Tensor & _add_relu_(at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu__Scalar::call(self, other, alpha); +} + +// aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha=1) { + return at::_ops::_add_relu_Scalar_out::call(self, other, alpha, out); +} +// aten::_add_relu.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _add_relu_outf(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::_add_relu_Scalar_out::call(self, other, alpha, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e0bddc3ddebc765890a264fabce60e80dd704f8d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _backward(const at::Tensor & self, at::TensorList inputs, const ::std::optional & gradient={}, ::std::optional retain_graph=::std::nullopt, bool create_graph=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bad0025d2751eb90ca51aca5fddddfc7fa7a5d95 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_batch_norm_impl_index_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _batch_norm_impl_index_backward(int64_t impl_index, const at::Tensor & input, const at::Tensor & grad_output, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var_transform, bool train, double eps, ::std::array output_mask, const at::Tensor & reservedSpace); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f51b5beca2abe8a6b0f7d4c74c2833debd153ece --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Double_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _cast_Double { + using schema = at::Tensor (const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_cast_Double"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_cast_Double(Tensor self, bool non_blocking=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, bool non_blocking); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, bool non_blocking); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h new file mode 100644 index 0000000000000000000000000000000000000000..b1dd26435a3d5b961a14d0d49001bcb7736240d7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cast_Int.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_cast_Int(Tensor self, bool non_blocking=False) -> Tensor +inline at::Tensor _cast_Int(const at::Tensor & self, bool non_blocking=false) { + return at::_ops::_cast_Int::call(self, non_blocking); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..e13feb2ca7d02c0d54a2a7d4c94a4a579809a4a8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_choose_qparams_per_tensor.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_choose_qparams_per_tensor(Tensor self, bool reduce_range=False) -> (float, int) +inline ::std::tuple _choose_qparams_per_tensor(const at::Tensor & self, bool reduce_range=false) { + return at::_ops::_choose_qparams_per_tensor::call(self, reduce_range); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46a8ede799981e1f40f3894a3f120b7e85cce1fe --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _conj_physical(const at::Tensor & self); +TORCH_API at::Tensor & _conj_physical_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & _conj_physical_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3ddeb28a2a4281175cdb7afc3bee589b9d87c947 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_conj_physical_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _conj_physical { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_conj_physical"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_conj_physical(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _conj_physical_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_conj_physical"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_conj_physical.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h new file mode 100644 index 0000000000000000000000000000000000000000..b45d0a750f39873fbd1c071d659430eec43726fc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_indices_from_csr_to_coo.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_convert_indices_from_csr_to_coo(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False) -> Tensor +inline at::Tensor _convert_indices_from_csr_to_coo(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false) { + return at::_ops::_convert_indices_from_csr_to_coo::call(crow_indices, col_indices, out_int32, transpose); +} + +// aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _convert_indices_from_csr_to_coo_out(at::Tensor & out, const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32=false, bool transpose=false) { + return at::_ops::_convert_indices_from_csr_to_coo_out::call(crow_indices, col_indices, out_int32, transpose, out); +} +// aten::_convert_indices_from_csr_to_coo.out(Tensor crow_indices, Tensor col_indices, *, bool out_int32=False, bool transpose=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _convert_indices_from_csr_to_coo_outf(const at::Tensor & crow_indices, const at::Tensor & col_indices, bool out_int32, bool transpose, at::Tensor & out) { + return at::_ops::_convert_indices_from_csr_to_coo_out::call(crow_indices, col_indices, out_int32, transpose, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h new file mode 100644 index 0000000000000000000000000000000000000000..e99a52d3fc5bae9f3bd590d4a750e5c5d4a66b32 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_convert_weight_to_int4pack.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_convert_weight_to_int4pack(Tensor self, int innerKTiles) -> Tensor +inline at::Tensor _convert_weight_to_int4pack(const at::Tensor & self, int64_t innerKTiles) { + return at::_ops::_convert_weight_to_int4pack::call(self, 
innerKTiles); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h new file mode 100644 index 0000000000000000000000000000000000000000..5e7ad78d1909d9d9a244f41ec60b969be0200068 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_copy_from.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_copy_from(Tensor self, Tensor dst, bool non_blocking=False) -> Tensor +inline at::Tensor _copy_from(const at::Tensor & self, const at::Tensor & dst, bool non_blocking=false) { + return at::_ops::_copy_from::call(self, dst, non_blocking); +} + +// aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _copy_from_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & dst, bool non_blocking=false) { + return at::_ops::_copy_from_out::call(self, dst, non_blocking, out); +} +// aten::_copy_from.out(Tensor self, Tensor dst, bool non_blocking=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _copy_from_outf(const at::Tensor & self, const at::Tensor & dst, bool non_blocking, at::Tensor & out) { + return at::_ops::_copy_from_out::call(self, dst, non_blocking, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0e9995875704625993ccfc7049f9922aa177413a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cslt_compress_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _cslt_compress(const at::Tensor & input); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h new file mode 100644 index 0000000000000000000000000000000000000000..710bb07a3fdc9d3d6f4cea4ef02af89b9ae67ab2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cudnn_init_dropout_state_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _cudnn_init_dropout_state_out(double dropout, bool train, int64_t dropout_seed, at::Tensor & out); +TORCH_API at::Tensor _cudnn_init_dropout_state(double dropout, bool train, int64_t dropout_seed, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h new file mode 100644 index 0000000000000000000000000000000000000000..aa988b125eb6561e593eb3dd08c68eae48a18d52 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_cufft_get_plan_cache_size.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_cufft_get_plan_cache_size(DeviceIndex device_index) -> int +inline int64_t _cufft_get_plan_cache_size(at::DeviceIndex device_index) { + return at::_ops::_cufft_get_plan_cache_size::call(device_index); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..79e94a580a96c18badb99633b0b740caec451b2f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dirichlet_grad.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_dirichlet_grad(Tensor x, Tensor alpha, Tensor total) -> Tensor +inline at::Tensor _dirichlet_grad(const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total) { + return at::_ops::_dirichlet_grad::call(x, alpha, total); +} + +// aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _dirichlet_grad_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total) { + return at::_ops::_dirichlet_grad_out::call(x, alpha, total, out); +} +// aten::_dirichlet_grad.out(Tensor x, Tensor alpha, Tensor total, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _dirichlet_grad_outf(const at::Tensor & x, const at::Tensor & alpha, const at::Tensor & total, at::Tensor & out) { + return at::_ops::_dirichlet_grad_out::call(x, alpha, total, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6b0077774af5d0af67b9ecc5e40c335d1412002b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_matmul_4bit_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _dyn_quant_matmul_4bit(const at::Tensor & inp, const at::Tensor & packed_weights, int64_t block_size, int64_t in_features, int64_t out_features); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..16588a6610e05dd3af70815b54a4fefe54ad0c26 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_dyn_quant_pack_4bit_weight_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _dyn_quant_pack_4bit_weight(const at::Tensor & weights, const at::Tensor & scales_zeros, const ::std::optional & bias, int64_t block_size, int64_t in_features, int64_t out_features); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..74b3293e8b507c96e7b0598f0910ae073beaa1fa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficient_attention_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple _efficient_attention_backward(const at::Tensor & grad_out_, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const ::std::optional & bias, const at::Tensor & out, const ::std::optional & cu_seqlens_q, const ::std::optional & cu_seqlens_k, int64_t max_seqlen_q, int64_t max_seqlen_k, const at::Tensor & logsumexp, double dropout_p, const at::Tensor & philox_seed, const at::Tensor & philox_offset, int64_t custom_mask_type, bool bias_requires_grad, ::std::optional scale=::std::nullopt, ::std::optional num_splits_key=::std::nullopt, ::std::optional window_size=::std::nullopt, bool shared_storage_dqdkdv=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..58fad5b1b2ff28a9c997fee57272d9ca89258453 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_efficientzerotensor_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _efficientzerotensor_out(at::Tensor & out, at::IntArrayRef size); +TORCH_API at::Tensor & _efficientzerotensor_outf(at::IntArrayRef size, at::Tensor & out); +TORCH_API at::Tensor & _efficientzerotensor_symint_out(at::Tensor & out, c10::SymIntArrayRef size); +TORCH_API at::Tensor & _efficientzerotensor_symint_outf(c10::SymIntArrayRef size, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9544335046d776821fcdee82c30e04c3b8db08ab --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _embedding_bag_backward(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor _embedding_bag_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3a3273c8d101d64f7006ad88ccd25aa7b6eada05 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_backward_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _embedding_bag_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offsets, const at::Tensor & offset2bag, const at::Tensor & 
bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, bool sparse, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..080dfad3fa465a9219a097b99b448859f611f13f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & _embedding_bag_dense_backward_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor & _embedding_bag_dense_backward_outf(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); +TORCH_API at::Tensor & _embedding_bag_dense_backward_symint_out(at::Tensor & out, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor & _embedding_bag_dense_backward_symint_outf(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..aefb66ab79c3e91cf8c14fa6fb89b9fe0e53131a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_cpu_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _embedding_bag_dense_backward(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, int64_t num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); +TORCH_API at::Tensor _embedding_bag_dense_backward_symint(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx=-1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..04099d7303e4a27d490cd64a1293de6175c3b334 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_embedding_bag_dense_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _embedding_bag_dense_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, bool, int64_t, const ::std::optional &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_dense_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx); +}; + +struct TORCH_API _embedding_bag_dense_backward_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, bool, int64_t, const ::std::optional &, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_embedding_bag_dense_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = 
"_embedding_bag_dense_backward.out(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & indices, const at::Tensor & offset2bag, const at::Tensor & bag_size, const at::Tensor & maximum_indices, c10::SymInt num_weights, bool scale_grad_by_freq, int64_t mode, const ::std::optional & per_sample_weights, int64_t padding_idx, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..dec0cb882832bae79669a73d6b2c98556f25ddcd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fake_quantize_learnable_per_channel_affine_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _fake_quantize_learnable_per_channel_affine { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, int64_t, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fake_quantize_learnable_per_channel_affine"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor); +}; + +struct TORCH_API _fake_quantize_learnable_per_channel_affine_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t, int64_t, int64_t, double, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fake_quantize_learnable_per_channel_affine"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_fake_quantize_learnable_per_channel_affine.out(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max, double grad_factor, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6d2b7cbc16108af2caae7873de47b64803a8bb47 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_abs_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_abs { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_abs(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_abs_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_abs_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_abs_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_abs"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_abs.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h new file mode 100644 index 0000000000000000000000000000000000000000..28971f0626c3da2d935416ba0a04723e0391f1c3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_addcmul_native.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_addcmul_scalar_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API void _foreach_addcmul_Scalar_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value, at::TensorList out); +TORCH_API void foreach_tensor_addcmul_scalar_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API ::std::vector foreach_tensor_addcmul_scalar_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API void foreach_tensor_addcmul_scalar_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Scalar & value=1); +TORCH_API ::std::vector foreach_tensor_addcmul_scalarlist_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API void _foreach_addcmul_ScalarList_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars, at::TensorList out); +TORCH_API void 
foreach_tensor_addcmul_scalarlist_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_scalarlist_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API void foreach_tensor_addcmul_scalarlist_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_tensor_slow(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API void _foreach_addcmul_Tensor_out(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars, at::TensorList out); +TORCH_API void foreach_tensor_addcmul_tensor_slow_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API ::std::vector foreach_tensor_addcmul_tensor_cuda(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +TORCH_API void foreach_tensor_addcmul_tensor_cuda_(at::TensorList self, at::TensorList tensor1, at::TensorList tensor2, const at::Tensor & scalars); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h new file mode 100644 index 0000000000000000000000000000000000000000..0b793c28aa47248db77eea0d3525541824021d54 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_clamp_min_native.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_clamp_min_scalar_kernel_slow(at::TensorList self, const at::Scalar & scalar); +TORCH_API void _foreach_clamp_min_Scalar_out(at::TensorList self, const at::Scalar & scalar, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_scalar_kernel_slow_(at::TensorList self, const at::Scalar & scalar); +TORCH_API ::std::vector foreach_tensor_clamp_min_scalar_kernel_cuda(at::TensorList self, const at::Scalar & scalar); +TORCH_API void foreach_tensor_clamp_min_scalar_kernel_cuda_(at::TensorList self, const at::Scalar & scalar); +TORCH_API ::std::vector foreach_tensor_clamp_min_list_kernel_slow(at::TensorList self, at::TensorList other); +TORCH_API void _foreach_clamp_min_List_out(at::TensorList self, at::TensorList other, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_list_kernel_slow_(at::TensorList self, at::TensorList other); +TORCH_API ::std::vector foreach_tensor_clamp_min_list_kernel_cuda(at::TensorList self, at::TensorList other); +TORCH_API void foreach_tensor_clamp_min_list_kernel_cuda_(at::TensorList self, at::TensorList other); +TORCH_API ::std::vector 
foreach_tensor_clamp_min_scalarlist_kernel_slow(at::TensorList self, at::ArrayRef scalars); +TORCH_API void _foreach_clamp_min_ScalarList_out(at::TensorList self, at::ArrayRef scalars, at::TensorList out); +TORCH_API void foreach_tensor_clamp_min_scalarlist_kernel_slow_(at::TensorList self, at::ArrayRef scalars); +TORCH_API ::std::vector foreach_tensor_clamp_min_scalarlist_kernel_cuda(at::TensorList self, at::ArrayRef scalars); +TORCH_API void foreach_tensor_clamp_min_scalarlist_kernel_cuda_(at::TensorList self, at::ArrayRef scalars); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d086912353c24932707d866ac0fac91f0d66e74c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_cos_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_cos_slow(at::TensorList self); +TORCH_API void _foreach_cos_out(at::TensorList self, at::TensorList out); +TORCH_API void foreach_tensor_cos_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_cos_cuda(at::TensorList self); +TORCH_API void foreach_tensor_cos_cuda_(at::TensorList self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or 
TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..467f1210115b746f14e24b154e601e0708bda7e3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_log10_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_log10 { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_log10(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_log10_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_log10_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void 
redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_log10_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_log10"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_log10.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..d37826985accd96074b2e3152269f815af408f94 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_neg_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_neg { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_neg(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_neg_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_neg_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_neg_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_neg"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_neg.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0d415c565c93374ee145a9b5636019a5e48dcb1e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_norm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_norm(at::TensorList self, const at::Scalar & ord=2, ::std::optional dtype=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..58b3498ddcd7b476c2ee3732c7d72ed2dc60dad0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sigmoid_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _foreach_sigmoid { + using schema = ::std::vector (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_sigmoid(Tensor[] self) -> Tensor[]"; + static ::std::vector call(at::TensorList self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_sigmoid_ { + using schema = void (at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_foreach_sigmoid_(Tensor(a!)[] self) -> ()"; + static void call(at::TensorList self); + static void 
redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self); +}; + +struct TORCH_API _foreach_sigmoid_out { + using schema = void (at::TensorList, at::TensorList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_foreach_sigmoid"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_foreach_sigmoid.out(Tensor[] self, *, Tensor(a!)[] out) -> ()"; + static void call(at::TensorList self, at::TensorList out); + static void redispatch(c10::DispatchKeySet dispatchKeySet, at::TensorList self, at::TensorList out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b1e28df9f18f28096eccfeb64cb88b71e10bb6c4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_sign_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::vector foreach_tensor_sign_slow(at::TensorList self); +TORCH_API void _foreach_sign_out(at::TensorList self, at::TensorList out); +TORCH_API void foreach_tensor_sign_slow_(at::TensorList self); +TORCH_API ::std::vector foreach_tensor_sign_cuda(at::TensorList self); +TORCH_API void foreach_tensor_sign_cuda_(at::TensorList self); +} // 
namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2c0ffd1f66bb56be934f0a4e394690da6d17d90b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tan_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::vector _foreach_tan(at::TensorList self); +TORCH_API void _foreach_tan_out(at::TensorList out, at::TensorList self); +TORCH_API void _foreach_tan_outf(at::TensorList self, at::TensorList out); +TORCH_API void _foreach_tan_(at::TensorList self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..02930b3ab45a704a44825a4ac0f2993f45075010 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_foreach_tanh_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::vector _foreach_tanh(at::TensorList self); +TORCH_API void _foreach_tanh_(at::TensorList self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ad4e53e4898b97553362eef909aa89ce236a34ce --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_functional_assert_async_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _functional_assert_async_msg { + using schema = at::Tensor (const at::Tensor &, c10::string_view, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_functional_assert_async"; + static constexpr const char* overload_name = "msg"; + static constexpr const char* schema_str = "_functional_assert_async.msg(Tensor self, str assert_msg, Tensor dep_token) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view assert_msg, const at::Tensor & dep_token); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view assert_msg, const at::Tensor & dep_token); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h new file mode 100644 index 0000000000000000000000000000000000000000..69be06e0332ff58d9149a175356e8c6eb11155e4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fused_sgd_native.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_out(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +TORCH_API void _fused_sgd_kernel_cpu_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_kernel_cuda_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, double lr, double dampening, 
bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API ::std::tuple<::std::vector,::std::vector,::std::vector> _fused_sgd(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_tensor_lr_out(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale, const ::std::optional & found_inf, at::TensorList out); +TORCH_API void _fused_sgd_kernel_cpu_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +TORCH_API void _fused_sgd_kernel_cuda_(at::TensorList self, at::TensorList grads, at::TensorList momentum_buffer_list, double weight_decay, double momentum, const at::Tensor & lr, double dampening, bool nesterov, bool maximize, bool is_first_step, const ::std::optional & grad_scale={}, const ::std::optional & found_inf={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8a294abafa4537c004b816a0fda4adf48b0f009b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_fw_primal_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _fw_primal { + using schema = at::Tensor (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_fw_primal"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_fw_primal(Tensor(a) self, int level) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t level); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t level); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6d814108e192fd573e14d98d3b8881826e3582f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_grouped_mm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor _grouped_mm(const at::Tensor & self, const at::Tensor & mat2, const ::std::optional & offs={}, const ::std::optional & bias={}, ::std::optional out_dtype=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..0c5467659e037beca65e47de47735d752c93058e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_compatible_shallow_copy_type_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _has_compatible_shallow_copy_type { + using schema = bool (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_has_compatible_shallow_copy_type"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_has_compatible_shallow_copy_type(Tensor self, Tensor from) -> bool"; + static bool call(const at::Tensor & self, const at::Tensor & from); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & from); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..da42843b41e00c7b598c360a93fff8ec18ae5a76 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_has_same_storage_numel_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API bool _has_same_storage_numel(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..817d94a7c8888141bb174579e496df666f47edba --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_index_put_impl.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!) +inline at::Tensor & _index_put_impl_(at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl_::call(self, indices, values, accumulate, unsafe); +} + +// aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _index_put_impl_out(at::Tensor & out, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl_out::call(self, indices, values, accumulate, unsafe, out); +} +// aten::_index_put_impl.out(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _index_put_impl_outf(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, bool unsafe, at::Tensor & out) { + return at::_ops::_index_put_impl_out::call(self, indices, values, accumulate, unsafe, out); +} + +// aten::_index_put_impl(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor +inline at::Tensor _index_put_impl(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false, bool unsafe=false) { + return at::_ops::_index_put_impl::call(self, indices, values, accumulate, unsafe); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h new file mode 100644 index 0000000000000000000000000000000000000000..f164af4080144d4eee050e1eb338c79d6b90a3ca --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_all_true.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_is_all_true(Tensor self) -> Tensor +inline at::Tensor _is_all_true(const at::Tensor & self) { + return at::_ops::_is_all_true::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e4ab25ecf8dc5a0996401d0ca5700f2e265f8487 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_is_any_true_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _is_any_true { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_is_any_true"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_is_any_true(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e95d4879041920da7bf7fd8b6ccd9e51b421b8aa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_check_errors_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API void _linalg_check_errors(const at::Tensor & info, c10::string_view api_name, bool is_matrix); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9ac004633d68f78cedb79a881da9a9deb1d0bb71 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_eigh_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple _linalg_eigh(const at::Tensor & A, c10::string_view UPLO="L", bool compute_v=true); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f30baaf39f5d54367caf57d97b33f5b1abe584a2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_linalg_slogdet_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _linalg_slogdet { + using schema = ::std::tuple (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_slogdet"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet, Tensor LU, Tensor pivots)"; + static ::std::tuple call(const at::Tensor & A); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A); +}; + +struct TORCH_API _linalg_slogdet_sign { + using schema = ::std::tuple (const at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_linalg_slogdet"; + static constexpr const char* overload_name = "sign"; + static constexpr const char* schema_str = 
"_linalg_slogdet.sign(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots) -> (Tensor(a!) sign, Tensor(b!) logabsdet, Tensor(c!) LU, Tensor(d!) pivots)"; + static ::std::tuple call(const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet, at::Tensor & LU, at::Tensor & pivots); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet, at::Tensor & LU, at::Tensor & pivots); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8b4c04a0961d503ca2d811e46f41924ebc160211 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_batch_norm_legit_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _native_batch_norm_legit_functional(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const at::Tensor & running_mean, const at::Tensor & running_var, bool training, double momentum, double eps); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c4dd0009f3ce6dae3ba1f6d02a3842d8b8499e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_native_multi_head_attention_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _native_multi_head_attention(const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, int64_t embed_dim, int64_t num_head, const at::Tensor & qkv_weight, const at::Tensor & qkv_bias, const at::Tensor & proj_weight, const at::Tensor & proj_bias, const ::std::optional & mask={}, bool need_weights=true, bool average_attn_weights=true, ::std::optional mask_type=::std::nullopt); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..84da33281b26f6a948762765f6d22b578fbe06e2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_neg_view_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _neg_view(const at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d53f58291d86044d29cb302b53edab3cd870f628 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_compute_contiguous_strides_offsets_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple _nested_compute_contiguous_strides_offsets(const at::Tensor & nested_size); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h new file mode 100644 index 0000000000000000000000000000000000000000..a3cab297d4530b934c3d86f74128b4465a3cead5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_get_ragged_idx.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_nested_get_ragged_idx(Tensor self) -> int +inline int64_t _nested_get_ragged_idx(const at::Tensor & self) { + return at::_ops::_nested_get_ragged_idx::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h new file mode 100644 index 0000000000000000000000000000000000000000..293c03d886fdf51e7066ee164bd3e9c306fa1d90 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_tensor_from_mask_left_aligned_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool NestedTensor_nested_tensor_from_mask_left_aligned(const at::Tensor & t, const at::Tensor & mask); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ecf9d3c78962ff9ba59c6f735f92aaa3e834177c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nested_view_from_jagged_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nested_view_from_jagged { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, int64_t, const ::std::optional &, const ::std::optional &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nested_view_from_jagged"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1, Tensor? min_seqlen=None, Tensor? max_seqlen=None) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & offsets, const at::Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & offsets, const at::Tensor & dummy, const ::std::optional & lengths, int64_t ragged_idx, const ::std::optional & min_seqlen, const ::std::optional & max_seqlen); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h new file mode 100644 index 0000000000000000000000000000000000000000..79055b298fdfe4518840dcce70972bdb10636488 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool _nnpack_available(); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b7b738b548c3e52a42ce925f5684e3c5002ea668 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_nnpack_available_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _nnpack_available { + using schema = bool (); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_nnpack_available"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_nnpack_available() -> bool"; + static bool call(); + static bool redispatch(c10::DispatchKeySet dispatchKeySet); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2ac9fadf988111129946ada737bd72e2fd4e065c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_safe_softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _safe_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ff938f225e690595d866a26d9e6c84d86ebe8344 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_flash_attention_for_cpu_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _scaled_dot_product_flash_attention_for_cpu_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, double, bool, const ::std::optional &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_scaled_dot_product_flash_attention_for_cpu_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_scaled_dot_product_flash_attention_for_cpu_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor out, Tensor logsumexp, float dropout_p, bool is_causal, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value)"; + static ::std::tuple call(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & out, const at::Tensor & logsumexp, double dropout_p, bool is_causal, const ::std::optional & attn_mask, ::std::optional scale); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3eee1d697518f54995e06c173e7d7a4395195a33 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_scaled_dot_product_fused_attention_overrideable_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _scaled_dot_product_fused_attention_overrideable_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, ::std::array, const at::Tensor &, const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymInt, c10::SymInt, double, bool, const at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_scaled_dot_product_fused_attention_overrideable_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_scaled_dot_product_fused_attention_overrideable_backward(Tensor grad_out, Tensor query, Tensor key, Tensor value, Tensor attn_bias, bool[4] grad_input_mask, Tensor out, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, Tensor philox_seed, Tensor philox_offset, *, float? 
scale=None) -> (Tensor grad_query, Tensor grad_key, Tensor grad_value, Tensor grad_attn_bias)"; + static ::std::tuple call(const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & attn_bias, ::std::array grad_input_mask, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, ::std::optional scale); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_out, const at::Tensor & query, const at::Tensor & key, const at::Tensor & value, const at::Tensor & attn_bias, ::std::array grad_input_mask, const at::Tensor & out, const at::Tensor & logsumexp, const at::Tensor & cum_seq_q, const at::Tensor & cum_seq_k, c10::SymInt max_q, c10::SymInt max_k, double dropout_p, bool is_causal, const at::Tensor & philox_seed, const at::Tensor & philox_offset, ::std::optional scale); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ff9fc9ddb9821271559da9c2f0501d60c21e0a5e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_slow_conv2d_forward_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor _slow_conv2d_forward(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding); +TORCH_API at::Tensor _slow_conv2d_forward_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_outf(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::Tensor & output); +TORCH_API at::Tensor & _slow_conv2d_forward_symint_out(at::Tensor & output, const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & _slow_conv2d_forward_symint_outf(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, at::Tensor & output); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fdd9bc211d90a469dda59594c3ea1a1f4241265b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sobol_engine_initialize_state_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor & _sobol_engine_initialize_state_(at::Tensor & self, int64_t dimension); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h new file mode 100644 index 0000000000000000000000000000000000000000..06f2c97abd716784d1651dea8fdb640c77571c19 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_softmax_backward_data_native.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_softmax_backward_cpu_out : public at::meta::structured__softmax_backward_data { +void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input); +}; +struct TORCH_API structured_softmax_backward_cuda_out : public at::meta::structured__softmax_backward_data { +void impl(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype, const at::Tensor & grad_input); +}; +TORCH_API at::Tensor nested_softmax_backward(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e97c11d3f3376bccc9770f874d598ce6b97a0bf0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_compressed_tensor_unsafe_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_compressed_tensor_unsafe_symint(const at::Tensor & compressed_indices, const at::Tensor & plain_indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fe8760fcb01e6543cd18a3ff8c5b70b4b5e117dd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_coo_tensor_unsafe_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_coo_tensor_unsafe_symint(const at::Tensor & indices, const at::Tensor & values, c10::SymIntArrayRef size, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}, ::std::optional is_coalesced=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h new file mode 100644 index 0000000000000000000000000000000000000000..0643e714a35b1ad5a72bb7f2a0ea2be02a98dfdf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csc_tensor_unsafe.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}) { + return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::_sparse_csc_tensor_unsafe(Tensor ccol_indices, Tensor row_indices, Tensor values, int[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor _sparse_csc_tensor_unsafe(const at::Tensor & ccol_indices, const at::Tensor & row_indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::_sparse_csc_tensor_unsafe::call(ccol_indices, row_indices, values, size, dtype, layout, device, pin_memory); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h new file mode 100644 index 0000000000000000000000000000000000000000..3e44a0c449eb3bb2a7ef0d16bca5992d1d5b03cc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_csr_sum.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_sparse_csr_sum.dim_dtype(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor _sparse_csr_sum(const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_csr_sum_dim_dtype::call(self, dim, keepdim, dtype); +} + +// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _sparse_csr_sum_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out); +} +// aten::_sparse_csr_sum.dim_dtype_out(Tensor self, int[1] dim, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _sparse_csr_sum_outf(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out) { + return at::_ops::_sparse_csr_sum_dim_dtype_out::call(self, dim, keepdim, dtype, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a024d73dc676d7e32548e3bba054e390852f3eca --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_log_softmax_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor _sparse_log_softmax(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & _sparse_log_softmax_out(const at::Tensor & self, int64_t dim, bool half_to_float, at::Tensor & out); +TORCH_API at::Tensor 
log_softmax_sparse_cpu(const at::Tensor & self, int64_t dim, bool half_to_float); +TORCH_API at::Tensor log_softmax_sparse_cuda(const at::Tensor & self, int64_t dim, bool half_to_float); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c62dd2b281cbe724eac227e1ca7bd95be96037dd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_mm_reduce_impl_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_mm_reduce_impl_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::string_view, const at::Tensor &, ::std::array); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_mm_reduce_impl_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_mm_reduce_impl_backward(Tensor self, Tensor grad_out, Tensor weight, str reduce, Tensor arg_out, bool[2] output_mask) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_out, const at::Tensor & weight, c10::string_view reduce, const at::Tensor & arg_out, ::std::array output_mask); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..748f9073708da0c88d729423f0e7ac49e2af1cc4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_apply_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_apply { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_apply"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_apply(Tensor input, Tensor thread_masks) -> (Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & thread_masks); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & thread_masks); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ca26cc6ec93e3c63cfa1997b36d38a77889fb66d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_linear_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor _sparse_semi_structured_linear(const at::Tensor & input, const at::Tensor & weight, const at::Tensor & meta, const ::std::optional & bias={}, ::std::optional activation=::std::nullopt, ::std::optional out_dtype=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..505b1848ef7fa9d1bbc7a4c2f73c03aa8c95755d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple _sparse_semi_structured_tile(const at::Tensor & input, c10::string_view algorithm="", bool use_cutlass=true); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..375c76c840f998b4f69cf5b8d551232d5b483ed0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_sparse_semi_structured_tile_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _sparse_semi_structured_tile { + using schema = ::std::tuple (const at::Tensor &, c10::string_view, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_sparse_semi_structured_tile"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_sparse_semi_structured_tile(Tensor input, str algorithm=\"\", bool use_cutlass=True) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, c10::string_view algorithm, bool use_cutlass); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9e9dc4f10e7c27796c55c43c9b297c778c666a3e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_spdiags_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _spdiags { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_spdiags(Tensor diagonals, Tensor offsets, int[] shape, Layout? 
layout=None) -> Tensor"; + static at::Tensor call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout); +}; + +struct TORCH_API _spdiags_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_spdiags"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_spdiags.out(Tensor diagonals, Tensor offsets, int[] shape, Layout? layout=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & diagonals, const at::Tensor & offsets, at::IntArrayRef shape, ::std::optional layout, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5e7fac175591daf4ad77ebb517280be10317a123 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_standard_gamma_grad_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _standard_gamma_grad { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_standard_gamma_grad(Tensor self, Tensor output) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & output); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output); +}; + +struct TORCH_API _standard_gamma_grad_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_standard_gamma_grad"; + static constexpr const char* overload_name = 
"out"; + static constexpr const char* schema_str = "_standard_gamma_grad.out(Tensor self, Tensor output, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & output, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & output, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f99343056c04f4b7aa47204d717b09368c8be442 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_autograd_multiple_dispatch_view_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor _test_autograd_multiple_dispatch_view(const at::Tensor & self); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..39a4c53adf9cd74f8fb07286093a8662f5215465 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_optional_intlist_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _test_optional_intlist { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_test_optional_intlist(Tensor values, int[]? 
addends) -> Tensor"; + static at::Tensor call(const at::Tensor & values, at::OptionalIntArrayRef addends); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends); +}; + +struct TORCH_API _test_optional_intlist_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_test_optional_intlist"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_test_optional_intlist.out(Tensor values, int[]? addends, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & values, at::OptionalIntArrayRef addends, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..63f120845a22502351f675d77af0f4ff5f1cb055 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_test_string_default_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _test_string_default(const at::Tensor & dummy, c10::string_view a="\"'\\", c10::string_view b="\"'\\"); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2c592c9f0d48fcd30282131dfacd63c8d8785c77 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_differentiable_lstm_cell_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API _thnn_differentiable_lstm_cell_backward { + using schema = ::std::tuple (const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_thnn_differentiable_lstm_cell_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_thnn_differentiable_lstm_cell_backward(Tensor? grad_hy, Tensor? grad_cy, Tensor input_gates, Tensor hidden_gates, Tensor? input_bias, Tensor? 
hidden_bias, Tensor cx, Tensor cy) -> (Tensor, Tensor, Tensor, Tensor, Tensor)"; + static ::std::tuple call(const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const ::std::optional & grad_hy, const ::std::optional & grad_cy, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const ::std::optional & input_bias, const ::std::optional & hidden_bias, const at::Tensor & cx, const at::Tensor & cy); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f53a8e35fecca34eee858d1de1eefea566c89514 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_thnn_fused_gru_cell_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple _thnn_fused_gru_cell_out(at::Tensor & out0, at::Tensor & out1, const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias={}, const ::std::optional & hidden_bias={}); +TORCH_API ::std::tuple _thnn_fused_gru_cell_outf(const at::Tensor & input_gates, const at::Tensor & hidden_gates, const at::Tensor & hx, const ::std::optional & input_bias, const ::std::optional & hidden_bias, at::Tensor & out0, at::Tensor & out1); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..87f7dfc1ff2ece7eaba0a4cbd320011db6b52c81 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_bsc_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & _to_sparse_bsc_out(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim, at::Tensor & out); +TORCH_API at::Tensor dense_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor coo_to_sparse_bsc(const 
at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +TORCH_API at::Tensor sparse_compressed_to_sparse_bsc(const at::Tensor & self, at::IntArrayRef blocksize, ::std::optional dense_dim=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h new file mode 100644 index 0000000000000000000000000000000000000000..e0e7dc1b3e7ad9676383f026fb05c2ffd06f4eb6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_csc.h @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_out(at::Tensor & out, const at::Tensor & self, ::std::optional dense_dim=::std::nullopt) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} +// aten::_to_sparse_csc.out(Tensor self, int? dense_dim=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _to_sparse_csc_outf(const at::Tensor & self, ::std::optional dense_dim, at::Tensor & out) { + return at::_ops::_to_sparse_csc_out::call(self, dense_dim, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h new file mode 100644 index 0000000000000000000000000000000000000000..fd79b2758a5f6f3eff32c4945c6cbaed3f72be55 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_to_sparse_semi_structured.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_to_sparse_semi_structured(Tensor dense) -> (Tensor, Tensor) +inline ::std::tuple _to_sparse_semi_structured(const at::Tensor & dense) { + return at::_ops::_to_sparse_semi_structured::call(dense); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h new file mode 100644 index 0000000000000000000000000000000000000000..f58b18e8f3baba28cd093124c07da0b732540ec1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_triton_scaled_dot_attention.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_triton_scaled_dot_attention(Tensor q, Tensor k, Tensor v, float dropout_p=0.0) -> Tensor +inline at::Tensor _triton_scaled_dot_attention(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) { + return at::_ops::_triton_scaled_dot_attention::call(q, k, v, dropout_p); +} + +// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _triton_scaled_dot_attention_out(at::Tensor & out, const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p=0.0) { + return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out); +} +// aten::_triton_scaled_dot_attention.out(Tensor q, Tensor k, Tensor v, float dropout_p=0.0, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _triton_scaled_dot_attention_outf(const at::Tensor & q, const at::Tensor & k, const at::Tensor & v, double dropout_p, at::Tensor & out) { + return at::_ops::_triton_scaled_dot_attention_out::call(q, k, v, dropout_p, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5c425c30cef858f970959b0f9c1d78890ded326b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unpack_dual_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _unpack_dual { + using schema = ::std::tuple (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unpack_dual"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_unpack_dual(Tensor(a) dual, int level) -> (Tensor(a) primal, Tensor tangent)"; + static ::std::tuple call(const at::Tensor & dual, int64_t level); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & dual, int64_t level); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..53213a7c208ae08d6838f1c987f969c9d6f77123 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_unsafe_index_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _unsafe_index_Tensor { + using schema = at::Tensor (const at::Tensor &, const c10::List<::std::optional> &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_unsafe_index"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "_unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const c10::List<::std::optional> & indices); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const c10::List<::std::optional> & indices); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..e88d05badd8ca81e2cccab4d15a7babb09a13058 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bicubic2d_aa_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bicubic2d_aa_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional 
scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..eb802d863f20a07c64b540b5bf3fb9ff822fa222 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_bilinear2d_aa_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured__upsample_bilinear2d_aa_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, at::ArrayRef output_size, at::ArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h new file mode 100644 index 0000000000000000000000000000000000000000..d4cd4c1f51eea2efdd816feb159ee1d5d8e59729 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_upsample_nearest_exact2d.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? scale_factors) -> Tensor +inline at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.vec(Tensor input, SymInt[]? output_size, float[]? 
scale_factors) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors) { + return at::_ops::_upsample_nearest_exact2d_vec::call(input, output_size, scale_factors); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & _upsample_nearest_exact2d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d.out(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & _upsample_nearest_exact2d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); +} +namespace symint { + template >> + at::Tensor & _upsample_nearest_exact2d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, at::Tensor & out) { + return at::_ops::_upsample_nearest_exact2d_out::call(self, output_size, scales_h, scales_w, out); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, c10::fromIntArrayRefSlow(output_size), scales_h, scales_w); + } +} + +// aten::_upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? 
scales_w=None) -> Tensor +inline at::Tensor _upsample_nearest_exact2d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); +} +namespace symint { + template >> + at::Tensor _upsample_nearest_exact2d(const at::Tensor & self, c10::SymIntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt) { + return at::_ops::_upsample_nearest_exact2d::call(self, output_size, scales_h, scales_w); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..56f4d53d6c5d8b8ca4319d0ed3aeae140fe344ea --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_validate_compressed_sparse_indices_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API void _validate_compressed_sparse_indices(bool is_crow, const at::Tensor & compressed_idx, const at::Tensor & plain_idx, int64_t cdim, int64_t dim, int64_t nnz); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..650d8f96de3e9159123ce60a2b2cdca5219a3b53 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_values_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API _values_copy { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "_values_copy(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API _values_copy_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::_values_copy"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "_values_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ca668d18900e7fa6876e928f0f381d7be4151b52 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_version_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t _version(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h new file mode 100644 index 0000000000000000000000000000000000000000..ccc931e66457e1e241ed6601a3bff9a5dcb0e2f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_weight_int4pack_mm_with_scales_and_zeros.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::_weight_int4pack_mm_with_scales_and_zeros(Tensor self, Tensor mat2, int qGroupSize, Tensor qScale, Tensor qZeros) -> Tensor +inline at::Tensor _weight_int4pack_mm_with_scales_and_zeros(const at::Tensor & self, const at::Tensor & mat2, int64_t qGroupSize, const at::Tensor & qScale, const at::Tensor & qZeros) { + return at::_ops::_weight_int4pack_mm_with_scales_and_zeros::call(self, mat2, qGroupSize, qScale, qZeros); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a81eb440a7c68263c87ddfe2aa330d98a967971c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/_wrapped_quantized_linear_prepacked_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor _wrapped_quantized_linear_prepacked(const at::Tensor & input, const at::Tensor & input_scale, const at::Tensor & input_zero_point, const at::Tensor & packed_weight, const at::Tensor & output_scale, const at::Tensor & output_zero_point, int64_t out_channel); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h new file mode 100644 index 0000000000000000000000000000000000000000..e51e694346350608846f08ff310bcdbfdd02137f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_avg_pool3d.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, c10::fromIntArrayRefSlow(output_size), out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & adaptive_avg_pool3d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); + } +} + +// aten::adaptive_avg_pool3d.out(Tensor self, SymInt[3] output_size, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & adaptive_avg_pool3d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); +} +namespace symint { + template >> + at::Tensor & adaptive_avg_pool3d_outf(const at::Tensor & self, c10::SymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::adaptive_avg_pool3d_out::call(self, output_size, out); + } +} + +// aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor +inline at::Tensor adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, c10::fromIntArrayRefSlow(output_size)); +} +namespace symint { + template >> + at::Tensor adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, c10::fromIntArrayRefSlow(output_size)); + } +} + +// aten::adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor +inline at::Tensor adaptive_avg_pool3d_symint(const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, output_size); +} +namespace symint { + template >> + at::Tensor adaptive_avg_pool3d(const at::Tensor & self, c10::SymIntArrayRef output_size) { + return at::_ops::adaptive_avg_pool3d::call(self, output_size); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c0e8415486f740c4d3a9870c4b3345cfcf52a880 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool1d_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple adaptive_max_pool1d(const at::Tensor & self, at::IntArrayRef output_size); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8a7ec7a93c6911d226ee42e781a1709f2f8a55c6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor adaptive_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices); +TORCH_API at::Tensor & adaptive_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices); +TORCH_API at::Tensor & adaptive_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46a5ec97418393c9f8f38ee49bc30c73ed9e3352 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/adaptive_max_pool2d_cuda_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple adaptive_max_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API ::std::tuple adaptive_max_pool2d_out(at::Tensor & out, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API ::std::tuple adaptive_max_pool2d_outf(const at::Tensor & self, at::IntArrayRef output_size, at::Tensor & out, at::Tensor & indices); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..e1a60453a4478b00b70f42ac6154d8aa4c0930cd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/alias_copy.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::alias_copy(Tensor self) -> Tensor +inline at::Tensor alias_copy(const at::Tensor & self) { + return at::_ops::alias_copy::call(self); +} + +// aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & alias_copy_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::alias_copy_out::call(self, out); +} +// aten::alias_copy.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & alias_copy_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::alias_copy_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h new file mode 100644 index 0000000000000000000000000000000000000000..d90cfb04ec8fd1fc80fd1a072a235b880d9418f1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/align_as.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..90fe8943ac0a0928f97c45e901baea0b83db5696 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor all(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false); +TORCH_API at::Tensor & all_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false); +TORCH_API at::Tensor & all_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..087f5eec4ce498346a8fccdcc5cc420e0e289ef0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/all_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API all_dim { + using schema = at::Tensor (const at::Tensor &, int64_t, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dim"; + static constexpr const char* schema_str = "all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool keepdim); +}; + +struct TORCH_API all_dims { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dims"; + static constexpr const char* schema_str = "all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim); +}; + +struct TORCH_API all_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "all.out(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all_dims_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalIntArrayRef, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dims_out"; + static constexpr const char* schema_str = "all.dims_out(Tensor self, int[]? dim=None, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all_dimname { + using schema = at::Tensor (const at::Tensor &, at::Dimname, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dimname"; + static constexpr const char* schema_str = "all.dimname(Tensor self, Dimname dim, bool keepdim=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::Dimname dim, bool keepdim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, bool keepdim); +}; + +struct TORCH_API all_dimname_out { + using schema = at::Tensor & (const at::Tensor &, at::Dimname, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "dimname_out"; + static constexpr 
const char* schema_str = "all.dimname_out(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & out); +}; + +struct TORCH_API all { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "all(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API all_all_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::all"; + static constexpr const char* overload_name = "all_out"; + static constexpr const char* schema_str = "all.all_out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..5ba5108bcb96f86e71e67b96b3641e38d921e713 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/allclose_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API allclose { + using schema = bool (const at::Tensor &, const at::Tensor &, double, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::allclose"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "allclose(Tensor self, Tensor other, float rtol=1e-05, float atol=1e-08, bool equal_nan=False) -> bool"; + static bool call(const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, double rtol, double atol, bool equal_nan); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h new file mode 100644 index 0000000000000000000000000000000000000000..09ca85ffe2803331c2c6bf3f3aa0627223ede533 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/aminmax.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::aminmax(Tensor self, *, int? dim=None, bool keepdim=False) -> (Tensor min, Tensor max) +inline ::std::tuple aminmax(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false) { + return at::_ops::aminmax::call(self, dim, keepdim); +} + +// aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max) +inline ::std::tuple aminmax_out(at::Tensor & min, at::Tensor & max, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false) { + return at::_ops::aminmax_out::call(self, dim, keepdim, min, max); +} +// aten::aminmax.out(Tensor self, *, int? dim=None, bool keepdim=False, Tensor(a!) min, Tensor(b!) max) -> (Tensor(a!) min, Tensor(b!) max) +inline ::std::tuple aminmax_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & min, at::Tensor & max) { + return at::_ops::aminmax_out::call(self, dim, keepdim, min, max); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a311807d68e19f66b86e244ad2b2f3f4ed79e313 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/arctan_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor arctan(const at::Tensor & self); +TORCH_API at::Tensor & arctan_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & arctan_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & arctan_(at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c381b9d7b321971201817b0339579bc593cfb5c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor argmax(const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmax_out(at::Tensor & out, const at::Tensor & self, ::std::optional dim=::std::nullopt, bool keepdim=false); +TORCH_API at::Tensor & argmax_outf(const at::Tensor & self, ::std::optional dim, bool keepdim, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h new file mode 100644 index 0000000000000000000000000000000000000000..2eda6bc19c39370e9a3b6572331c0831075f72a4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/argmax_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_argmax_out : public at::meta::structured_argmax { +void impl(const at::Tensor & self, ::std::optional dim, bool keepdim, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..428968fff99452cbc51bc1ea88cdde43b56c6668 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/asin_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_asin : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6de155c7a80ceab7cbb8d636d8cfbd702c67a4c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/atan2_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor atan2(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & atan2_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & atan2_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & atan2_(at::Tensor & self, const at::Tensor & other); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..50e36355f9f69e2d123b5b36ca9b5b5b1f1d926f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/avg_pool1d_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor avg_pool1d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, bool ceil_mode=false, bool count_include_pad=true); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h new file mode 100644 index 0000000000000000000000000000000000000000..41324edfe602fbc0c1675c83abcd65061fd758d3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm::call(self, batch1, batch2, beta, alpha); +} + +// aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_out::call(self, batch1, batch2, beta, alpha, out); +} +// aten::baddbmm.out(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::baddbmm_out::call(self, batch1, batch2, beta, alpha, out); +} + +// aten::baddbmm.dtype(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1) -> Tensor +inline at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_dtype::call(self, batch1, batch2, out_dtype, beta, alpha); +} + +// aten::baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta=1, const at::Scalar & alpha=1) { + return at::_ops::baddbmm_dtype_out::call(self, batch1, batch2, out_dtype, beta, alpha, out); +} +// aten::baddbmm.dtype_out(Tensor self, Tensor batch1, Tensor batch2, ScalarType out_dtype, *, Scalar beta=1, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, at::ScalarType out_dtype, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out) { + return at::_ops::baddbmm_dtype_out::call(self, batch1, batch2, out_dtype, beta, alpha, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b51e2171b90847ea6ab3c0299b2a7bfc1d0f6b06 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/baddbmm_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & baddbmm_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); +TORCH_API at::Tensor & baddbmm_outf(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & baddbmm_(at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta=1, const at::Scalar & alpha=1); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cb970fa2106cb187d5a7251c17ba62bdaa0c7385 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, bool update, double eps, ::std::array output_mask, const at::Tensor & reserve); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b72ec59b28c2d40918bf4b76fa5d0bef4bff59f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/batch_norm_backward_elemt_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & batch_norm_backward_elemt_out(at::Tensor & out, const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const ::std::optional & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count); +TORCH_API at::Tensor & batch_norm_backward_elemt_outf(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & invstd, const ::std::optional & weight, const at::Tensor & sum_dy, const at::Tensor & sum_dy_xmu, const at::Tensor & count, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..65c7027d2e996be80c075a8f72460939d93dbd2c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bernoulli_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API bernoulli { + using schema = at::Tensor (const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "bernoulli(Tensor self, *, Generator? generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional generator); +}; + +struct TORCH_API bernoulli_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "bernoulli.out(Tensor self, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional generator, at::Tensor & out); +}; + +struct TORCH_API bernoulli__Tensor { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli_"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? 
generator=None) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & p, ::std::optional generator); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & p, ::std::optional generator); +}; + +struct TORCH_API bernoulli__float { + using schema = at::Tensor & (at::Tensor &, double, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli_"; + static constexpr const char* overload_name = "float"; + static constexpr const char* schema_str = "bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, double p, ::std::optional generator); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, double p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_p { + using schema = at::Tensor (const at::Tensor &, double, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "p"; + static constexpr const char* schema_str = "bernoulli.p(Tensor self, float p, *, Generator? 
generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, double p, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_Tensor_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "Tensor_out"; + static constexpr const char* schema_str = "bernoulli.Tensor_out(Tensor self, Tensor p, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & p, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & p, ::std::optional generator, at::Tensor & out); +}; + +struct TORCH_API bernoulli_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "bernoulli.Tensor(Tensor self, Tensor p, *, Generator? 
generator=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & p, ::std::optional generator); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & p, ::std::optional generator); +}; + +struct TORCH_API bernoulli_float_out { + using schema = at::Tensor & (const at::Tensor &, double, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::bernoulli"; + static constexpr const char* overload_name = "float_out"; + static constexpr const char* schema_str = "bernoulli.float_out(Tensor self, float p=0.5, *, Generator? generator=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, double p, ::std::optional generator, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double p, ::std::optional generator, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h new file mode 100644 index 0000000000000000000000000000000000000000..1aad4a5dc864b9ebda94bc0108a89a1a051fac9d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bilinear.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::bilinear(Tensor input1, Tensor input2, Tensor weight, Tensor? bias=None) -> Tensor +inline at::Tensor bilinear(const at::Tensor & input1, const at::Tensor & input2, const at::Tensor & weight, const ::std::optional & bias={}) { + return at::_ops::bilinear::call(input1, input2, weight, bias); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9680788849be6c5e5aeb366a2dcec455977016a3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/binary_cross_entropy_with_logits_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor binary_cross_entropy_with_logits(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight={}, const ::std::optional & pos_weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & binary_cross_entropy_with_logits_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight={}, const ::std::optional & pos_weight={}, int64_t reduction=at::Reduction::Mean); +TORCH_API at::Tensor & binary_cross_entropy_with_logits_outf(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, const ::std::optional & pos_weight, int64_t reduction, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h new file mode 100644 index 0000000000000000000000000000000000000000..d6a55bd8de4fa3be1f9eae37a638882c46d4bb7c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bincount.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::bincount(Tensor self, Tensor? 
weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); +} +namespace symint { + template >> + at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); + } +} + +// aten::bincount(Tensor self, Tensor? weights=None, SymInt minlength=0) -> Tensor +inline at::Tensor bincount_symint(const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); +} +namespace symint { + template >> + at::Tensor bincount(const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount::call(self, weights, minlength); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, int64_t minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, int64_t minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, int64_t minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_symint_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & weights={}, c10::SymInt minlength=0) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +// aten::bincount.out(Tensor self, Tensor? weights=None, SymInt minlength=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & bincount_symint_outf(const at::Tensor & self, const ::std::optional & weights, c10::SymInt minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); +} +namespace symint { + template >> + at::Tensor & bincount_outf(const at::Tensor & self, const ::std::optional & weights, c10::SymInt minlength, at::Tensor & out) { + return at::_ops::bincount_out::call(self, weights, minlength, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6aafa2a84e2ca9e08d17050925451da305024016 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_and_compositeexplicitautograd_dispatch.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor bitwise_and(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_and_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_and_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_and_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor bitwise_and(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_and_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f26513a64fc2c97193efcdddaec6ea3046aed447 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/bitwise_xor_compositeexplicitautograd_dispatch.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor bitwise_xor(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & bitwise_xor_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & bitwise_xor_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor bitwise_xor(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_xor_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & bitwise_xor_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h new file mode 100644 index 0000000000000000000000000000000000000000..c05d73eaf8b05699698dd6b73f14ec2ea93bafbc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/blackman_window.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::blackman_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::blackman_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::blackman_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::blackman_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::blackman_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor blackman_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::blackman_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::blackman_window_out::call(window_length, out); +} +// aten::blackman_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::blackman_window_out::call(window_length, out); +} + +// aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::blackman_window_periodic_out::call(window_length, periodic, out); +} +// aten::blackman_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & blackman_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::blackman_window_periodic_out::call(window_length, periodic, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2c1c2778444d8f95a42effcfff082da9a02a6f8a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/block_diag_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor block_diag(at::TensorList tensors); +TORCH_API at::Tensor & block_diag_out(at::Tensor & out, at::TensorList tensors); +TORCH_API at::Tensor & block_diag_outf(at::TensorList tensors, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ad6f373fd8772e178b02974ef1939a1b4766c70a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cat_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor cat(const at::ITensorListRef & tensors, int64_t dim=0); +TORCH_API at::Tensor & cat_out(at::Tensor & out, const at::ITensorListRef & tensors, int64_t dim=0); +TORCH_API at::Tensor & cat_outf(const at::ITensorListRef & tensors, int64_t dim, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..ef1d9199bcd95f9496e7aa1788fc6011e655f8e9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ccol_indices.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2ad8dbb761281160514b9acce65ce4abb8a1bd50 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ceil_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor ceil(const at::Tensor & self); +TORCH_API at::Tensor & ceil_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & ceil_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & ceil_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..871565fd8a43c1696342cd588f08c1a1c671fc4d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_cuda_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Scalar & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Tensor & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Tensor & max); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..647351258de189b623d61af30f7d5524a9c703c4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_max_meta_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Scalar & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Scalar & max); +TORCH_API at::Tensor clamp_max(const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & max); +TORCH_API at::Tensor & clamp_max_outf(const at::Tensor & self, const at::Tensor & max, at::Tensor & out); +TORCH_API at::Tensor & clamp_max_(at::Tensor & self, const at::Tensor & max); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d661d29f41150246ca0d50517c37a5789aca5b8a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/clamp_min_meta_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Scalar & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Scalar & min); +TORCH_API at::Tensor clamp_min(const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & min); +TORCH_API at::Tensor & clamp_min_outf(const at::Tensor & self, const at::Tensor & min, at::Tensor & out); +TORCH_API at::Tensor & clamp_min_(at::Tensor & self, const at::Tensor & min); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..448c94998ec59e3bba8819d0a8cdc3829387da40 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/col2im_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API col2im_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::col2im"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "col2im.out(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride, at::Tensor & out); +}; + +struct TORCH_API col2im { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef, at::IntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::col2im"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, at::IntArrayRef padding, at::IntArrayRef stride); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef output_size, at::IntArrayRef kernel_size, at::IntArrayRef dilation, 
at::IntArrayRef padding, at::IntArrayRef stride); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h new file mode 100644 index 0000000000000000000000000000000000000000..0866f3b02b10f75b9af86bc23a7aad9cfefb9655 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/concatenate.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::concatenate(Tensor[] tensors, int dim=0) -> Tensor +inline at::Tensor concatenate(at::TensorList tensors, int64_t dim=0) { + return at::_ops::concatenate::call(tensors, dim); +} + +// aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_out(at::Tensor & out, at::TensorList tensors, int64_t dim=0) { + return at::_ops::concatenate_out::call(tensors, dim, out); +} +// aten::concatenate.out(Tensor[] tensors, int dim=0, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_outf(at::TensorList tensors, int64_t dim, at::Tensor & out) { + return at::_ops::concatenate_out::call(tensors, dim, out); +} + +// aten::concatenate.names(Tensor[] tensors, Dimname dim) -> Tensor +inline at::Tensor concatenate(at::TensorList tensors, at::Dimname dim) { + return at::_ops::concatenate_names::call(tensors, dim); +} + +// aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) 
out) -> Tensor(a!) +inline at::Tensor & concatenate_out(at::Tensor & out, at::TensorList tensors, at::Dimname dim) { + return at::_ops::concatenate_names_out::call(tensors, dim, out); +} +// aten::concatenate.names_out(Tensor[] tensors, Dimname dim, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & concatenate_outf(at::TensorList tensors, at::Dimname dim, at::Tensor & out) { + return at::_ops::concatenate_names_out::call(tensors, dim, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h new file mode 100644 index 0000000000000000000000000000000000000000..ef26c9bd3f294fc59949091a8abdab907cb15dbb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor conj(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h new file mode 100644 index 0000000000000000000000000000000000000000..33c196fb5826fdf8e12197557d3d8361540a78f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conj_physical.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::conj_physical(Tensor self) -> Tensor +inline at::Tensor conj_physical(const at::Tensor & self) { + return at::_ops::conj_physical::call(self); +} + +// aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & conj_physical_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::conj_physical_out::call(self, out); +} +// aten::conj_physical.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & conj_physical_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::conj_physical_out::call(self, out); +} + +// aten::conj_physical_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & conj_physical_(at::Tensor & self) { + return at::_ops::conj_physical_::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..903e8c1a3a31a309d8acf5c61177e45b4e8038ac --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv2d_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API conv2d { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::conv2d"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "conv2d(Tensor input, Tensor weight, Tensor? 
bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1, SymInt groups=1) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups); +}; + +struct TORCH_API conv2d_padding { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::string_view, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::conv2d"; + static constexpr const char* overload_name = "padding"; + static constexpr const char* schema_str = "conv2d.padding(Tensor input, Tensor weight, Tensor? bias=None, SymInt[2] stride=1, str padding=\"valid\", SymInt[2] dilation=1, SymInt groups=1) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::string_view padding, c10::SymIntArrayRef dilation, c10::SymInt groups); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..bac6b681b6f2487bb703b879fc78c8c3c4a3d13f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_depthwise3d_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & conv_depthwise3d_out_symint(const at::Tensor & self, const at::Tensor & weight, c10::SymIntArrayRef kernel_size, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, at::Tensor & out); +TORCH_API at::Tensor conv_depthwise3d_cuda(const at::Tensor & self, const at::Tensor & weight, at::IntArrayRef kernel_size, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d3d06535771b0bd95ad80fcdf03d2edf525965c6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_backward_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple conv_tbc_backward(const at::Tensor & self, const at::Tensor & input, const at::Tensor & weight, const at::Tensor & bias, int64_t pad); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cdbcba45eb98db013b8eda0ee0e53097d025d8d8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/conv_tbc_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor conv_tbc(const at::Tensor & self, const at::Tensor & weight, const at::Tensor & bias, int64_t pad=0); +TORCH_API at::Tensor & conv_tbc_out(const at::Tensor & self, const at::Tensor & weight, const at::Tensor & bias, int64_t pad, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..134b936ef05a0cb9abd6339723d70ac144f08e3c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/convolution_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API convolution { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, c10::SymIntArrayRef, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::convolution"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "convolution(Tensor input, Tensor weight, Tensor? 
bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups); +}; + +struct TORCH_API convolution_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const ::std::optional &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, bool, c10::SymIntArrayRef, c10::SymInt, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::convolution"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "convolution.out(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, bool transposed, c10::SymIntArrayRef output_padding, c10::SymInt groups, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e228444ecd90c6f91660e1386b5f8d01f5e25aca --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/copy_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & copy_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & src, bool non_blocking=false); +TORCH_API at::Tensor & copy_outf(const at::Tensor & self, const at::Tensor & src, bool non_blocking, at::Tensor & out); +TORCH_API at::Tensor & copy_(at::Tensor & self, const at::Tensor & src, bool non_blocking=false); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h new file mode 100644 index 0000000000000000000000000000000000000000..ebd7dfdca74b50d8b17a5a66da99e39788464f5e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ctc_loss.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::ctc_loss.IntList(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor +inline at::Tensor ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, at::IntArrayRef input_lengths, at::IntArrayRef target_lengths, int64_t blank=0, int64_t reduction=at::Reduction::Mean, bool zero_infinity=false) { + return at::_ops::ctc_loss_IntList::call(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity); +} + +// aten::ctc_loss.Tensor(Tensor log_probs, 
Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, int reduction=Mean, bool zero_infinity=False) -> Tensor +inline at::Tensor ctc_loss(const at::Tensor & log_probs, const at::Tensor & targets, const at::Tensor & input_lengths, const at::Tensor & target_lengths, int64_t blank=0, int64_t reduction=at::Reduction::Mean, bool zero_infinity=false) { + return at::_ops::ctc_loss_Tensor::call(log_probs, targets, input_lengths, target_lengths, blank, reduction, zero_infinity); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..4ca9576132b56673ef462af6846f058dd766212f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_affine_grid_generator_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cudnn_affine_grid_generator_backward(Tensor grad, int N, int C, int H, int W) -> Tensor grad_theta +inline at::Tensor cudnn_affine_grid_generator_backward(const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W) { + return at::_ops::cudnn_affine_grid_generator_backward::call(grad, N, C, H, W); +} + +// aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & cudnn_affine_grid_generator_backward_out(at::Tensor & out, const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W) { + return at::_ops::cudnn_affine_grid_generator_backward_out::call(grad, N, C, H, W, out); +} +// aten::cudnn_affine_grid_generator_backward.out(Tensor grad, int N, int C, int H, int W, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & cudnn_affine_grid_generator_backward_outf(const at::Tensor & grad, int64_t N, int64_t C, int64_t H, int64_t W, at::Tensor & out) { + return at::_ops::cudnn_affine_grid_generator_backward_out::call(grad, N, C, H, W, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h new file mode 100644 index 0000000000000000000000000000000000000000..acfcf35b35e7158a64048ef756d3b29b6bef65e2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_convolution_relu_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & cudnn_convolution_relu_out_symint(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, c10::SymIntArrayRef stride, c10::SymIntArrayRef padding, c10::SymIntArrayRef dilation, c10::SymInt groups, at::Tensor & out); +TORCH_API at::Tensor cudnn_convolution_relu(const at::Tensor & self, const at::Tensor & weight, const ::std::optional & bias, 
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, int64_t groups); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h new file mode 100644 index 0000000000000000000000000000000000000000..c3f3994541ca499a9b817efe9b92a849e7765ddc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::cudnn_is_acceptable(Tensor self) -> bool +inline bool cudnn_is_acceptable(const at::Tensor & self) { + return at::_ops::cudnn_is_acceptable::call(self); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..7c500c0e015fb04f0bbc470a8efa363be0ccdee6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cudnn_is_acceptable_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cudnn_is_acceptable { + using schema = bool (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cudnn_is_acceptable"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cudnn_is_acceptable(Tensor self) -> bool"; + static bool call(const at::Tensor & self); + static bool redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..9fcdafa687834123962c728e217f21fe416ecf5f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummaxmin_backward_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API cummaxmin_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::cummaxmin_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "cummaxmin_backward(Tensor grad, Tensor input, Tensor indices, int dim) -> Tensor"; + static at::Tensor call(const at::Tensor & grad, const at::Tensor & input, const at::Tensor & indices, int64_t dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad, const at::Tensor & input, const at::Tensor & indices, int64_t dim); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7e9d7a60c670bd31c0f818a69de5816daf539dee --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cummin_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple cummin(const at::Tensor & self, int64_t dim); +TORCH_API ::std::tuple cummin_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim); +TORCH_API ::std::tuple cummin_outf(const at::Tensor & self, int64_t dim, at::Tensor & values, at::Tensor & indices); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f953894baab92cd6a147dc70a98bc0f87b994ba7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/cumprod_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_cumprod_out : public at::meta::structured_cumprod { +void impl(const at::Tensor & self, int64_t dim, ::std::optional dtype, const at::Tensor & out); +}; +TORCH_API at::Tensor cumprod(const at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & cumprod_out(const at::Tensor & self, at::Dimname dim, ::std::optional dtype, at::Tensor & out); +TORCH_API at::Tensor & cumprod_(at::Tensor & self, at::Dimname dim, ::std::optional dtype=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..42a08ed1c65b7364488dd0e7acb6ceb2b0ced8a2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/diagonal_copy_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API diagonal_copy { + using schema = at::Tensor (const at::Tensor &, int64_t, int64_t, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::diagonal_copy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "diagonal_copy(Tensor self, int offset=0, int dim1=0, int dim2=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +}; + +struct TORCH_API diagonal_copy_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, int64_t, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::diagonal_copy"; + static constexpr const char* 
overload_name = "out"; + static constexpr const char* schema_str = "diagonal_copy.out(Tensor self, int offset=0, int dim1=0, int dim2=1, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..144dc620cb7d2121a4fa09f71b349906c4194706 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/div_cpu_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor div(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & div_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & div_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & div_(at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor div(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +TORCH_API at::Tensor & div_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +TORCH_API at::Tensor & div_outf(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode, at::Tensor & out); +TORCH_API at::Tensor & div_(at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3cee09ff5f2ae66a6ec056227f02b0a979ae8de8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dot_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & dot_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & tensor); +TORCH_API at::Tensor & dot_outf(const at::Tensor & self, const at::Tensor & tensor, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h new file mode 100644 index 0000000000000000000000000000000000000000..f18f4fea07d0c4eea157a299bf46b954e0a85405 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/dropout_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor dropout(const at::Tensor & input, double p, bool train); +TORCH_API at::Tensor & dropout_(at::Tensor & self, double p, bool train); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h new file mode 100644 index 0000000000000000000000000000000000000000..f1ba8750379010822227f402b86b923b96ff25ad --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/empty_strided.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_strided(at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), dtype, layout, device, pin_memory); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +namespace symint { + template >> + at::Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::TensorOptions options={}) { + return at::_ops::empty_strided::call(size, stride, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); + } +} + +// aten::empty_strided(SymInt[] size, SymInt[] stride, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(size, stride, dtype, layout, device, pin_memory); +} +namespace symint { + template >> + at::Tensor empty_strided(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::empty_strided::call(size, stride, dtype, layout, device, pin_memory); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef stride) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); +} +namespace symint { + template >> + at::Tensor & empty_strided_out(at::Tensor & out, at::IntArrayRef size, at::IntArrayRef stride) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_outf(at::IntArrayRef size, at::IntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); +} +namespace symint { + template >> + at::Tensor & empty_strided_outf(at::IntArrayRef size, at::IntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(c10::fromIntArrayRefSlow(size), c10::fromIntArrayRefSlow(stride), out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & empty_strided_symint_out(at::Tensor & out, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) { + return at::_ops::empty_strided_out::call(size, stride, out); +} +namespace symint { + template >> + at::Tensor & empty_strided_out(at::Tensor & out, c10::SymIntArrayRef size, c10::SymIntArrayRef stride) { + return at::_ops::empty_strided_out::call(size, stride, out); + } +} + +// aten::empty_strided.out(SymInt[] size, SymInt[] stride, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & empty_strided_symint_outf(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(size, stride, out); +} +namespace symint { + template >> + at::Tensor & empty_strided_outf(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out) { + return at::_ops::empty_strided_out::call(size, stride, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3b771451bfac25a8aaef767d0e826818d12ca2ff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/erf_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor erf(const at::Tensor & self); +TORCH_API at::Tensor & erf_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & erf_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & erf_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..d8237b7584541cf7a47d7a000978ed47f43fbdb4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/exp2_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_exp2 : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h new file mode 100644 index 0000000000000000000000000000000000000000..21f799764ba3d31bcc9af5b9e2f81f6fbc6b808d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expand_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor expand(const at::Tensor & self, at::IntArrayRef size, bool implicit=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..26e8243c0cf28d280c524473134d6635cf491a68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/expm1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor expm1(const at::Tensor & self); +TORCH_API at::Tensor & expm1_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h new file mode 100644 index 0000000000000000000000000000000000000000..e79ee036d31599769800fcae3693ca3e1504bbe3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fake_quantize_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> Tensor +inline at::Tensor fake_quantize_per_channel_affine(const at::Tensor & self, const at::Tensor & scale, const at::Tensor & zero_point, int64_t axis, int64_t quant_min, int64_t quant_max) { + return at::_ops::fake_quantize_per_channel_affine::call(self, scale, zero_point, axis, quant_min, quant_max); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..f1b5e62650eb2d006d436392a10e0129720b8910 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fake_quantize_per_channel_affine_cachemask_backward.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fake_quantize_per_channel_affine_cachemask_backward(Tensor grad, Tensor mask) -> Tensor +inline at::Tensor fake_quantize_per_channel_affine_cachemask_backward(const at::Tensor & grad, const at::Tensor & mask) { + return at::_ops::fake_quantize_per_channel_affine_cachemask_backward::call(grad, mask); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h new file mode 100644 index 0000000000000000000000000000000000000000..6e381fa3b9b06b6796e09186f55367aa9861755f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_linear_fp16_weight.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fbgemm_linear_fp16_weight(Tensor input, Tensor packed_weight, Tensor bias) -> Tensor +inline at::Tensor fbgemm_linear_fp16_weight(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias) { + return at::_ops::fbgemm_linear_fp16_weight::call(input, packed_weight, bias); +} + +// aten::fbgemm_linear_fp16_weight.out(Tensor input, Tensor packed_weight, Tensor bias, Tensor(a!) output) -> Tensor +inline at::Tensor fbgemm_linear_fp16_weight(const at::Tensor & input, const at::Tensor & packed_weight, const at::Tensor & bias, at::Tensor & output) { + return at::_ops::fbgemm_linear_fp16_weight_out::call(input, packed_weight, bias, output); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1fa34be058324ab99dce88cd79e645cbbeff5a04 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fbgemm_pack_gemm_matrix_fp16_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fbgemm_pack_gemm_matrix_fp16(const at::Tensor & input); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..58de921acd450a02f7e9039adf7386fb49de53c0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/feature_alpha_dropout.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::feature_alpha_dropout(Tensor input, float p, bool train) -> Tensor +inline at::Tensor feature_alpha_dropout(const at::Tensor & input, double p, bool train) { + return at::_ops::feature_alpha_dropout::call(input, p, train); +} + +// aten::feature_alpha_dropout_(Tensor(a!) self, float p, bool train) -> Tensor(a!) +inline at::Tensor & feature_alpha_dropout_(at::Tensor & self, double p, bool train) { + return at::_ops::feature_alpha_dropout_::call(self, p, train); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a673ac4d432654908dc92b214d1aa5309ccae148 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_fftn_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_fftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_fftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..2f20cf099f42f8ec66735edb418d3bff14415599 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfft2_compositeimplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fft_hfft2(const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor fft_hfft2_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_outf(const at::Tensor & self, at::OptionalIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); +TORCH_API at::Tensor & fft_hfft2_symint_out(at::Tensor & out, const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::IntArrayRef dim={-2,-1}, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfft2_symint_outf(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3d906c1c182582e983f4d782966f6ab353e28ad7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_hfftn_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_hfftn_symint(const at::Tensor & self, at::OptionalSymIntArrayRef s=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_hfftn_symint_out(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::OptionalIntArrayRef dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3e0f18274f94e7c5d2c8830626f51abb7c8e6b1d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifft2_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fft_ifft2 { + using schema = at::Tensor (const at::Tensor &, at::OptionalSymIntArrayRef, at::IntArrayRef, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifft2"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fft_ifft2(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? 
norm=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm); +}; + +struct TORCH_API fft_ifft2_out { + using schema = at::Tensor & (const at::Tensor &, at::OptionalSymIntArrayRef, at::IntArrayRef, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifft2"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "fft_ifft2.out(Tensor self, SymInt[1]? s=None, int[1] dim=[-2,-1], str? norm=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalSymIntArrayRef s, at::IntArrayRef dim, ::std::optional norm, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8b01459753d7e46527d9b4f1766dbba312de2ccb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_ifftshift_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fft_ifftshift { + using schema = at::Tensor (const at::Tensor &, at::OptionalIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fft_ifftshift"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fft_ifftshift(Tensor self, int[1]? dim=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::OptionalIntArrayRef dim); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::OptionalIntArrayRef dim); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h new file mode 100644 index 0000000000000000000000000000000000000000..04770a370b486d9378b3bf92b597e145063c4411 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fft_rfft_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor fft_rfft_symint(const at::Tensor & self, ::std::optional n=::std::nullopt, int64_t dim=-1, ::std::optional norm=::std::nullopt); +TORCH_API at::Tensor & fft_rfft_symint_out(const at::Tensor & self, ::std::optional n, int64_t dim, ::std::optional norm, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h new file mode 100644 index 0000000000000000000000000000000000000000..060abf43906bae5741f7183fbe4772a36868bc33 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/flip_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & flip_out(const at::Tensor & self, at::IntArrayRef dims, at::Tensor & out); +TORCH_API at::Tensor flip(const at::Tensor & self, at::IntArrayRef dims); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b94a6bcda4cc7652046e308200d967cc3ab30b18 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fliplr_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor fliplr(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..45204d00306faeec727197d013ef76ccdb4db293 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/float_power_compositeimplicitautograd_dispatch.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Tensor & exponent, at::Tensor & out); +TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Tensor & exponent); +TORCH_API at::Tensor float_power(const at::Scalar & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Scalar & self, const at::Tensor & exponent, at::Tensor & out); +TORCH_API at::Tensor float_power(const at::Tensor & self, const at::Scalar & exponent); +TORCH_API at::Tensor & float_power_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & exponent); +TORCH_API at::Tensor & float_power_outf(const at::Tensor & self, const at::Scalar & exponent, at::Tensor & out); +TORCH_API at::Tensor & float_power_(at::Tensor & self, const at::Scalar & exponent); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h new file mode 100644 index 0000000000000000000000000000000000000000..3e68f0f179dede53b9892f6d1d3267dae056f8fd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fmin.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fmin(Tensor self, Tensor other) -> Tensor +inline at::Tensor fmin(const at::Tensor & self, const at::Tensor & other) { + return at::_ops::fmin::call(self, other); +} + +// aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fmin_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other) { + return at::_ops::fmin_out::call(self, other, out); +} +// aten::fmin.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & fmin_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out) { + return at::_ops::fmin_out::call(self, other, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..4def4dc1523f1c064cfc32fd2858d29a7620b2e3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..06a961b2b0e36f42eeaad6f2e2ced82fcfa142f3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool2d_backward_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor fractional_max_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices); +TORCH_API at::Tensor & fractional_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..5a99191a799e689339d55c8ee29cc3b3cb3390be --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fractional_max_pool3d_backward.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) 
grad_input) -> Tensor(a!) +inline at::Tensor & fractional_max_pool3d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) { + return at::_ops::fractional_max_pool3d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input); +} +// aten::fractional_max_pool3d_backward.grad_input(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices, *, Tensor(a!) grad_input) -> Tensor(a!) +inline at::Tensor & fractional_max_pool3d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices, at::Tensor & grad_input) { + return at::_ops::fractional_max_pool3d_backward_grad_input::call(grad_output, self, kernel_size, output_size, indices, grad_input); +} + +// aten::fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor +inline at::Tensor fractional_max_pool3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef output_size, const at::Tensor & indices) { + return at::_ops::fractional_max_pool3d_backward::call(grad_output, self, kernel_size, output_size, indices); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..651d919dd5b74c93491c9bba8ab4fce157ee7e8d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/fused_moving_avg_obs_fake_quant_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API fused_moving_avg_obs_fake_quant { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, at::Tensor &, double, int64_t, int64_t, int64_t, bool, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::fused_moving_avg_obs_fake_quant"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "fused_moving_avg_obs_fake_quant(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) 
zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & observer_on, const at::Tensor & fake_quant_on, at::Tensor & running_min, at::Tensor & running_max, at::Tensor & scale, at::Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & observer_on, const at::Tensor & fake_quant_on, at::Tensor & running_min, at::Tensor & running_max, at::Tensor & scale, at::Tensor & zero_point, double averaging_const, int64_t quant_min, int64_t quant_max, int64_t ch_axis, bool per_row_fake_quant, bool symmetric_quant); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d55c748171314102282ebfea25c3e721749a6f1c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ge_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor ge(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor ge(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ge_(at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c917b0da093e3d77a3d0f42ab7905724fe593e6e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/geometric_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & geometric_(at::Tensor & self, double p, ::std::optional generator=::std::nullopt); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d1798fe8eeba666c48265b639dee858e34ce3804 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_backward_jvp_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor glu_backward_jvp(const at::Tensor & grad_x, const at::Tensor & grad_glu, const at::Tensor & x, const at::Tensor & dgrad_glu, const at::Tensor & dx, int64_t dim); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..62b26abe272cabf1f49a56023c8e388eee6545d7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/glu_jvp_cpu_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..af002d20ecdad821450f1f9d0bd5761440696588 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/grid_sampler_3d_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & grid_sampler_3d_out(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, at::Tensor & out); +TORCH_API at::Tensor grid_sampler_3d_cpu(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +TORCH_API at::Tensor grid_sampler_3d_cuda(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..63932a855ec5f456a085bd1f7b70ee4a04d18232 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/group_norm.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::group_norm(Tensor input, int num_groups, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enabled=True) -> Tensor +inline at::Tensor group_norm(const at::Tensor & input, int64_t num_groups, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enabled=true) { + return at::_ops::group_norm::call(input, num_groups, weight, bias, eps, cudnn_enabled); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h new file mode 100644 index 0000000000000000000000000000000000000000..d36dc25f3797c7312861e189b6ccd0888b863a4d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hann_window.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::hann_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::hann_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::hann_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::hann_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::hann_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor hann_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::hann_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::hann_window_out::call(window_length, out); +} +// aten::hann_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::hann_window_out::call(window_length, out); +} + +// aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::hann_window_periodic_out::call(window_length, periodic, out); +} +// aten::hann_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & hann_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::hann_window_periodic_out::call(window_length, periodic, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1ce85b5140c3637a1df3596eef21e2ba361f55e5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_meta_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor & hardtanh_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h new file mode 100644 index 0000000000000000000000000000000000000000..693598cca66a3e20f27f08c6514e68f4b76b96f1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hardtanh_native.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor hardtanh(const at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor & hardtanh_out(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); +TORCH_API at::Tensor & hardtanh_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor hardtanh_quantized_cpu(const at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +TORCH_API at::Tensor & hardtanh_out_quantized_cpu(const at::Tensor & self, const at::Scalar & min_val, const at::Scalar & max_val, at::Tensor & out); +TORCH_API at::Tensor & hardtanh_quantized_cpu_(at::Tensor & self, const at::Scalar & min_val=-1, const at::Scalar & max_val=1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ad770ae3a3a7bbb4f9e9bef2431660db3a9ca789 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hash_tensor_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API hash_tensor { + using schema = at::Tensor (const at::Tensor &, at::IntArrayRef, bool, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hash_tensor"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "hash_tensor(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0) -> Tensor"; + static at::Tensor call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode); +}; + +struct TORCH_API hash_tensor_out { + using schema = at::Tensor & (const at::Tensor &, at::IntArrayRef, bool, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::hash_tensor"; + static 
constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "hash_tensor.out(Tensor self, int[1] dim=[], *, bool keepdim=False, int mode=0, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef dim, bool keepdim, int64_t mode, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..338fc0aad25a4c41f04207397cb52340ff25cce5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/heaviside_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor heaviside(const at::Tensor & self, const at::Tensor & values); +TORCH_API at::Tensor & heaviside_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & values); +TORCH_API at::Tensor & heaviside_outf(const at::Tensor & self, const at::Tensor & values, at::Tensor & out); +TORCH_API at::Tensor & heaviside_(at::Tensor & self, const at::Tensor & values); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..35a5ce667c80a4770478fafedbdaa506f7f8eac2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/histogramdd_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API histogramdd { + using schema = ::std::tuple> (const at::Tensor &, at::IntArrayRef, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "histogramdd(Tensor self, int[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, at::IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::IntArrayRef bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +struct TORCH_API histogramdd_int_bins { + using schema = ::std::tuple> (const at::Tensor &, int64_t, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = "int_bins"; + static constexpr const char* schema_str = "histogramdd.int_bins(Tensor self, int bins, float[]? range=None, Tensor? 
weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +struct TORCH_API histogramdd_TensorList_bins { + using schema = ::std::tuple> (const at::Tensor &, at::TensorList, ::std::optional>, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::histogramdd"; + static constexpr const char* overload_name = "TensorList_bins"; + static constexpr const char* schema_str = "histogramdd.TensorList_bins(Tensor self, Tensor[] bins, float[]? range=None, Tensor? weight=None, bool density=False) -> (Tensor hist, Tensor[] bin_edges)"; + static ::std::tuple> call(const at::Tensor & self, at::TensorList bins, ::std::optional> range, const ::std::optional & weight, bool density); + static ::std::tuple> redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::TensorList bins, ::std::optional> range, const ::std::optional & weight, bool density); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..74c8b0be77d519752e890de2cf6615c7b9f35576 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/hypot_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor hypot(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & hypot_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & hypot_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & hypot_(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e96bd1f435a5b98dedf8d1fe55ee29b620480a3c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/igamma_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_igamma_out : public at::meta::structured_igamma { +void impl(const at::Tensor & self, const at::Tensor & other, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3ccaedf30dc6cc4ed59a63c003dd5f3ede7905f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1a94ec88d508f8572f00d492b1356dc66c4ee3f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_fill_native.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor index_fill(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_int_Scalar_out(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value, at::Tensor & out); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor & index_fill_int_Tensor_out(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value, at::Tensor & out); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Scalar & value); +TORCH_API at::Tensor & index_fill_(at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & 
value); +TORCH_API at::Tensor index_fill(const at::Tensor & self, at::Dimname dim, const at::Tensor & index, const at::Tensor & value); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..17e9c77ec5ed74a4af8186b5534939d43ac966dc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/index_put_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor index_put(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); +TORCH_API at::Tensor & index_put_out(at::Tensor & out, const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); +TORCH_API at::Tensor & index_put_outf(const at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate, at::Tensor & out); +TORCH_API at::Tensor & index_put_(at::Tensor & self, const c10::List<::std::optional> & indices, const at::Tensor & values, bool accumulate=false); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..501687a0a2820e224caa15d8acd770fe7e7fcee6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/indices_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API indices { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::indices"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "indices(Tensor(a) self) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..daf64dd3e4f40bf57695ec5563c451cf105be18c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/instance_norm_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API instance_norm { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, bool, double, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::instance_norm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "instance_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool use_input_stats, float momentum, float eps, bool cudnn_enabled) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool use_input_stats, double momentum, double eps, bool cudnn_enabled); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..11893f51d46b9c84cf31d56f4d36a04235501e24 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/is_conj_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API bool is_conj(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h new file mode 100644 index 0000000000000000000000000000000000000000..cb0560e95459ff1c2872802e54316150ec9f0c42 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/isnan_native.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & isnan_out(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor isnan(const at::Tensor & self); +TORCH_API at::Tensor NestedTensor_isnan(const at::Tensor & self); +TORCH_API at::Tensor isnan_sparse(const at::Tensor & self); +TORCH_API at::Tensor isnan_sparse_csr(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3f3b6c9bf3672025d659fc572184e85cfbe73597 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/item_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Scalar item(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h new file mode 100644 index 0000000000000000000000000000000000000000..e7c5a15839c67382a18dacc844d896e1d1459602 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/kaiser_window.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, at::TensorOptions options={}) { + return at::_ops::kaiser_window::call(window_length, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window(int window_length, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window::call(window_length, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, at::TensorOptions options={}) { + return at::_ops::kaiser_window_periodic::call(window_length, periodic, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window.periodic(int window_length, bool periodic, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window_periodic::call(window_length, periodic, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? 
pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, double beta, at::TensorOptions options={}) { + return at::_ops::kaiser_window_beta::call(window_length, periodic, beta, c10::optTypeMetaToScalarType(options.dtype_opt()), options.layout_opt(), options.device_opt(), options.pinned_memory_opt()); +} +// aten::kaiser_window.beta(int window_length, bool periodic, float beta, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor +inline at::Tensor kaiser_window(int64_t window_length, bool periodic, double beta, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory) { + return at::_ops::kaiser_window_beta::call(window_length, periodic, beta, dtype, layout, device, pin_memory); +} + +// aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length) { + return at::_ops::kaiser_window_out::call(window_length, out); +} +// aten::kaiser_window.out(int window_length, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, at::Tensor & out) { + return at::_ops::kaiser_window_out::call(window_length, out); +} + +// aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length, bool periodic) { + return at::_ops::kaiser_window_periodic_out::call(window_length, periodic, out); +} +// aten::kaiser_window.periodic_out(int window_length, bool periodic, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, bool periodic, at::Tensor & out) { + return at::_ops::kaiser_window_periodic_out::call(window_length, periodic, out); +} + +// aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & kaiser_window_out(at::Tensor & out, int64_t window_length, bool periodic, double beta) { + return at::_ops::kaiser_window_beta_out::call(window_length, periodic, beta, out); +} +// aten::kaiser_window.beta_out(int window_length, bool periodic, float beta, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & kaiser_window_outf(int64_t window_length, bool periodic, double beta, at::Tensor & out) { + return at::_ops::kaiser_window_beta_out::call(window_length, periodic, beta, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h new file mode 100644 index 0000000000000000000000000000000000000000..d2258334049f9cec8e43947c345106fcb9a58fef --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/layer_norm.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? 
bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor +inline at::Tensor layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps, cudnn_enable); +} +namespace symint { + template >> + at::Tensor layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, c10::fromIntArrayRefSlow(normalized_shape), weight, bias, eps, cudnn_enable); + } +} + +// aten::layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight=None, Tensor? bias=None, float eps=1e-05, bool cudnn_enable=True) -> Tensor +inline at::Tensor layer_norm_symint(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, normalized_shape, weight, bias, eps, cudnn_enable); +} +namespace symint { + template >> + at::Tensor layer_norm(const at::Tensor & input, c10::SymIntArrayRef normalized_shape, const ::std::optional & weight={}, const ::std::optional & bias={}, double eps=1e-05, bool cudnn_enable=true) { + return at::_ops::layer_norm::call(input, normalized_shape, weight, bias, eps, cudnn_enable); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..e84b68ba872bc1251751df0331b116710fe0448b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/leaky_relu_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API leaky_relu_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "leaky_relu.out(Tensor self, Scalar negative_slope=0.01, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope, at::Tensor & out); +}; + +struct TORCH_API leaky_relu { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Scalar & negative_slope); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & negative_slope); +}; + +struct TORCH_API leaky_relu_ { + using schema = at::Tensor & (at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::leaky_relu_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Scalar & negative_slope); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Scalar & negative_slope); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..258e83e9280e0116f81d5d5071c784706c588331 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_cholesky_ex_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_linalg_cholesky_ex : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, bool upper, bool check_errors); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h new file mode 100644 index 0000000000000000000000000000000000000000..5fc92e82739f2b9ded238821aace98f73e5b7aa0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eig_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API ::std::tuple linalg_eig(const at::Tensor & self); +TORCH_API ::std::tuple linalg_eig_out(const at::Tensor & self, at::Tensor & eigenvalues, at::Tensor & eigenvectors); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..cab1626fab237400d62b71c60e02a9aa7f8fb451 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_eigvalsh_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_eigvalsh { + using schema = at::Tensor (const at::Tensor &, c10::string_view); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_eigvalsh"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_eigvalsh(Tensor self, str UPLO=\"L\") -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view UPLO); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view UPLO); +}; + +struct TORCH_API linalg_eigvalsh_out { + using schema = at::Tensor & (const at::Tensor &, c10::string_view, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_eigvalsh"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_eigvalsh.out(Tensor self, str UPLO=\"L\", *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::string_view UPLO, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view UPLO, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..ea51acdaf43a13e0a1dd339981721afbbfb64fa7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_inv_ex_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_linalg_inv_ex : public at::impl::MetaBase { + + + void meta(const at::Tensor & A, bool check_errors); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f6b57231823da5054c78a795676f4bad754aec76 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple linalg_lu(const at::Tensor & A, bool pivot=true); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..5ddeb1c2786a9eca671e882978a624728a510a66 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_lu_factor_ex_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API ::std::tuple linalg_lu_factor_ex(const at::Tensor & A, bool pivot=true, bool check_errors=false); +TORCH_API ::std::tuple linalg_lu_factor_ex_out(at::Tensor & LU, at::Tensor & pivots, at::Tensor & info, const at::Tensor & A, bool pivot=true, bool check_errors=false); +TORCH_API ::std::tuple linalg_lu_factor_ex_outf(const at::Tensor & A, bool pivot, bool check_errors, at::Tensor & LU, at::Tensor & pivots, at::Tensor & info); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..16fecdbbafad271b97f46ed8d8eca1dec61d069b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matmul_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matmul { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matmul"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matmul(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API linalg_matmul_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matmul"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = 
"linalg_matmul.out(Tensor self, Tensor other, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ebb83187fb06cd85e509b2b087d17756f502073d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_power_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matrix_power { + using schema = at::Tensor (const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_power"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matrix_power(Tensor self, int n) -> Tensor"; + static at::Tensor call(const at::Tensor & self, int64_t n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t n); +}; + +struct TORCH_API linalg_matrix_power_out { + using schema = at::Tensor & (const at::Tensor &, int64_t, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_power"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_matrix_power.out(Tensor self, int n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, int64_t n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t n, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..83a65f2cf6711b7fc75a8b93619e92acfc83b250 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_matrix_rank_ops.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_matrix_rank_atol_rtol_tensor { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_tensor(Tensor input, *, Tensor? atol=None, Tensor? 
rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_tensor_out { + using schema = at::Tensor & (const at::Tensor &, const ::std::optional &, const ::std::optional &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_tensor_out"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_tensor_out(Tensor input, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_float { + using schema = at::Tensor (const at::Tensor &, ::std::optional, ::std::optional, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_float"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_float(Tensor self, *, float? atol=None, float? 
rtol=None, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_atol_rtol_float_out { + using schema = at::Tensor & (const at::Tensor &, ::std::optional, ::std::optional, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "atol_rtol_float_out"; + static constexpr const char* schema_str = "linalg_matrix_rank.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank { + using schema = at::Tensor (const at::Tensor &, double, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_matrix_rank(Tensor self, float tol, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & self, double tol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double tol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_out { + using schema = at::Tensor & (const at::Tensor &, double, bool, at::Tensor &); + using ptr_schema = 
schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_matrix_rank.out(Tensor self, float tol, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, double tol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, double tol, bool hermitian, at::Tensor & out); +}; + +struct TORCH_API linalg_matrix_rank_tol_tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, bool); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "tol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.tol_tensor(Tensor input, Tensor tol, bool hermitian=False) -> Tensor"; + static at::Tensor call(const at::Tensor & input, const at::Tensor & tol, bool hermitian); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & tol, bool hermitian); +}; + +struct TORCH_API linalg_matrix_rank_out_tol_tensor { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, bool, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_matrix_rank"; + static constexpr const char* overload_name = "out_tol_tensor"; + static constexpr const char* schema_str = "linalg_matrix_rank.out_tol_tensor(Tensor input, Tensor tol, bool hermitian=False, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & input, const at::Tensor & tol, bool hermitian, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & tol, bool hermitian, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..83e522699842fedf212d11236f293d087469fa9e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor linalg_norm(const at::Tensor & self, const ::std::optional & ord=::std::nullopt, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & linalg_norm_out(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +TORCH_API at::Tensor linalg_norm(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & linalg_norm_out(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & 
out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..2e110380679ebbd2b95ac665460fd472b18b4d66 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_norm_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API linalg_norm { + using schema = at::Tensor (const at::Tensor &, const ::std::optional &, at::OptionalIntArrayRef, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "linalg_norm(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? 
dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +}; + +struct TORCH_API linalg_norm_ord_str { + using schema = at::Tensor (const at::Tensor &, c10::string_view, at::OptionalIntArrayRef, bool, ::std::optional); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "ord_str"; + static constexpr const char* schema_str = "linalg_norm.ord_str(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +}; + +struct TORCH_API linalg_norm_out { + using schema = at::Tensor & (const at::Tensor &, const ::std::optional &, at::OptionalIntArrayRef, bool, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "linalg_norm.out(Tensor self, Scalar? ord=None, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const ::std::optional & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +}; + +struct TORCH_API linalg_norm_ord_str_out { + using schema = at::Tensor & (const at::Tensor &, c10::string_view, at::OptionalIntArrayRef, bool, ::std::optional, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::linalg_norm"; + static constexpr const char* overload_name = "ord_str_out"; + static constexpr const char* schema_str = "linalg_norm.ord_str_out(Tensor self, str ord, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::string_view ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h new file mode 100644 index 0000000000000000000000000000000000000000..ed313e3dc976817a8a12457c89aff13d14035920 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv.h @@ -0,0 +1,87 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_tensor::call(self, atol, rtol, hermitian); +} + +// aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_tensor_out::call(self, atol, rtol, hermitian, out); +} +// aten::linalg_pinv.atol_rtol_tensor_out(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_atol_rtol_tensor_out::call(self, atol, rtol, hermitian, out); +} + +// aten::linalg_pinv.atol_rtol_float(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_float::call(self, atol, rtol, hermitian); +} + +// aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian=false) { + return at::_ops::linalg_pinv_atol_rtol_float_out::call(self, atol, rtol, hermitian, out); +} +// aten::linalg_pinv.atol_rtol_float_out(Tensor self, *, float? atol=None, float? rtol=None, bool hermitian=False, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, ::std::optional atol, ::std::optional rtol, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_atol_rtol_float_out::call(self, atol, rtol, hermitian, out); +} + +// aten::linalg_pinv(Tensor self, float rcond, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, double rcond, bool hermitian=false) { + return at::_ops::linalg_pinv::call(self, rcond, hermitian); +} + +// aten::linalg_pinv.rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False) -> Tensor +inline at::Tensor linalg_pinv(const at::Tensor & self, const at::Tensor & rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_rcond_tensor::call(self, rcond, hermitian); +} + +// aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, double rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_out::call(self, rcond, hermitian, out); +} +// aten::linalg_pinv.out(Tensor self, float rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, double rcond, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_out::call(self, rcond, hermitian, out); +} + +// aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & rcond, bool hermitian=false) { + return at::_ops::linalg_pinv_out_rcond_tensor::call(self, rcond, hermitian, out); +} +// aten::linalg_pinv.out_rcond_tensor(Tensor self, Tensor rcond, bool hermitian=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & linalg_pinv_outf(const at::Tensor & self, const at::Tensor & rcond, bool hermitian, at::Tensor & out) { + return at::_ops::linalg_pinv_out_rcond_tensor::call(self, rcond, hermitian, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7b1c3ddb28fcf95569dee03f63a9fedef508bc02 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_pinv_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & linalg_pinv_out(at::Tensor & out, const at::Tensor & self, const ::std::optional & atol={}, const ::std::optional & rtol={}, bool hermitian=false); +TORCH_API at::Tensor & linalg_pinv_outf(const at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bcb57ee939a5850939356c266e8e812f935201ba --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_qr_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API ::std::tuple linalg_qr(const at::Tensor & A, c10::string_view mode="reduced"); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h new file mode 100644 index 0000000000000000000000000000000000000000..053e2e788ca81a3efab0ae48ea187574f2596b10 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_slogdet.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet) +inline ::std::tuple linalg_slogdet(const at::Tensor & A) { + return at::_ops::linalg_slogdet::call(A); +} + +// aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet) +inline ::std::tuple linalg_slogdet_out(at::Tensor & sign, at::Tensor & logabsdet, const at::Tensor & A) { + return at::_ops::linalg_slogdet_out::call(A, sign, logabsdet); +} +// aten::linalg_slogdet.out(Tensor A, *, Tensor(a!) sign, Tensor(b!) logabsdet) -> (Tensor(a!) sign, Tensor(b!) logabsdet) +inline ::std::tuple linalg_slogdet_outf(const at::Tensor & A, at::Tensor & sign, at::Tensor & logabsdet) { + return at::_ops::linalg_slogdet_out::call(A, sign, logabsdet); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b3c050c3c21fc331d37b46fedada2106a90ce20e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_solve_triangular_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor linalg_solve_triangular(const at::Tensor & self, const at::Tensor & B, bool upper, bool left=true, bool unitriangular=false); +TORCH_API at::Tensor & linalg_solve_triangular_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & B, bool upper, bool left=true, bool unitriangular=false); +TORCH_API at::Tensor & linalg_solve_triangular_outf(const at::Tensor & self, const at::Tensor & B, bool upper, bool left, bool unitriangular, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h new file mode 100644 index 0000000000000000000000000000000000000000..6411403bfd31fab62d5a604158d113da1b054a6e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/linalg_vector_norm_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_linalg_vector_norm_out : public at::meta::structured_linalg_vector_norm { +void impl(const at::Tensor & self, const at::Scalar & ord, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log.h new file mode 100644 index 0000000000000000000000000000000000000000..5f4050ec1d69a08b46a81da02191316db7abfd3f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::log(Tensor self) -> Tensor +inline at::Tensor log(const at::Tensor & self) { + return at::_ops::log::call(self); +} + +// aten::log_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & log_(at::Tensor & self) { + return at::_ops::log_::call(self); +} + +// aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::log_out::call(self, out); +} +// aten::log.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::log_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..7d591023efad4bbb96df6f2762eaa3fe9315acad --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log1p_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor log1p(const at::Tensor & self); +TORCH_API at::Tensor & log1p_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & log1p_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & log1p_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h new file mode 100644 index 0000000000000000000000000000000000000000..ce087a7bb3c0f7d65707597594adfbc139b40d6c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/log2.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::log2(Tensor self) -> Tensor +inline at::Tensor log2(const at::Tensor & self) { + return at::_ops::log2::call(self); +} + +// aten::log2_(Tensor(a!) self) -> Tensor(a!) +inline at::Tensor & log2_(at::Tensor & self) { + return at::_ops::log2_::call(self); +} + +// aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log2_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::log2_out::call(self, out); +} +// aten::log2.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & log2_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::log2_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e54a7598265fa0b8601979f0f20f3342be0ff60d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/logcumsumexp_compositeexplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor logcumsumexp(const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & logcumsumexp_out(at::Tensor & out, const at::Tensor & self, int64_t dim); +TORCH_API at::Tensor & logcumsumexp_outf(const at::Tensor & self, int64_t dim, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f0e8c5f4500d45f52c84e64a2b08b54c9aa5d361 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/matrix_exp_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API matrix_exp { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::matrix_exp"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "matrix_exp(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..446aa8fc36ce88e297baa35807c8e2eabbb8fb5d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool1d_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor max_pool1d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h new file mode 100644 index 0000000000000000000000000000000000000000..2007d02b87626fb28000d375fbe5e0a0008c2e7d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple max_pool2d_with_indices_out(at::Tensor & out, at::Tensor & indices, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} +// aten::max_pool2d_with_indices.out(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False, *, Tensor(a!) out, Tensor(b!) 
indices) -> (Tensor(a!), Tensor(b!)) +inline ::std::tuple max_pool2d_with_indices_outf(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out, at::Tensor & indices) { + return at::_ops::max_pool2d_with_indices_out::call(self, kernel_size, stride, padding, dilation, ceil_mode, out, indices); +} + +// aten::max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor) +inline ::std::tuple max_pool2d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false) { + return at::_ops::max_pool2d_with_indices::call(self, kernel_size, stride, padding, dilation, ceil_mode); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..4df2ef4710f438f9fe6b66cdceab1836c7dd87ed --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/max_pool2d_with_indices_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_max_pool2d_with_indices_backward : public at::impl::MetaBase { + + + void meta(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..c18bdab1ca31dfa5ccc1f03a42206eadb2d78411 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/maximum_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor maximum(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h new file mode 100644 index 0000000000000000000000000000000000000000..aafbe1e1135d6b343c3553aa1960fed7826c0e56 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/minimum_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_minimum_out : public at::meta::structured_minimum { +void impl(const at::Tensor & self, const at::Tensor & other, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..8faac16ef1c602c74a60c042ad23acccb0be48ac --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API miopen_batch_norm_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, double); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_batch_norm_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon); +}; + +struct TORCH_API miopen_batch_norm_backward_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, const ::std::optional &, const ::std::optional &, const ::std::optional &, const ::std::optional &, double, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::miopen_batch_norm_backward"; + static 
constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "miopen_batch_norm_backward.out(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & input, const at::Tensor & grad_output, const at::Tensor & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_var, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1ab80e699f50470aba96f05e3c4de8489cad8871 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API ::std::tuple miopen_batch_norm_out(at::Tensor & out0, at::Tensor & out1, at::Tensor & out2, const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); +TORCH_API ::std::tuple miopen_batch_norm_outf(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..46f03d7df24729fea38ce557cd443df02f92990e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_batch_norm_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple miopen_batch_norm(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double exponential_average_factor, double epsilon); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..0667b8a763821ee8bb74f996b1161f4480c56b35 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/miopen_rnn_cuda_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple miopen_rnn(const at::Tensor & input, at::TensorList weight, int64_t weight_stride0, const at::Tensor & hx, const ::std::optional & cx, int64_t mode, int64_t hidden_size, int64_t num_layers, bool batch_first, double dropout, bool train, bool bidirectional, at::IntArrayRef batch_sizes, const ::std::optional & dropout_state); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8ea3ca88972b258358fc859c94d64edc6a7fc518 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mish_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor mish(const at::Tensor & self); +TORCH_API at::Tensor & mish_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & mish_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & mish_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..87a7ef2f54f5abf1d74b4d185c700d5a3ada5d59 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_backward_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & mkldnn_max_pool2d_backward_out(at::Tensor & out, const at::Tensor & grad_output, const at::Tensor & output, const at::Tensor & input, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +TORCH_API at::Tensor & mkldnn_max_pool2d_backward_outf(const at::Tensor & grad_output, const at::Tensor & output, const at::Tensor & input, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..e77e795a1ada06ed4b9a94d4d3904f18d2c8474c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_max_pool2d_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & mkldnn_max_pool2d_out(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, at::Tensor & out); +TORCH_API at::Tensor mkldnn_max_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride={}, 
at::IntArrayRef padding=0, at::IntArrayRef dilation=1, bool ceil_mode=false); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..79b58393ab7175c637ab3f22808e59d2edbd8c99 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mkldnn_reorder_conv2d_weight_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API mkldnn_reorder_conv2d_weight { + using schema = at::Tensor (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mkldnn_reorder_conv2d_weight"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "mkldnn_reorder_conv2d_weight(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? 
input_size=None) -> Tensor"; + static at::Tensor call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size); +}; + +struct TORCH_API mkldnn_reorder_conv2d_weight_out { + using schema = at::Tensor & (const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, at::OptionalSymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mkldnn_reorder_conv2d_weight"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "mkldnn_reorder_conv2d_weight.out(Tensor self, SymInt[2] padding=0, SymInt[2] stride=1, SymInt[2] dilation=1, SymInt groups=1, SymInt[]? input_size=None, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, at::OptionalSymIntArrayRef input_size, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..39d5366ad61f8df25b505890d1added2d230a7cc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/moveaxis_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor moveaxis(const at::Tensor & self, at::IntArrayRef source, at::IntArrayRef destination); +TORCH_API at::Tensor moveaxis(const at::Tensor & self, int64_t source, int64_t destination); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h new file mode 100644 index 0000000000000000000000000000000000000000..225df56de6d4072982924a2d92b4c3d851aa818e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/movedim.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::movedim.intlist(Tensor(a) self, int[] source, int[] destination) -> Tensor(a) +inline at::Tensor movedim(const at::Tensor & self, at::IntArrayRef source, at::IntArrayRef destination) { + return at::_ops::movedim_intlist::call(self, source, destination); +} + +// aten::movedim.int(Tensor(a) self, int source, int destination) -> Tensor(a) +inline at::Tensor movedim(const at::Tensor & self, int64_t source, int64_t destination) { + return at::_ops::movedim_int::call(self, source, destination); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..73708f0367fd696ace228d684c46484965e4a881 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/mps_convolution_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API mps_convolution_backward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, ::std::array); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mps_convolution_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); + static ::std::tuple 
redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask); +}; + +struct TORCH_API mps_convolution_backward_out { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymIntArrayRef, c10::SymInt, ::std::array, at::Tensor &, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::mps_convolution_backward"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "mps_convolution_backward.out(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask, *, Tensor(a!) out0, Tensor(b!) out1, Tensor(c!) out2) -> (Tensor(a!), Tensor(b!), Tensor(c!))"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & grad_output, const at::Tensor & weight, c10::SymIntArrayRef padding, c10::SymIntArrayRef stride, c10::SymIntArrayRef dilation, c10::SymInt groups, ::std::array output_mask, at::Tensor & out0, at::Tensor & out1, at::Tensor & out2); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f078659e6e14f28d7b9891c78d386465d8ce1dc3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multilabel_margin_loss_forward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API multilabel_margin_loss_forward_output { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t, at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::multilabel_margin_loss_forward"; + static constexpr const char* overload_name = "output"; + static constexpr const char* schema_str = "multilabel_margin_loss_forward.output(Tensor self, Tensor target, int reduction, *, Tensor(a!) output, Tensor(b!) 
is_target) -> (Tensor(a!), Tensor(b!))"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & target, int64_t reduction, at::Tensor & output, at::Tensor & is_target); +}; + +struct TORCH_API multilabel_margin_loss_forward { + using schema = ::std::tuple (const at::Tensor &, const at::Tensor &, int64_t); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::multilabel_margin_loss_forward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)"; + static ::std::tuple call(const at::Tensor & self, const at::Tensor & target, int64_t reduction); + static ::std::tuple redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & target, int64_t reduction); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc96880eb1b33356f1981244400ac6867cf73371 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/multinomial_cuda_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor multinomial(const at::Tensor & self, int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor multinomial_symint(const at::Tensor & self, c10::SymInt num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_out(at::Tensor & out, const at::Tensor & self, int64_t num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_outf(const at::Tensor & self, int64_t num_samples, bool replacement, ::std::optional generator, at::Tensor & out); +TORCH_API at::Tensor & multinomial_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymInt num_samples, bool replacement=false, ::std::optional generator=::std::nullopt); +TORCH_API at::Tensor & multinomial_symint_outf(const at::Tensor & self, c10::SymInt num_samples, bool replacement, ::std::optional generator, at::Tensor & out); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h new file mode 100644 index 0000000000000000000000000000000000000000..696990710201725771fcf0d97eb02a3c3952b14c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmean.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::nanmean(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor +inline at::Tensor nanmean(const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::nanmean::call(self, dim, keepdim, dtype); +} + +// aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmean_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim=::std::nullopt, bool keepdim=false, ::std::optional dtype=::std::nullopt) { + return at::_ops::nanmean_out::call(self, dim, keepdim, dtype, out); +} +// aten::nanmean.out(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmean_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out) { + return at::_ops::nanmean_out::call(self, dim, keepdim, dtype, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h new file mode 100644 index 0000000000000000000000000000000000000000..da9a08795a529fa4d4fded483e1f98c3f812aecc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::nanmedian(Tensor self) -> Tensor +inline at::Tensor nanmedian(const at::Tensor & self) { + return at::_ops::nanmedian::call(self); +} + +// aten::nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple nanmedian(const at::Tensor & self, int64_t dim, bool keepdim=false) { + return at::_ops::nanmedian_dim::call(self, dim, keepdim); +} + +// aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim, bool keepdim=false) { + return at::_ops::nanmedian_dim_values::call(self, dim, keepdim, values, indices); +} +// aten::nanmedian.dim_values(Tensor self, int dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) 
indices) +inline ::std::tuple nanmedian_outf(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices) { + return at::_ops::nanmedian_dim_values::call(self, dim, keepdim, values, indices); +} + +// aten::nanmedian.names_dim(Tensor self, Dimname dim, bool keepdim=False) -> (Tensor values, Tensor indices) +inline ::std::tuple nanmedian(const at::Tensor & self, at::Dimname dim, bool keepdim=false) { + return at::_ops::nanmedian_names_dim::call(self, dim, keepdim); +} + +// aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, at::Dimname dim, bool keepdim=false) { + return at::_ops::nanmedian_names_dim_values::call(self, dim, keepdim, values, indices); +} +// aten::nanmedian.names_dim_values(Tensor self, Dimname dim, bool keepdim=False, *, Tensor(a!) values, Tensor(b!) indices) -> (Tensor(a!) values, Tensor(b!) indices) +inline ::std::tuple nanmedian_outf(const at::Tensor & self, at::Dimname dim, bool keepdim, at::Tensor & values, at::Tensor & indices) { + return at::_ops::nanmedian_names_dim_values::call(self, dim, keepdim, values, indices); +} + +// aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmedian_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::nanmedian_out::call(self, out); +} +// aten::nanmedian.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & nanmedian_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::nanmedian_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fd99f8c69a1388383fde288d2b2f7a3b0339f2d6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nanmedian_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor nanmedian(const at::Tensor & self); +TORCH_API ::std::tuple nanmedian_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t dim, bool keepdim=false); +TORCH_API ::std::tuple nanmedian_outf(const at::Tensor & self, int64_t dim, bool keepdim, at::Tensor & values, at::Tensor & indices); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9b0d6e7300cbaca54a4c784dd59a2381583e7e46 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/native_group_norm_backward_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API ::std::tuple native_group_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, int64_t N, int64_t C, int64_t HxW, int64_t group, ::std::array output_mask); +TORCH_API ::std::tuple native_group_norm_backward_symint(const at::Tensor & grad_out, const at::Tensor & input, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, c10::SymInt N, c10::SymInt C, c10::SymInt HxW, int64_t group, ::std::array output_mask); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..23ea7f42b88b75e600b6e830cdbb06ef1654b722 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/ne_cuda_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor ne(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & ne_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor & ne_(at::Tensor & self, const at::Tensor & other); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h new file mode 100644 index 0000000000000000000000000000000000000000..24d96ebe44389925d9ef6cb159b9b15bf20f9d28 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/new_empty_strided_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & new_empty_strided_out_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, at::Tensor & out); +TORCH_API at::Tensor new_empty_strided_symint(const at::Tensor & self, 
c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype={}, ::std::optional layout={}, ::std::optional device={}, ::std::optional pin_memory={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3e8942097619bba20b528897c9722dda72182b55 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nextafter_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API nextafter_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "nextafter.out(Tensor self, Tensor other, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, at::Tensor & out); +}; + +struct TORCH_API nextafter { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nextafter(Tensor self, Tensor other) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other); +}; + +struct TORCH_API nextafter_ { + using schema = at::Tensor & (at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nextafter_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nextafter_(Tensor(a!) self, Tensor other) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self, const at::Tensor & other); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self, const at::Tensor & other); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h new file mode 100644 index 0000000000000000000000000000000000000000..b0044b6d98242965a16799b5606dbc00d26e1f35 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss2d_backward_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor nll_loss2d_backward_cpu(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss2d_backward_out_cpu(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); +TORCH_API at::Tensor nll_loss2d_backward_cuda(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor & nll_loss2d_backward_out_cuda(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight, at::Tensor & grad_input); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or 
TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..154db8e8a7e623ea13f8d02553bb0f3fd332fbe2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nll_loss_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor nll_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API at::Tensor nll_loss_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, c10::SymInt ignore_index, const at::Tensor & total_weight); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..daa4908b74937beff0ac223d195230b6a5b6d282 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/nonzero_numpy_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API nonzero_numpy { + using schema = ::std::vector (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::nonzero_numpy"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "nonzero_numpy(Tensor self) -> Tensor[]"; + static ::std::vector call(const at::Tensor & self); + static ::std::vector redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..27b12adbdf1c4786d0e2d4fea656cd349ecfc6b4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/norm_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim, at::ScalarType dtype); +TORCH_API at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim=false); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1292b551374bf4bfe42ea644f5af95ca7ce8c7c3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/numpy_T_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor numpy_T(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..82b619de6f6e125b6cd08c9725b808ab018a4b24 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/output_nr_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API int64_t output_nr(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..93a2dd1dd04bcb44bcf97849efac7e5e0a8fc324 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/pixel_unshuffle_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & pixel_unshuffle_out(at::Tensor & out, const at::Tensor & self, int64_t downscale_factor); +TORCH_API at::Tensor & pixel_unshuffle_outf(const at::Tensor & self, int64_t downscale_factor, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..ce642cbae71c9456208814d4a4dad363826e1612 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor poisson_nll_loss(const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h new file mode 100644 index 0000000000000000000000000000000000000000..363da3071604a27cbff262e7f60974eb5c22eda6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/poisson_nll_loss_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor poisson_nll_loss(const at::Tensor & input, const at::Tensor & target, bool log_input, bool full, double eps, int64_t reduction); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..faef703cb37d1e492777a47087a44d8273143f96 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polar_compositeexplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor polar(const at::Tensor & abs, const at::Tensor & angle); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e363b27034b5d4a352f697dab7448945a50c66f1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/polygamma_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor polygamma(int64_t n, const at::Tensor & self); +TORCH_API at::Tensor & polygamma_out(at::Tensor & out, int64_t n, const at::Tensor & self); +TORCH_API at::Tensor & polygamma_outf(int64_t n, const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/put.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/put.h new file mode 100644 index 0000000000000000000000000000000000000000..24873634ad47ca0acf8e24173e3756f5c9ca6d40 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/put.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor +inline at::Tensor put(const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate=false) { + return at::_ops::put::call(self, index, source, accumulate); +} + +// aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & put_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate=false) { + return at::_ops::put_out::call(self, index, source, accumulate, out); +} +// aten::put.out(Tensor self, Tensor index, Tensor source, bool accumulate=False, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & put_outf(const at::Tensor & self, const at::Tensor & index, const at::Tensor & source, bool accumulate, at::Tensor & out) { + return at::_ops::put_out::call(self, index, source, accumulate, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..44e28e469c2dfbe2c72edfd8c20147d57a0b8b05 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/q_per_channel_scales_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & q_per_channel_scales_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & q_per_channel_scales_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3b754b9d1c6e5c20c5fba1ce1ba93a1bae651707 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/quantile_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor quantile(const at::Tensor & self, const at::Tensor & q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(const at::Tensor & self, const at::Tensor & q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); +TORCH_API at::Tensor quantile(const at::Tensor & self, double q, ::std::optional dim=::std::nullopt, bool keepdim=false, c10::string_view interpolation="linear"); +TORCH_API at::Tensor & quantile_out(const at::Tensor & self, double q, ::std::optional dim, bool keepdim, c10::string_view interpolation, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..573380f7d78a445c872e74bb3061b0c716dde648 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rand_like_compositeexplicitautograd_dispatch.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor rand_like(const at::Tensor & self, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +TORCH_API at::Tensor & rand_like_out(at::Tensor & out, const at::Tensor & self, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_outf(const at::Tensor & self, ::std::optional memory_format, at::Tensor & out); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional generator, at::TensorOptions options={}, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor rand_like(const at::Tensor & self, ::std::optional generator, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +TORCH_API at::Tensor & rand_like_out(at::Tensor & out, const at::Tensor & self, ::std::optional generator, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor & rand_like_outf(const at::Tensor & self, ::std::optional generator, ::std::optional memory_format, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3f2a27a767332ea7e36c8df5e0303908ff0fcf59 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor reciprocal(const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_(at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cb1a4073af072b8560c1f9e32797a0e0e09ffb10 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor reciprocal(const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & reciprocal_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & reciprocal_(at::Tensor & self); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ab0905b8ae242ff24579abd03c0490ec2c713f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reciprocal_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_reciprocal_out : public at::meta::structured_reciprocal { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..885f38dafdddb83dffab3a58762bf9965df092da --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad1d_backward_meta_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor reflection_pad1d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor reflection_pad1d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding, at::Tensor & grad_input); +TORCH_API at::Tensor & reflection_pad1d_backward_symint_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & reflection_pad1d_backward_symint_outf(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..4ed0bff0a5bd7cf4589a9ae76fe7b5017538c855 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad2d_backward_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API reflection_pad2d_backward_grad_input { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reflection_pad2d_backward"; + static constexpr const char* overload_name = "grad_input"; + static constexpr const char* schema_str = "reflection_pad2d_backward.grad_input(Tensor grad_output, Tensor self, SymInt[4] padding, *, Tensor(a!) grad_input) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & grad_input); +}; + +struct TORCH_API reflection_pad2d_backward { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::reflection_pad2d_backward"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "reflection_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor"; + static at::Tensor call(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or 
TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f23b676568fa352b5054d7d4738cc02d8236f84a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reflection_pad3d_backward_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor reflection_pad3d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor reflection_pad3d_backward_symint(const at::Tensor & grad_output, const at::Tensor & self, c10::SymIntArrayRef padding); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..a2c7df7049181ec95ee2f8fa449cc45734e60de7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/relu_cuda_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor relu(const at::Tensor & self); +TORCH_API at::Tensor & relu_(at::Tensor & self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..61753c283a599c2cd4a86e3e8a7f68e1b73d2f93 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/renorm_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_renorm : public at::impl::MetaBase { + + + void meta(const at::Tensor & self, const at::Scalar & p, int64_t dim, const at::Scalar & maxnorm); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc147f9d936511ed4428de23c6f36fd72d316723 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/replication_pad1d_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor replication_pad1d(const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor replication_pad1d_symint(const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_out(at::Tensor & out, const at::Tensor & self, at::IntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_outf(const at::Tensor & self, at::IntArrayRef padding, at::Tensor & out); +TORCH_API at::Tensor & replication_pad1d_symint_out(at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef padding); +TORCH_API at::Tensor & replication_pad1d_symint_outf(const at::Tensor & self, c10::SymIntArrayRef padding, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..6288dbfc4a1d0c8d00909c947358586393a9f7ca --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/reshape_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor reshape(const at::Tensor & self, at::IntArrayRef shape); +TORCH_API at::Tensor reshape_symint(const at::Tensor & self, c10::SymIntArrayRef shape); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h new file mode 100644 index 0000000000000000000000000000000000000000..d10297b922bcdb8ed04768fe95dc7e895ba91b39 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_as_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor resize_as(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_as_out(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format, const at::Tensor & out); +TORCH_API const at::Tensor & resize_as_(const at::Tensor & self, const at::Tensor & the_template, ::std::optional memory_format=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b238316406372af2c656b885a7d999930525cb60 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/resize_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor resize(const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API at::Tensor resize_symint(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_out(const at::Tensor & out, const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_outf(const at::Tensor & self, at::IntArrayRef size, ::std::optional memory_format, const at::Tensor & out); +TORCH_API const at::Tensor & resize_symint_out(const at::Tensor & out, const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format=::std::nullopt); +TORCH_API const at::Tensor & resize_symint_outf(const at::Tensor & self, c10::SymIntArrayRef size, ::std::optional memory_format, const at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h new file mode 100644 index 0000000000000000000000000000000000000000..1c2872d7b9cb61c80bebd2d81d9162f2f9a62a52 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/result_type_native.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::ScalarType result_type(const at::Tensor & tensor, const at::Tensor & other); +TORCH_API at::ScalarType result_type(const at::Tensor & tensor, const at::Scalar & other); +TORCH_API at::ScalarType result_type(const at::Scalar & scalar, const at::Tensor & tensor); +TORCH_API at::ScalarType result_type(const at::Scalar & scalar1, const at::Scalar & scalar2); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h new file mode 100644 index 0000000000000000000000000000000000000000..8368a03bee3f968dfb2bd9a0f9905be7aec1425e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/retains_grad_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API bool retains_grad(const at::Tensor & self); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h new file mode 100644 index 0000000000000000000000000000000000000000..fe699967fc8bec490a92aae59233e8f43c46f744 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rnn_relu_cell_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor rnn_relu_cell(const at::Tensor & input, const at::Tensor & hx, const at::Tensor & w_ih, const at::Tensor & w_hh, const ::std::optional & b_ih={}, const ::std::optional & b_hh={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3b4ee18904cb8919e395d1021b8e388ead5aeaef --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/roll_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor & roll_out_symint(const at::Tensor & self, c10::SymIntArrayRef shifts, at::IntArrayRef dims, at::Tensor & out); +TORCH_API at::Tensor roll(const at::Tensor & self, at::IntArrayRef shifts, at::IntArrayRef dims={}); +TORCH_API at::Tensor roll_cuda(const at::Tensor & self, at::IntArrayRef shifts, at::IntArrayRef dims={}); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..035c6298f200f0624afe6669740dd23d26e1c2fa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rshift_cpu_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor __rshift__(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & __irshift__(at::Tensor & self, const at::Tensor & other); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..89ecf79da3dcdade71e3d0ae97eb39be1e283514 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/rsub_ops.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API rsub_Tensor { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +}; + +struct TORCH_API rsub_Scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Scalar"; + 
static constexpr const char* schema_str = "rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor"; + static at::Tensor call(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha); +}; + +struct TORCH_API rsub_Tensor_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Tensor_out"; + static constexpr const char* schema_str = "rsub.Tensor_out(Tensor self, Tensor other, *, Scalar alpha=1, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); +}; + +struct TORCH_API rsub_Scalar_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::rsub"; + static constexpr const char* overload_name = "Scalar_out"; + static constexpr const char* schema_str = "rsub.Scalar_out(Tensor self, Scalar other, Scalar alpha=1, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, const at::Scalar & other, const at::Scalar & alpha, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..af9936e1059bba2de8ec47b37ffa2fa2c7d2706c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sign_cuda_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cuda { + +TORCH_API at::Tensor sign(const at::Tensor & self); +TORCH_API at::Tensor & sign_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & sign_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & sign_(at::Tensor & self); + +} // namespace cuda +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..455af4907f781d5bad20cade92879dc06364ecb2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sin_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_sin : public TensorIteratorBase { + + + void meta(const at::Tensor & self); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h new file mode 100644 index 0000000000000000000000000000000000000000..3baceec109bed7623b77493dc646052ecf9d54c0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_inverse_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor slice_inverse_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim=0, ::std::optional start=::std::nullopt, ::std::optional end=::std::nullopt, c10::SymInt step=1); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..aa85c91ca1a3a05160c27bfbb3ea7634f5f4190a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/slice_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API slice_Tensor { + using schema = at::Tensor (const at::Tensor &, int64_t, ::std::optional, ::std::optional, c10::SymInt); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::slice"; + static constexpr const char* overload_name = "Tensor"; + static constexpr const char* schema_str = "slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..23776af941254972c4de69965d66f60d4414d13d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softmax_compositeexplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & softmax_out(at::Tensor & out, const at::Tensor & self, int64_t dim, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & softmax_outf(const at::Tensor & self, int64_t dim, ::std::optional dtype, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..afc38a5c7b9bd6fc88e57c03ffbb40f273dbb83e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softplus_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor softplus(const at::Tensor & self, const at::Scalar & beta=1, const at::Scalar & threshold=20); +TORCH_API at::Tensor & softplus_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & beta=1, const at::Scalar & threshold=20); +TORCH_API at::Tensor & softplus_outf(const at::Tensor & self, const at::Scalar & beta, const at::Scalar & threshold, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3e0efb7642988d25e209b86ed1ac47a85d8416d4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor softshrink_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd, at::Tensor & grad_input); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h new file mode 100644 index 0000000000000000000000000000000000000000..d5e420cf33573cffa8007c1e0b843db4d12b72db --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeMetaFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace meta { + +struct TORCH_API structured_softshrink_backward : public TensorIteratorBase { + + + void meta(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +}; + +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..216942db7241a5dfc7d881094da2ed0e70ec1e58 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/softshrink_backward_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor softshrink_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_out(at::Tensor & grad_input, const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd); +TORCH_API at::Tensor & softshrink_backward_outf(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & lambd, at::Tensor & grad_input); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..8e6585cc401f58a147dbed89f2239b165028994b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sparse_coo_tensor_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, at::TensorOptions options={}, ::std::optional is_coalesced=::std::nullopt); +TORCH_API at::Tensor sparse_coo_tensor(const at::Tensor & indices, const at::Tensor & values, at::IntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional is_coalesced); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9a0a7525b0dacf1444ff5cf944042cb0ab9b6315 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_bessel_y1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_bessel_y1(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h new file mode 100644 index 0000000000000000000000000000000000000000..79d3d96f1a6c25df2dc481bd804614370c88b3c4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Scalar & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor +inline at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Scalar & n) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar::call(x, n); +} + +// aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.out(Tensor x, Tensor n, *, Tensor(a!) 
out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_out::call(x, n, out); +} + +// aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Scalar & x, const at::Tensor & n) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Scalar & x, const at::Tensor & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_x_scalar_out::call(x, n, out); +} + +// aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Scalar & n) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar_out::call(x, n, out); +} +// aten::special_chebyshev_polynomial_v.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Scalar & n, at::Tensor & out) { + return at::_ops::special_chebyshev_polynomial_v_n_scalar_out::call(x, n, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..af6618fad256c28477f76a5195f60f0ad8a9d87b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_chebyshev_polynomial_v_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h new file mode 100644 index 0000000000000000000000000000000000000000..2b5832f6d64bc27ca1319ff7f3f704119def8117 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_digamma.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_digamma(Tensor self) -> Tensor +inline at::Tensor special_digamma(const at::Tensor & self) { + return at::_ops::special_digamma::call(self); +} + +// aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_digamma_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_digamma_out::call(self, out); +} +// aten::special_digamma.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_digamma_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_digamma_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h new file mode 100644 index 0000000000000000000000000000000000000000..e914143f78369923612869201599f6b2c364e0a1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erf.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_erf(Tensor self) -> Tensor +inline at::Tensor special_erf(const at::Tensor & self) { + return at::_ops::special_erf::call(self); +} + +// aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_erf_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_erf_out::call(self, out); +} +// aten::special_erf.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_erf_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_erf_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..b4f7bd67d6e8ea0b853bd639ae3bb0445642c784 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_erfcx_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_erfcx(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d4617c9be68ece00806781b55d07d43f416b3401 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_expit_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_expit(const at::Tensor & self); +TORCH_API at::Tensor & special_expit_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_expit_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..48aa9ed4872e6dcd537c1c508c206291773174d1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_hermite_polynomial_h_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_hermite_polynomial_h(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_hermite_polynomial_h_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_hermite_polynomial_h_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h new file mode 100644 index 0000000000000000000000000000000000000000..a716d7d3439650d2c2b59628a59ce4872925c2ee --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1_native.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +struct TORCH_API structured_special_i1_out : public at::meta::structured_special_i1 { +void impl(const at::Tensor & self, const at::Tensor & out); +}; +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..a7c1ff8b20fe3c93a56aef7c38b00a894d53629f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_i1e_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_i1e { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_i1e"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_i1e(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_i1e_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_i1e"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_i1e.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..f97949f06ed009f09a4404000d5141096bc1aad5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_log_ndtr_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_log_ndtr { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_log_ndtr"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_log_ndtr(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API special_log_ndtr_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_log_ndtr"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_log_ndtr.out(Tensor self, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..df6efb5cacb85209e44e26a8445e1453110b3727 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i0_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_modified_bessel_i0(const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_i0_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_modified_bessel_i0_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h new file mode 100644 index 0000000000000000000000000000000000000000..ff1fd407e44f94d69627f9f8cf32c6da6f2f4c02 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_i1.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::special_modified_bessel_i1(Tensor self) -> Tensor +inline at::Tensor special_modified_bessel_i1(const at::Tensor & self) { + return at::_ops::special_modified_bessel_i1::call(self); +} + +// aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & special_modified_bessel_i1_out(at::Tensor & out, const at::Tensor & self) { + return at::_ops::special_modified_bessel_i1_out::call(self, out); +} +// aten::special_modified_bessel_i1.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & special_modified_bessel_i1_outf(const at::Tensor & self, at::Tensor & out) { + return at::_ops::special_modified_bessel_i1_out::call(self, out); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..127ef550df91dd5ade1287adeed7a7e8f867ac6d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_modified_bessel_k1_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_modified_bessel_k1(const at::Tensor & self); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h new file mode 100644 index 0000000000000000000000000000000000000000..51507986a24c9c0ff92e58d65624bcb651624ac0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_psi_native.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API at::Tensor special_psi(const at::Tensor & self); +TORCH_API at::Tensor & special_psi_out(const at::Tensor & self, at::Tensor & out); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..181f5dd7f7e54959fe7247b47761bb2e046ce3c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_scaled_modified_bessel_k0_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor special_scaled_modified_bessel_k0(const at::Tensor & x); +TORCH_API at::Tensor & special_scaled_modified_bessel_k0_out(at::Tensor & out, const at::Tensor & x); +TORCH_API at::Tensor & special_scaled_modified_bessel_k0_outf(const at::Tensor & x, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..424a0d003df3216964d682d7577654626cf0560e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_v_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_v(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_v_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_v_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f759ffc8d155e6859c1e56c74f392a202c2ca9b1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_compositeexplicitautogradnonfunctional_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeexplicitautogradnonfunctional { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_w(const at::Tensor & x, const at::Tensor & n); + +} // namespace compositeexplicitautogradnonfunctional +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..44145124eb42944a96f5c7871d5bc5cfb7a6b7cd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_shifted_chebyshev_polynomial_w(const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_w_out(at::Tensor & out, const at::Tensor & x, const at::Tensor & n); +TORCH_API at::Tensor & special_shifted_chebyshev_polynomial_w_outf(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..6a922dae1dffad2cf24958fbb22b3f09e07a76ba --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_shifted_chebyshev_polynomial_w_ops.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API special_shifted_chebyshev_polynomial_w { + using schema = at::Tensor (const at::Tensor &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor"; + static at::Tensor call(const at::Tensor & x, const at::Tensor & n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Tensor & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_x_scalar { + using schema = at::Tensor (const at::Scalar &, const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "x_scalar"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor"; + static at::Tensor call(const at::Scalar & x, const at::Tensor & n); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Scalar & x, const at::Tensor & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_n_scalar { + using schema = at::Tensor (const at::Tensor &, const at::Scalar &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "n_scalar"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor"; + static at::Tensor call(const at::Tensor & x, const at::Scalar & n); + static at::Tensor 
redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Scalar & n); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_out { + using schema = at::Tensor & (const at::Tensor &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.out(Tensor x, Tensor n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x, const at::Tensor & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Tensor & n, at::Tensor & out); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_x_scalar_out { + using schema = at::Tensor & (const at::Scalar &, const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "x_scalar_out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.x_scalar_out(Scalar x, Tensor n, *, Tensor(a!) 
out) -> Tensor(a!)"; + static at::Tensor & call(const at::Scalar & x, const at::Tensor & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Scalar & x, const at::Tensor & n, at::Tensor & out); +}; + +struct TORCH_API special_shifted_chebyshev_polynomial_w_n_scalar_out { + using schema = at::Tensor & (const at::Tensor &, const at::Scalar &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::special_shifted_chebyshev_polynomial_w"; + static constexpr const char* overload_name = "n_scalar_out"; + static constexpr const char* schema_str = "special_shifted_chebyshev_polynomial_w.n_scalar_out(Tensor x, Scalar n, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & x, const at::Scalar & n, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & x, const at::Scalar & n, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..bece7b2957f86b7f622087d22c098b441c473335 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_sinc_compositeimplicitautograd_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor special_sinc(const at::Tensor & self); +TORCH_API at::Tensor & special_sinc_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & special_sinc_outf(const at::Tensor & self, at::Tensor & out); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..df455651be1915e78672fc427ab99bdc20537822 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_compositeexplicitautograd_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor special_zeta(const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Scalar & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Scalar & self, const at::Tensor & other, at::Tensor & out); +TORCH_API at::Tensor special_zeta(const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Tensor & self, const at::Scalar & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Tensor & self, const at::Scalar & other, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..9d4021e8141df70062115287e9e0fa3c62264184 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/special_zeta_meta_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor special_zeta(const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other); +TORCH_API at::Tensor & special_zeta_outf(const at::Tensor & self, const at::Tensor & other, at::Tensor & out); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h new file mode 100644 index 0000000000000000000000000000000000000000..68ea4c662284118f258359c6eac7a00698bfba97 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/split_with_sizes.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector split_with_sizes(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, c10::fromIntArrayRefSlow(split_sizes), dim); +} +namespace symint { + template >> + ::std::vector split_with_sizes(const at::Tensor & self, at::IntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, c10::fromIntArrayRefSlow(split_sizes), dim); + } +} + +// 
aten::split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[] +inline ::std::vector split_with_sizes_symint(const at::Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, split_sizes, dim); +} +namespace symint { + template >> + ::std::vector split_with_sizes(const at::Tensor & self, c10::SymIntArrayRef split_sizes, int64_t dim=0) { + return at::_ops::split_with_sizes::call(self, split_sizes, dim); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..3d68e8878bbcf1cf7baf723f8d658a7c8452080e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sqrt_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API sqrt { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sqrt(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API sqrt_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sqrt_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API sqrt_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sqrt"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "sqrt.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h new file mode 100644 index 0000000000000000000000000000000000000000..813e9cbc0be7c87f861411981bbe781e959d92fa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::stft(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? align_to_window=None) -> Tensor +inline at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) { + return at::_ops::stft::call(self, n_fft, hop_length, win_length, window, normalized, onesided, return_complex, align_to_window); +} + +// aten::stft.center(Tensor self, int n_fft, int? hop_length=None, int? win_length=None, Tensor? window=None, bool center=True, str pad_mode="reflect", bool normalized=False, bool? onesided=None, bool? return_complex=None, bool? 
align_to_window=None) -> Tensor +inline at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt) { + return at::_ops::stft_center::call(self, n_fft, hop_length, win_length, window, center, pad_mode, normalized, onesided, return_complex, align_to_window); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d8b9ac858db24721e1b5dd67b7d360776e30df96 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/stft_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length, ::std::optional win_length, const ::std::optional & window, bool normalized, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt); +TORCH_API at::Tensor stft(const at::Tensor & self, int64_t n_fft, ::std::optional hop_length=::std::nullopt, ::std::optional win_length=::std::nullopt, const ::std::optional & window={}, bool center=true, c10::string_view pad_mode="reflect", bool normalized=false, ::std::optional onesided=::std::nullopt, ::std::optional return_complex=::std::nullopt, ::std::optional align_to_window=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..1bd8112b150f57cb27f15817f3e15113e92e26b3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sub_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. 
+// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor sub(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); +TORCH_API at::Tensor & sub_outf(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha, at::Tensor & out); +TORCH_API at::Tensor & sub_(at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha=1); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..f63d7cf1ad6e91ed1e7656ae9309ea677595ca27 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sum_cpu_dispatch.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & sum_out(at::Tensor & out, const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim=false, ::std::optional dtype=::std::nullopt); +TORCH_API at::Tensor & sum_outf(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype, at::Tensor & out); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h new file mode 100644 index 0000000000000000000000000000000000000000..fcb9bf3e777032c829e2cdc3f1928a7cb3df59c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/swapdims.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::swapdims(Tensor(a) self, int dim0, int dim1) -> Tensor(a) +inline at::Tensor swapdims(const at::Tensor & self, int64_t dim0, int64_t dim1) { + return at::_ops::swapdims::call(self, dim0, dim1); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h new file mode 100644 index 0000000000000000000000000000000000000000..23d4d50729290acad14c7102c8a1a513aeb5635f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_is_contiguous_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API c10::SymBool sym_is_contiguous(const at::Tensor & self, at::MemoryFormat memory_format=c10::MemoryFormat::Contiguous); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..fb006b36caa729d58e415f802ae2f7e946ff0211 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API c10::SymInt sym_numel(const at::Tensor & self); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29c405ebfb1d4ea983134d625e0017fb80b8f1ce --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_numel_ops.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API sym_numel { + using schema = c10::SymInt (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::sym_numel"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "sym_numel(Tensor self) -> SymInt"; + static c10::SymInt call(const at::Tensor & self); + static c10::SymInt redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h new file mode 100644 index 0000000000000000000000000000000000000000..279fa220f89888f80b525ace9401c8606dedac44 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/sym_stride_native.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +namespace at { +namespace native { +TORCH_API c10::SymInt sym_stride(const at::Tensor & self, int64_t dim); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/take.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/take.h new file mode 100644 index 0000000000000000000000000000000000000000..182131798de6673c3234bcc7827ede09fbebdb79 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/take.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & take_out(at::Tensor & out, const at::Tensor & self, const at::Tensor & index) { + return at::_ops::take_out::call(self, index, out); +} +// aten::take.out(Tensor self, Tensor index, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & take_outf(const at::Tensor & self, const at::Tensor & index, at::Tensor & out) { + return at::_ops::take_out::call(self, index, out); +} + +// aten::take(Tensor self, Tensor index) -> Tensor +inline at::Tensor take(const at::Tensor & self, const at::Tensor & index) { + return at::_ops::take::call(self, index); +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..69d098be3bcd9b739341838a50684de6a75a41ff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tan_ops.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { +namespace _ops { + + +struct TORCH_API tan { + using schema = at::Tensor (const at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "tan(Tensor self) -> Tensor"; + static at::Tensor call(const at::Tensor & self); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self); +}; + +struct TORCH_API tan_ { + using schema = at::Tensor & (at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan_"; + static constexpr const char* overload_name = ""; + static constexpr const char* schema_str = "tan_(Tensor(a!) self) -> Tensor(a!)"; + static at::Tensor & call(at::Tensor & self); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, at::Tensor & self); +}; + +struct TORCH_API tan_out { + using schema = at::Tensor & (const at::Tensor &, at::Tensor &); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::tan"; + static constexpr const char* overload_name = "out"; + static constexpr const char* schema_str = "tan.out(Tensor self, *, Tensor(a!) out) -> Tensor(a!)"; + static at::Tensor & call(const at::Tensor & self, at::Tensor & out); + static at::Tensor & redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Tensor & out); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..d3a895b128ef051143517fd139efb1f24355bd08 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/tanh_meta_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace meta { + +TORCH_API at::Tensor tanh(const at::Tensor & self); +TORCH_API at::Tensor & tanh_out(at::Tensor & out, const at::Tensor & self); +TORCH_API at::Tensor & tanh_outf(const at::Tensor & self, at::Tensor & out); +TORCH_API at::Tensor & tanh_(at::Tensor & self); + +} // namespace meta +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..764536ea189c74634443a5046ff8f7309b2a1f17 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_padded_tensor.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +namespace symint { + template >> + at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt); + } +} + +namespace symint { + template >> + at::Tensor to_padded_tensor(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor::call(self, padding, output_size); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? 
::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size.has_value() ? ::std::make_optional(c10::fromIntArrayRefSlow(*output_size)) : ::std::nullopt, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_symint_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_out(at::Tensor & out, const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size=::std::nullopt) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); + } +} + +// aten::to_padded_tensor.out(Tensor self, float padding, SymInt[]? output_size=None, *, Tensor(a!) 
out) -> Tensor(a!) +inline at::Tensor & to_padded_tensor_symint_outf(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); +} +namespace symint { + template >> + at::Tensor & to_padded_tensor_outf(const at::Tensor & self, double padding, at::OptionalSymIntArrayRef output_size, at::Tensor & out) { + return at::_ops::to_padded_tensor_out::call(self, padding, output_size, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..30a3c4d3f7365c6b5ed5a07857c29de8f273541a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/to_sparse_compositeimplicitautograd_dispatch.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor to_sparse(const at::Tensor & self, int64_t sparse_dim); +TORCH_API at::Tensor to_sparse(const at::Tensor & self, ::std::optional layout=::std::nullopt, at::OptionalIntArrayRef blocksize=::std::nullopt, ::std::optional dense_dim=::std::nullopt); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..3a8f54e1eb91c405493aaeea1175c82119de161a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/topk_cpu_dispatch.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace cpu { + +TORCH_API ::std::tuple topk(const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_symint(const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, int64_t k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_outf(const at::Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices); +TORCH_API ::std::tuple topk_symint_out(at::Tensor & values, at::Tensor & indices, const at::Tensor & self, c10::SymInt k, int64_t dim=-1, bool largest=true, bool sorted=true); +TORCH_API ::std::tuple topk_symint_outf(const at::Tensor & self, c10::SymInt k, int64_t dim, bool largest, bool sorted, at::Tensor & values, at::Tensor & indices); + +} // namespace cpu +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..c8492037c22d847d2ed7aa94b6d59be9f18e7b8e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unflatten_ops.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Operator.h + +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. 
+// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { +namespace _ops { + + +struct TORCH_API unflatten_int { + using schema = at::Tensor (const at::Tensor &, int64_t, c10::SymIntArrayRef); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::unflatten"; + static constexpr const char* overload_name = "int"; + static constexpr const char* schema_str = "unflatten.int(Tensor(a) self, int dim, SymInt[] sizes) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, int64_t dim, c10::SymIntArrayRef sizes); +}; + +struct TORCH_API unflatten_Dimname { + using schema = at::Tensor (const at::Tensor &, at::Dimname, c10::SymIntArrayRef, at::DimnameList); + using ptr_schema = schema*; + // See Note [static constexpr char* members for windows NVCC] + static constexpr const char* name = "aten::unflatten"; + static constexpr const char* overload_name = "Dimname"; + static constexpr const char* schema_str = "unflatten.Dimname(Tensor(a) self, Dimname dim, SymInt[] sizes, Dimname[] names) -> Tensor(a)"; + static at::Tensor call(const at::Tensor & self, at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names); + static at::Tensor redispatch(c10::DispatchKeySet dispatchKeySet, const at::Tensor & self, at::Dimname dim, c10::SymIntArrayRef sizes, at::DimnameList names); +}; + +}} // namespace at::_ops + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..12081d1c029a9ed0f455f9f068d4503b22bcbb92 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/unfold_backward.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from Function.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + + +#include + +namespace at { + + +// aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor +inline at::Tensor unfold_backward(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step); +} +namespace symint { + template >> + at::Tensor unfold_backward(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step); + } +} + +// aten::unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor +inline at::Tensor unfold_backward_symint(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward::call(grad_in, input_sizes, dim, size, step); +} +namespace symint { + template >> + at::Tensor unfold_backward(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return 
at::_ops::unfold_backward::call(grad_in, input_sizes, dim, size, step); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, at::IntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, c10::fromIntArrayRefSlow(input_sizes), dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) 
+inline at::Tensor & unfold_backward_symint_out(at::Tensor & out, const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_out(at::Tensor & out, const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); + } +} + +// aten::unfold_backward.out(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step, *, Tensor(a!) out) -> Tensor(a!) +inline at::Tensor & unfold_backward_symint_outf(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); +} +namespace symint { + template >> + at::Tensor & unfold_backward_outf(const at::Tensor & grad_in, c10::SymIntArrayRef input_sizes, int64_t dim, int64_t size, int64_t step, at::Tensor & out) { + return at::_ops::unfold_backward_out::call(grad_in, input_sizes, dim, size, step, out); + } +} + +} + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..cc556695c6c0d95f8c1fc6692c25d51db00eb6e7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_bilinear2d_compositeexplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. 
+#include + +namespace at { + +namespace compositeexplicitautograd { + +TORCH_API at::Tensor & upsample_bilinear2d_out(at::Tensor & out, const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor & upsample_bilinear2d_outf(const at::Tensor & input, at::OptionalIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors, at::Tensor & out); +TORCH_API at::Tensor & upsample_bilinear2d_symint_out(at::Tensor & out, const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors); +TORCH_API at::Tensor & upsample_bilinear2d_symint_outf(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, bool align_corners, ::std::optional> scale_factors, at::Tensor & out); + +} // namespace compositeexplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h new file mode 100644 index 0000000000000000000000000000000000000000..96c8b6c16ab718f28e88c1cff8a1353f895f168d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/upsample_nearest2d_native.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// @generated by torchgen/gen.py from NativeFunction.h + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace at { +namespace native { +TORCH_API at::Tensor upsample_nearest2d(const at::Tensor & input, at::OptionalIntArrayRef output_size, ::std::optional> scale_factors); 
+TORCH_API at::Tensor & upsample_nearest2d_vec_out_symint(const at::Tensor & input, at::OptionalSymIntArrayRef output_size, ::std::optional> scale_factors, at::Tensor & out); +struct TORCH_API structured_upsample_nearest2d_out_cpu : public at::meta::structured_upsample_nearest2d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +struct TORCH_API structured_upsample_nearest2d_out_cuda : public at::meta::structured_upsample_nearest2d { +void impl(const at::Tensor & self, at::ArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w, const at::Tensor & out); +}; +TORCH_API at::Tensor upsample_nearest2d_quantized_cpu(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h=::std::nullopt, ::std::optional scales_w=::std::nullopt); +} // namespace native +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..77a5f6d4c6556bb4e1c47d4c1b1c3a5610c35c76 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/var_mean_compositeimplicitautograd_dispatch.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API ::std::tuple var_mean(const at::Tensor & self, bool unbiased); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim=false); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::DimnameList dim, bool unbiased, bool keepdim=false); +TORCH_API ::std::tuple var_mean(const at::Tensor & self, at::DimnameList dim, const ::std::optional & correction=::std::nullopt, bool keepdim=false); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h new file mode 100644 index 0000000000000000000000000000000000000000..e1a51f15bd65c99588600e68e5ce5fc25afacbe9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/ops/view_as_compositeimplicitautograd_dispatch.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// @generated by torchgen/gen.py from DispatchKeyFunction.h + +// NB: The implementing C++ file is RegisterDispatchKey.cpp + +// The only #includes we need are for custom classes that have defaults in the C++ API +#include +#include +#include + +// Forward declarations of any types needed in the operator signatures. +// We can't directly include these classes because it will cause circular include dependencies. +// This file is included by TensorBody.h, which defines the Tensor class. +#include + +namespace at { + +namespace compositeimplicitautograd { + +TORCH_API at::Tensor view_as(const at::Tensor & self, const at::Tensor & other); + +} // namespace compositeimplicitautograd +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..036c73e6760f565f0d58dbe1b76f9e339e4a5a64 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/QTensorImpl.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace at { + +/** + * QTensorImpl is a TensorImpl for Quantized Tensors, it stores Quantizer which + * specifies the quantization scheme and parameters, for more information please + * see ATen/quantized/Quantizer.h + * + * We'll use QTensor in code or documentation to refer to a Tensor with QTensorImpl. + */ +struct TORCH_API QTensorImpl : public c10::TensorImpl { + public: + QTensorImpl( + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + // See Note [Enum ImplType] + QTensorImpl( + ImplType type, + Storage&& storage, + DispatchKeySet key_set, + const caffe2::TypeMeta data_type, + QuantizerPtr quantizer); + + + // TODO: Expose in PyTorch Frontend + QuantizerPtr quantizer() { + return quantizer_; + } + + void set_quantizer_(QuantizerPtr quantizer) { + quantizer_ = quantizer; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. 
+ */ + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/version_counter, + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Return a TensorImpl that is a shallow-copy of this TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override { + auto impl = c10::make_intrusive( + Storage(storage()), key_set(), data_type_, quantizer_); + copy_tensor_metadata( + /*src_q_impl=*/this, + /*dest_q_impl=*/impl.get(), + /*version_counter=*/std::move(version_counter), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change); + impl->refresh_numel(); + impl->refresh_contiguous(); + return impl; + } + + /** + * Shallow-copies data from another TensorImpl into this TensorImpl. + * + * For why this function doesn't check this TensorImpl's `allow_tensor_metadata_change_`, + * see NOTE [ TensorImpl Shallow-Copying ]. + */ + void shallow_copy_from(const c10::intrusive_ptr& impl) override { + AT_ASSERT(has_compatible_shallow_copy_type(impl->key_set())); + auto q_impl = static_cast(impl.get()); + copy_tensor_metadata( + /*src_q_impl=*/q_impl, + /*dest_q_impl=*/this, + /*version_counter=*/version_counter(), + /*allow_tensor_metadata_change=*/allow_tensor_metadata_change()); + refresh_numel(); + refresh_contiguous(); + } + + private: + QuantizerPtr quantizer_; + + const char* tensorimpl_type_name() const override; + + /** + * Copy the tensor metadata fields (e.g. 
sizes / strides / storage pointer / storage_offset) + * from one TensorImpl to another TensorImpl. + * + * For usage of `version_counter` and `allow_tensor_metadata_change`, see NOTE [ TensorImpl Shallow-Copying ]. + */ + static void copy_tensor_metadata( + const QTensorImpl* src_q_impl, + QTensorImpl* dest_q_impl, + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) { + TensorImpl::copy_tensor_metadata(src_q_impl, dest_q_impl, version_counter, allow_tensor_metadata_change); + + // OpaqueTensorImpl-specific fields. + dest_q_impl->quantizer_ = src_q_impl->quantizer_; + } +}; + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h new file mode 100644 index 0000000000000000000000000000000000000000..787f69064348d095ec856205b15a69172194c44b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/ATen/quantized/Quantizer.h @@ -0,0 +1,284 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include +#include +#include + +namespace at { + +/** + * UnknownQuantizer is a placeholder quantizer for functions that implement + * quantization in a two step process. First a tensor is allocated but with + * unknown quantizer, and then the quantization kernel decides what the final + * quantizer will be. 
+ */ +struct TORCH_API UnknownQuantizer : public Quantizer { + explicit UnknownQuantizer(ScalarType scalar_type) + : Quantizer(scalar_type) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + QScheme qscheme() const override; + bool equalTo(QuantizerPtr other) const override; +}; + +/** + * UniformQuantizer is the parent class for all uniform quantizers. + * These quantization scheme will map float value uniformly to + * the quantized value. For example, affine quantizer is + * the most commonly used scheme in this category. + */ +struct TORCH_API UniformQuantizer : public Quantizer { + explicit UniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +/** + * NonUniformQuantizer is the parent class for all non-uniform quantizers. + * These quantization scheme may map float value non-uniformly to the quantized + * value. K-means quantization is a representative example in this category. + */ +struct TORCH_API NonUniformQuantizer : public Quantizer { + explicit NonUniformQuantizer(ScalarType scalar_type) : Quantizer(scalar_type) {} +}; + +// There is also StochasticQuantizer which is uniform but not affine + +/** + * AffineQuantizer uses affine transformation to do quantization. + * + * For quantize: + * Y = clamp(round(X / scale + zero_point), min, max) + * For dequantize: + * X = (Y - zero_point) * scale + */ +struct TORCH_API AffineQuantizer : public UniformQuantizer { + explicit AffineQuantizer(ScalarType scalar_type) : UniformQuantizer(scalar_type) {} +}; + +// Note that we will not have Symmetric Quantizer in backend to reduce +// complications in quantized kernel implementation. + +/** + * PerTensorAffineQuantizer stores a scale and a zero_point, which is used for + * all the values in the Tensor. 
+ */ +struct TORCH_API PerTensorAffineQuantizer : public AffineQuantizer { + explicit PerTensorAffineQuantizer(ScalarType scalar_type, double scale, int64_t zero_point) + : AffineQuantizer(scalar_type), + scale_(scale), + zero_point_(zero_point) {} + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + QScheme qscheme() const override { + return kPerTensorAffine; + } + + double scale() const { + return scale_; + } + + int64_t zero_point() const { + return zero_point_; + } + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerTensorAffine) { + return false; + } + auto* other_per_tensor_affine = + static_cast(other.get()); + return scalar_type() == other_per_tensor_affine->scalar_type() && + scale() == other_per_tensor_affine->scale() && + zero_point() == other_per_tensor_affine->zero_point(); + } + + private: + const double scale_; + // We use int64_t for consistency with Python + const int64_t zero_point_; +}; + +/** + * PerChannelAffineQuantizer is the same as PerTensorAffineQuantizer + * except that we have an independent scale and zero_point parameter + * for each channel. + * + * Also note that per channel quantization is mostly applied to output channels + * of weights since per-input channel of weight quantization or per-channel + * quantization for activations can't be efficiently supported in most of + * processors since it requires each multiplication result within a single + * dot-product to have a different scale. 
+ */ +struct TORCH_API PerChannelAffineQuantizer : public AffineQuantizer { + explicit PerChannelAffineQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : AffineQuantizer(scalar_type), + scales_(std::move(scales)), + zero_points_(std::move(zero_points)), + axis_(axis) {} + + QScheme qscheme() const override { + return kPerChannelAffine; + } + + Tensor scales() const { + return scales_; + } + + Tensor zero_points() const { + return zero_points_; + } + + int64_t axis() const { + return axis_; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffine) { + return false; + } + auto* other_per_channel_affine = + static_cast(other.get()); + return scalar_type() == other_per_channel_affine->scalar_type() && + scales().equal(other_per_channel_affine->scales()) && + zero_points().equal(other_per_channel_affine->zero_points()) && + axis() == other_per_channel_affine->axis(); + } + + protected: + Tensor scales_; + Tensor zero_points_; + const int64_t axis_; +}; + +/** + * PerChannelAffineFloatQParamsQuantizer is the same as PerChannelAffineQuantizer + * except that it expects both scale and zero point to be floating point values. + * + * This quantizer uses the kPerChannelAffineFloatQParams qscheme which is a variant of + * kPerChannelAffine. + * + * The quantize equation in this case looks like - + * Xq = (Xf - zero_point) * inv_scale, where inv_scale = 1.0/scale + * + * Note: Usage of floating point zero point is useful in cases where 0 doesn't need to + * be exactly represented in the quantized space. We can get additional precision by + * using floating point values for zero point. 
+ */ +struct TORCH_API PerChannelAffineFloatQParamsQuantizer : public PerChannelAffineQuantizer { + explicit PerChannelAffineFloatQParamsQuantizer( + ScalarType scalar_type, + Tensor scales, + Tensor zero_points, + int64_t axis) + : PerChannelAffineQuantizer(scalar_type, + scales, + zero_points, + axis) {} + + QScheme qscheme() const override { + return kPerChannelAffineFloatQParams; + } + + Tensor quantize(const Tensor& tensor) override; + Tensor dequantize(const Tensor& qtensor) override; + Tensor& dequantize_out(Tensor& rtensor, const Tensor& qtensor) override; + + bool equalTo(QuantizerPtr other) const override { + if (!other.get() || other->qscheme() != kPerChannelAffineFloatQParams) { + return false; + } + auto* other_per_channel_float_qparams = + static_cast(other.get()); + return scalar_type() == other_per_channel_float_qparams->scalar_type() && + scales().equal(other_per_channel_float_qparams->scales()) && + zero_points().equal(other_per_channel_float_qparams->zero_points()) && + axis() == other_per_channel_float_qparams->axis(); + } +}; + +// This is an internal utility function for getting at the QTensorImpl, +// You should only use this for writing low level +// setters/getters for QTensorImpl fields; otherwise, you should use +// the low level setters/getters that were implemented using this. +// This may be called repeatedly, so make sure it's pretty cheap. 
+TORCH_API QTensorImpl* get_qtensorimpl(const TensorBase& self); + +// double and int64_t are because of the native function API, we only have these +// argument types right now in native functions +TORCH_API QuantizerPtr +make_per_tensor_affine_quantizer( + double scale, int64_t zero_point, ScalarType scalar_type); + +TORCH_API QuantizerPtr make_per_channel_affine_quantizer( + const Tensor& scales, + const Tensor& zero_points, + int64_t axis, + ScalarType scalar_type); + +TORCH_API QuantizerPtr make_unknown_quantizer(ScalarType scalar_type); + +// Create a Quantized Tensor given arguments for normal Tensor and a quantizer +TORCH_API Tensor new_qtensor( + IntArrayRef sizes, + const TensorOptions& options, + QuantizerPtr quantizer); + +TORCH_API void set_quantizer_(const Tensor& self, ConstQuantizerPtr quantizer); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + IntArrayRef strides, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_tensor_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const float scale, + const int64_t zeroPoint, + const TensorOptions& options); + +TORCH_API Tensor from_blob_quantized_per_channel_affine( + void* data, + IntArrayRef sizes, + std::function deleter, + const Tensor& scales, + const Tensor& zero_points, + const int64_t axis, + const TensorOptions& options); + +} // namespace at + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/all.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/all.h new file mode 100644 index 0000000000000000000000000000000000000000..562bf7b668723c13741a2c0b43a5c83ef854bde5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/all.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#if !defined(_MSC_VER) && __cplusplus < 201703L +#error C++17 or later compatible compiler is required to use PyTorch. +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/arg.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/arg.h new file mode 100644 index 0000000000000000000000000000000000000000..d177687513f0465037245143b2363d64d9e081a5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/arg.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#define TORCH_ARG(T, name) \ + public: \ + inline auto name(const T& new_##name) -> decltype(*this) { /* NOLINT */ \ + this->name##_ = new_##name; \ + return *this; \ + } \ + inline auto name(T&& new_##name) -> decltype(*this) { /* NOLINT */ \ + this->name##_ = std::move(new_##name); \ + return *this; \ + } \ + inline const T& name() const noexcept { /* NOLINT */ \ + return this->name##_; \ + } \ + inline T& name() noexcept { /* NOLINT */ \ + return this->name##_; \ + } \ + \ + private: \ + T name##_ /* NOLINT */ + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/autograd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..30c6ddc7908ad6163ebc85eeec17d4135da0d845 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/autograd.h @@ -0,0 +1,10 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/cuda.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/cuda.h new file mode 100644 index 0000000000000000000000000000000000000000..7927bdbd6e4a9db940283080ee390a292f582495 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/cuda.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::cuda { + +/// Returns the number of CUDA devices available. +c10::DeviceIndex TORCH_API device_count(); + +/// Returns true if at least one CUDA device is available. +bool TORCH_API is_available(); + +/// Returns true if CUDA is available, and CuDNN is available. +bool TORCH_API cudnn_is_available(); + +/// Sets the seed for the current GPU. +void TORCH_API manual_seed(uint64_t seed); + +/// Sets the seed for all available GPUs. 
+void TORCH_API manual_seed_all(uint64_t seed); + +/// Waits for all kernels in all streams on a CUDA device to complete. +void TORCH_API synchronize(int64_t device_index = -1); + +} // namespace torch::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data.h new file mode 100644 index 0000000000000000000000000000000000000000..7067ae09c3b1a2d15a99dc39beb8ec30424c309c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +// Some "exports". + +namespace torch::data { +using datasets::BatchDataset; // NOLINT +using datasets::Dataset; // NOLINT +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h new file mode 100644 index 0000000000000000000000000000000000000000..8fa046be5dcef6c54cc058b772419fc094c57910 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include + +#include +#include +#include +#include + +namespace torch::data { + +/// Creates a `DataLoader` instance for a stateless `dataset`, a `sampler` and +/// some `options`. +template +std::enable_if_t< + !Dataset::is_stateful, + std::unique_ptr>> +make_data_loader(Dataset dataset, Sampler sampler, DataLoaderOptions options) { + return std::make_unique>( + std::move(dataset), std::move(sampler), options); +} + +/// Creates a `DataLoader` instance for a stateless `dataset` and some +/// `options`. A sampler (by default a `RandomSampler`) will be constructed from +/// the size of the dataset. +template +std::enable_if_t< + !Dataset::is_stateful && std::is_constructible_v, + std::unique_ptr>> +make_data_loader( + Dataset dataset, + DataLoaderOptions options = DataLoaderOptions()) { + const std::optional size = dataset.size(); + TORCH_CHECK( + size.has_value(), + "Expected the dataset to be sized in " + "order to construct the Sampler"); + return make_data_loader(std::move(dataset), Sampler(*size), options); +} + +/// Creates a `DataLoader` for a stateful `dataset` and some `options`. 
+template > +std::unique_ptr> make_data_loader( + Dataset dataset, + DataLoaderOptions options = DataLoaderOptions()) { + return std::make_unique>( + std::move(dataset), options); +} +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h new file mode 100644 index 0000000000000000000000000000000000000000..711889ae4f43e9ecceb6637206fe67acfcf063d0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/base.h @@ -0,0 +1,259 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::data { +template +class DataLoaderBase { + public: + using BatchType = Batch; + using BatchRequestType = BatchRequest; + + /// Constructs a new DataLoader from a `dataset` to sample from, `options` + /// to configure the DataLoader with, and a `sampler` that specifies the + /// sampling strategy. 
+ DataLoaderBase( + DataLoaderOptions options, + std::unique_ptr main_thread_dataset = nullptr) + : options_(options), + main_thread_dataset_(std::move(main_thread_dataset)), + sequencer_(new_sequencer()) {} + + DataLoaderBase(const DataLoaderBase&) = delete; + DataLoaderBase(DataLoaderBase&&) = delete; + DataLoaderBase& operator=(const DataLoaderBase&) = delete; + DataLoaderBase& operator=(DataLoaderBase&&) = delete; + // NOLINTNEXTLINE(bugprone-exception-escape) + virtual ~DataLoaderBase() { + join(); + } + + /// Returns an iterator into the DataLoader. The lifetime of the iterator is + /// bound to the DataLoader. In C++ standards language, the category of the + /// iterator is `OutputIterator`. See + /// https://en.cppreference.com/w/cpp/named_req/OutputIterator for what this + /// means. In short: you may increment the iterator and dereference it, but + /// cannot go back, or step forward more than one position at a time. When the + /// DataLoader is exhausted, it will compare equal with the special + /// "sentinel" iterator returned by `DataLoader::end()`. Most of the time, you + /// should only use range-for loops to loop over the DataLoader, but + /// standard algorithms like `std::copy(dataloader.begin(), dataloader.end(), + /// output_iterator)` are supported too. + Iterator begin() { + TORCH_CHECK( + shuttle_.in_flight_jobs() == 0, + "Attempted to get a new DataLoader iterator " + "while another iterator is not yet exhausted"); + reset(); + return Iterator(std::make_unique>( + [this] { return this->next(); })); + } + + /// Returns a special "sentinel" iterator that compares equal with a + /// non-sentinel iterator once the DataLoader is exhausted. + Iterator end() { + return Iterator(std::make_unique>()); + } + + /// Joins the DataLoader's worker threads and drains internal queues. + /// This function may only be invoked from the main thread (in which the + /// DataLoader lives). 
+ void join() { + if (joined_) { + return; + } + shuttle_.drain(); + // Send one 'quit' message per worker. Since a worker dies (exits its + // thread) after receiving this message, each `QuitWorker()` message will be + // read by exactly one worker. + for ([[maybe_unused]] const auto w : c10::irange(options_.workers)) { + push_job(QuitWorker()); + } + for (auto& worker : workers_) { + worker.join(); + } + joined_ = true; + } + + /// Returns the options with which the DataLoader was configured. + const FullDataLoaderOptions& options() const noexcept { + return options_; + } + + protected: + /// Simple mix-in to give something a sequence number. + struct Sequenced { + Sequenced() = default; + Sequenced(size_t sqn) : sequence_number(sqn) {} + size_t sequence_number; + }; + + struct QuitWorker {}; + + /// A `Job` is either a `BatchRequest` (new indices to fetch data at) or a + /// `QuitWorker` object, to indicate the worker should shut down. + struct Job : Sequenced { + Job() = default; + Job(QuitWorker q, size_t sqn) : Sequenced(sqn), quit(q) {} + Job(BatchRequest&& i, size_t sqn) + : Sequenced(sqn), batch_request(std::move(i)) {} + std::optional quit; + std::optional batch_request; + }; + + /// The finished result of a job. + struct Result : Sequenced { + Result() = default; + Result(std::optional&& b, size_t sqn) + : Sequenced(sqn), batch(std::move(b)) {} + Result(std::exception_ptr exception, size_t sqn) + : Sequenced(sqn), exception(std::move(exception)) {} + std::optional batch; + std::exception_ptr exception; + }; + + /// Subclass hook for getting the next batch request. The stateless case will + /// ask the sampler for a new batch request (e.g. a vector of indices), while + /// the stateful one will simply return the batch size. + virtual std::optional get_batch_request() = 0; + + /// Resets the internal state of the DataLoader, optionally pre-fetching + /// new jobs. 
+ virtual void reset() { + shuttle_.drain(); + sequence_number_ = 0; + sequencer_ = new_sequencer(); + prefetch(); + } + + /// Schedules `requested_jobs` many new batches to be fetched. The actual + /// number of jobs scheduled may be less if the DataLoader exhausts. + void prefetch(size_t requested_jobs) { + for ([[maybe_unused]] const auto r : c10::irange(requested_jobs)) { + if (auto batch_request = get_batch_request()) { + this->push_job(std::move(*batch_request)); + } else { + break; + } + } + } + + /// Schedules the maximum number of jobs (based on the `max_jobs` option). + void prefetch() { + prefetch(options_.max_jobs); + } + + /// Returns the next batch of data, or an empty `optional` if the DataLoader + /// is exhausted. This operation will block until a batch is available if one + /// is still expected. + std::optional next() { + if (options_.workers > 0) { + while (std::optional result = this->pop_result()) { + if (result->exception) { + throw WorkerException(result->exception); + } else if (result->batch) { + prefetch(1); + return std::move(result->batch); + } + } + } else if (auto batch_request = get_batch_request()) { + return this->main_thread_dataset_->get_batch(std::move(*batch_request)); + } + return std::nullopt; + } + + /// The function that worker threads run. + void worker_thread(Dataset& dataset) { + while (true) { + auto job = shuttle_.pop_job(); + if (job.quit) { + break; + } + try { + auto batch = dataset.get_batch(std::move(*job.batch_request)); + shuttle_.push_result({std::move(batch), job.sequence_number}); + } catch (...) { + shuttle_.push_result({std::current_exception(), job.sequence_number}); + } + } + } + + /// Convenience method that calls `shuttle_.push_job()` with the next sequence + /// number. + template + void push_job(T value) { + shuttle_.push_job({std::move(value), sequence_number_++}); + } + + /// Convenience method that gets the next result from the sequencer. 
+ std::optional pop_result() { + return sequencer_->next( + [this] { return this->shuttle_.pop_result(this->options_.timeout); }); + } + + /// Convenience method that creates a new sequencer based on the + /// `enforce_ordering` option. + std::unique_ptr> new_sequencer() { + if (options_.enforce_ordering) { + return std::make_unique>( + options_.max_jobs); + } + return std::make_unique>(); + } + + /// The options the DataLoader was configured with. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const FullDataLoaderOptions options_; + + /// The dataset for the main thread, only has a value if the number of + /// worker threads was configured as zero, meaning the main thread has to do + /// all the work (synchronously). NOTE: Really want this to be on the heap + /// when empty, therefore `unique_ptr` and not `optional`. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr main_thread_dataset_; + + /// The sequence number for the *next* batch to be retrieved from the + /// dataset. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t sequence_number_ = 0; + + /// The worker threads, running the `worker_thread()` method. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector workers_; + + /// The `DataShuttle` which takes care of the life cycle of a job. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + detail::DataShuttle shuttle_; + + /// The `Sequencer`, which handles optional ordering of batches. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr> sequencer_; + + /// True if the DataLoader has joined its worker threads. 
+ // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool joined_ = false; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h new file mode 100644 index 0000000000000000000000000000000000000000..9e22c3f23d12c74d0bf272f89dcc5360c6dbc131 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateful.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::data { + +/// A dataloader for stateful datasets. +/// +/// A dataloader for stateful datatasets differs from one for stateless +/// datasets one in that the dataset is shared among worker threads, and that +/// this dataset is itself responsible for producing batches rather than +/// depending on a sampler. The statefulness here actually refers to the +/// dataset. The StatefulDataLoader simply alters the data loading algorithm to +/// accommodate the stateful, shared nature of the dataset. Note that the +/// dataset must be thread safe if more than one worker thread is used. +/// +/// A stateful dataloader is created by calling `make_data_loader` with a +/// stateful dataset. 
+template +class StatefulDataLoader : public DataLoaderBase< + Dataset, + typename Dataset::BatchType::value_type, + typename Dataset::BatchRequestType> { + public: + using super = DataLoaderBase< + Dataset, + typename Dataset::BatchType::value_type, + typename Dataset::BatchRequestType>; + using typename super::BatchRequestType; + + /// Constructs the `StatefulDataLoader` from a `dataset` and some `options`. + StatefulDataLoader(Dataset dataset, DataLoaderOptions options) + : super(options, std::make_unique(std::move(dataset))) { + for ([[maybe_unused]] const auto _ : c10::irange(this->options_.workers)) { + // As opposed to the stateless case, here all worker threads access the + // same underlying dataset. + this->workers_.emplace_back( + [this] { this->worker_thread(*this->main_thread_dataset_); }); + } + } + + private: + /// Resets the internal state of the dataloader and the dataset. + void reset() override { + this->main_thread_dataset_->reset(); + // Call the base class method last because it calls `prefetch()` + super::reset(); + } + + /// For stateful datasets, the batch request is always the batch size. The + /// dataset is responsible for determining what goes into the batch next. + std::optional get_batch_request() override { + return this->options_.batch_size; + } +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h new file mode 100644 index 0000000000000000000000000000000000000000..f439bd2e151b4cf2b5557a162276c37b13110dc4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader/stateless.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +#include +#include +#include + +namespace torch::data { + +/// A dataloader for stateless datasets. +/// +/// This dataloader follows the traditional PyTorch dataloader design, whereby a +/// (possibly) stateful sampler produces *batch requests* for a stateless +/// dataset, which acts as a simple batch request to batch mapping. The batch +/// request will often be an array of indices, and if the dataset is a simple +/// image dataset, the dataset would produce the images at those indices. +template +class StatelessDataLoader : public DataLoaderBase< + Dataset, + typename Dataset::BatchType, + typename Sampler::BatchRequestType> { + public: + using super = DataLoaderBase< + Dataset, + typename Dataset::BatchType, + typename Sampler::BatchRequestType>; + using typename super::BatchRequestType; + + /// Constructs the `StatelessDataLoader` from a `dataset`, a `sampler` and + /// some `options`. + StatelessDataLoader( + Dataset dataset, + Sampler sampler, + DataLoaderOptions options) + : super(options), sampler_(std::move(sampler)) { + for (const auto w : c10::irange(this->options_.workers)) { + // Here we copy the dataset into the worker thread closure. Each worker + // has its own copy of the dataset. 
This means the dataset must be + // trivially copiable, or else we don't expect more than one worker to + // be in use. + (void)w; // Suppress unused variable warning + this->workers_.emplace_back( + [this, dataset]() mutable { this->worker_thread(dataset); }); + } + if (this->options_.workers == 0) { + this->main_thread_dataset_ = + std::make_unique(std::move(dataset)); + } + } + + private: + /// Resets the internal state of the dataloader and the sampler. + void reset() override { + sampler_.reset(); + // Call the base class method last because it calls `prefetch()` + super::reset(); + } + + /// Queries the sampler for the next batch request (possibly progressing its + /// internal state). + std::optional get_batch_request() override { + auto indices = sampler_.next(this->options_.batch_size); + if (!indices || + (indices->size() < this->options_.batch_size && + this->options_.drop_last)) { + return std::nullopt; + } + AT_ASSERT(indices->size() > 0); + return indices; + } + + /// The `Sampler` used to produce batch requests. + Sampler sampler_; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h new file mode 100644 index 0000000000000000000000000000000000000000..27cdc65a62978772bda174ee26977520fb646df1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/dataloader_options.h @@ -0,0 +1,68 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::data { + +/// Options to configure a `DataLoader`. +struct DataLoaderOptions { + DataLoaderOptions() = default; + /* implicit */ DataLoaderOptions(size_t batch_size) + : batch_size_(batch_size) {} + + /// The size of each batch to fetch. + TORCH_ARG(size_t, batch_size) = 1; + + /// The number of worker threads to launch. If zero, the main thread will + /// synchronously perform the data loading. + TORCH_ARG(size_t, workers) = 0; + + /// The maximum number of jobs to enqueue for fetching by worker threads. + /// Defaults to two times the number of worker threads. + TORCH_ARG(std::optional, max_jobs); + + /// An optional limit on the time to wait for the next batch. + TORCH_ARG(std::optional, timeout); + + /// Whether to enforce ordering of batches when multiple are loaded + /// asynchronously by worker threads. Set to `false` for better performance if + /// you do not care about determinism. + TORCH_ARG(bool, enforce_ordering) = true; + + /// Whether to omit the last batch if it contains less than `batch_size` + /// examples. + TORCH_ARG(bool, drop_last) = false; +}; + +/// Like `DataLoaderOptions`, but without any unconfigured state. 
+/// `DataLoaderOptions` has some options that depend on other options +/// (`max_jobs` => `2 * workers`). In the spirit of properly using the C++ type +/// system, `DataLoaderOptions` allows only setting values. To access values, +/// you must create a `FullDataLoaderOptions` from a `DataLoaderOptions` +/// instance, which will do any necessary coalescing. +struct FullDataLoaderOptions { + explicit FullDataLoaderOptions(DataLoaderOptions options) + : batch_size(options.batch_size()), + workers(options.workers()), + max_jobs(options.max_jobs().value_or(2 * workers)), + timeout(options.timeout()), + enforce_ordering(options.enforce_ordering()), + drop_last(options.drop_last()) {} + + size_t batch_size; + size_t workers; + size_t max_jobs; + std::optional timeout; + bool enforce_ordering; + bool drop_last; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets.h new file mode 100644 index 0000000000000000000000000000000000000000..979f7a12962f8d2faa56327d99d4cf132b5f859b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h new file mode 100644 index 0000000000000000000000000000000000000000..fd8fb7471710bd1f96bed17cd8bf722272879639 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/base.h @@ -0,0 +1,101 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace torch::data::datasets { +template +class MapDataset; +template +MapDataset map(D, T); // NOLINT +} // namespace torch::data::datasets + +namespace torch::data::datasets { +namespace detail { +template +struct is_optional : std::false_type {}; +template +struct is_optional> : std::true_type {}; +} // namespace detail + +/// A dataset that can yield data only in batches. +template < + typename Self, + typename Batch = std::vector>, + typename BatchRequest = ArrayRef> +class BatchDataset { + public: + using SelfType = Self; + using BatchType = Batch; + using BatchRequestType = BatchRequest; + constexpr static bool is_stateful = detail::is_optional::value; + + virtual ~BatchDataset() = default; + + /// Returns a batch of data given an index. + virtual Batch get_batch(BatchRequest request) = 0; + + /// Returns the size of the dataset, or an empty std::optional if it is + /// unsized. + virtual std::optional size() const = 0; + + /// Creates a `MapDataset` that applies the given `transform` to this dataset. 
+ template + MapDataset map(TransformType transform) & { + return datasets::map(static_cast(*this), std::move(transform)); + } + + /// Creates a `MapDataset` that applies the given `transform` to this dataset. + template + MapDataset map(TransformType transform) && { + return datasets::map( + std::move(static_cast(*this)), std::move(transform)); + } +}; + +/// A dataset that can yield data in batches, or as individual examples. +/// +/// A `Dataset` is a `BatchDataset`, because it supports random access and +/// therefore batched access is implemented (by default) by calling the random +/// access indexing function for each index in the requested batch of indices. +/// This can be customized. +template > +class Dataset : public BatchDataset> { + public: + using ExampleType = SingleExample; + + /// Returns the example at the given index. + virtual ExampleType get(size_t index) = 0; + + /// Returns a batch of data. + /// The default implementation calls `get()` for every requested index + /// in the batch. + std::vector get_batch(ArrayRef indices) override { + std::vector batch; + batch.reserve(indices.size()); + for (const auto i : indices) { + batch.push_back(get(i)); + } + return batch; + } +}; + +/// A `StreamDataset` represents a dataset that is a potentially infinite +/// stream. It takes as batch index only a number, which is the batch size, and +/// yields that many elements from the stream. +template >> +using StreamDataset = BatchDataset; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h new file mode 100644 index 0000000000000000000000000000000000000000..78d57e7d88d0d9d8235f3d985b6e37fa26afbe0b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/chunk.h @@ -0,0 +1,532 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::data::datasets { + +/// Interface for chunk reader, which performs data chunking and reading of +/// entire chunks. +/// +/// A chunk could be an entire file, such as an audio data file or an image, +/// or part of a file in the case of a large text-file split based on seek +/// positions. +template < + typename ExampleType_, + typename ChunkType_ = std::vector> +class ChunkDataReader { + public: + virtual ~ChunkDataReader() = default; + + using ChunkType = ChunkType_; + using ExampleType = ExampleType_; + + /// Read an entire chunk. + virtual ChunkType read_chunk(size_t chunk_index) = 0; + + /// Returns the number of chunks available in this reader. + virtual size_t chunk_count() = 0; + + /// This will clear any internal state associate with this reader. + virtual void reset() = 0; +}; + +namespace detail { +/// BatchDataBuffer manages a queue of UnwrappedBatchData. After a new chunk is +/// loaded, BatchDataBuffer splits it into small batches and push them into the +/// queue. When get_batch is called from data loader, it pops cached batches and +/// return. If the cache is empty, it either waits to load more chunks or return +/// null if all chunks are loaded. 
+template < + typename UnwrappedBatch, + typename ExampleSampler = samplers::RandomSampler> +class BatchDataBuffer { + public: + using UnwrappedBatchType = UnwrappedBatch; + using BatchType = std::optional; + using BatchRequestType = typename ExampleSampler::BatchRequestType; + + BatchDataBuffer( + size_t batch_size, + ExampleSampler& example_sampler, + size_t queue_capacity) + : batch_size_(batch_size), + example_sampler_(example_sampler), + queue_capacity_(queue_capacity) {} + + /// Return batch data from the queue. Called from the ChunkDataset main + /// thread. + BatchType get_batch() { + std::unique_lock lock(queue_mutex_); + cv_read_.wait(lock, [this] { + // wait till there is available data in the queue or if all chunks are + // loaded (i.e. the dataset is exhausted for this epoch) + return ( + this->total_example_count_in_queue_ >= batch_size_ || this->stop_); + }); + if (batch_queue_.empty()) { + AT_ASSERT(stop_); + // All batches have been retrieved. Return an empty batch. + return std::nullopt; + } + + UnwrappedBatchData batch = std::move(batch_queue_.front()); + batch_queue_.pop(); + if (batch.exception) { + throw WorkerException(batch.exception); + } + + total_example_count_in_queue_ -= batch.batch_data.size(); + lock.unlock(); + cv_write_.notify_all(); + + return batch.batch_data; + } + + /// Push preloaded chunks to batch queue. Called from the ChunkDataset worker + /// threads. + void add_chunk_data(UnwrappedBatchType data) { + std::unique_lock lock(queue_mutex_); + cv_write_.wait(lock, [this] { + // stop loading if we have preloaded enough data. + return this->total_example_count_in_queue_ < this->queue_capacity_ || + this->stop_; + }); + if (stop_) { + // When stop_ is true, it means no further chunk loading is necessary. + // Return without any further processing. 
+ return; + } + + auto data_size = data.size(); + auto remaining_size = data_size; + example_sampler_.reset(data_size); + + auto fill_batch = [&](size_t example_count, UnwrappedBatchType& batch) { + auto batch_example_indices = this->example_sampler_.next(example_count); + AT_ASSERT( + batch_example_indices && + batch_example_indices.value().size() == example_count); + BatchRequestType& indices = batch_example_indices.value(); + for (size_t i : indices) { + TORCH_CHECK(i < data_size, "Index out of range"); + batch.emplace_back(std::move(data[i])); + } + remaining_size -= example_count; + }; + + if (!batch_queue_.empty()) { + // if the queue has existing data, and the last batch doesn't have enough + // examples to fill a batch_size batch, add more example to this batch + // first. + auto& batch = batch_queue_.back(); + size_t current_count = batch.batch_data.size(); + if (current_count < batch_size_) { + auto example_count = + std::min(remaining_size, batch_size_ - current_count); + fill_batch(example_count, batch.batch_data); + } + } + + // If we still have data remaining after filling the last pushed batch, add + // them to the queue too. + while (remaining_size > 0) { + UnwrappedBatchType current_batch; + + // Allocate the batch memory ahead of time. + current_batch.reserve(batch_size_); + + auto example_count = std::min(remaining_size, batch_size_); + fill_batch(example_count, current_batch); + batch_queue_.emplace(std::move(current_batch)); + } + total_example_count_in_queue_ += data_size; + lock.unlock(); + cv_read_.notify_all(); + } + + /// Push exceptions thrown during preloading into batch queue. Called from + /// the ChunkDataset worker threads. + void add_chunk_data(std::exception_ptr e_ptr) { + std::unique_lock lock(queue_mutex_); + cv_write_.wait(lock, [this] { + // stop loading if we have preloaded enough data. 
+ return ( + this->total_example_count_in_queue_ < this->queue_capacity_ || + this->stop_); + }); + if (stop_) { + // When stop_ is true, it means this current thread needs to be tore down, + // the batch buffer will be discarded, so no need to enqueue any new + // exceptions. + return; + } + + batch_queue_.emplace(e_ptr); + lock.unlock(); + cv_read_.notify_all(); + } + + void stop() { + { + // Hold the lock before changing stop_ to prevent a race condition which + // can cause a deadlock. To be more specific, conditional variable + // cv_write_ waits on predicate stop_ in add_chunk_data(). The wait + // happens in two steps: 1) while still holding the lock, check if + // predicate is true; 2) if it is true, proceeds, otherwise, release the + // lock and wait until notified. Without holding a lock, cv_write_'s + // notification can happen in between step 1) and 2). In that case, as + // cv_write_ is not in waiting status yet, so the notification is lost and + // cv_write_ will sleep forever. By taking a lock before changing + // predicate stop_, it is ensured updating and evaluating stop_ always + // happen in a synchronized way + std::lock_guard lock(queue_mutex_); + stop_ = true; + } + + // notify all writers, wake them from wait to exit current method. + cv_write_.notify_all(); + // notify all readers too. + cv_read_.notify_all(); + } + /// The batch size is needed to create batches from the chunk data. Similar to + /// regular dataloader where the batches are created with prefetches, + /// BatchDataBuffer perform the batch creation using the provided batch size. + size_t batch_size_ = 0; + + /// count of total example stored in the queue + size_t total_example_count_in_queue_ = 0; + + /// struct that contains a raw unwrapped batch unit. An unwrapped batch unit + /// is the raw data without 'optional' wrapper. It can be a collection of + /// images, utterances, e.t.c. 
+ struct UnwrappedBatchData { + explicit UnwrappedBatchData(UnwrappedBatchType data) + : batch_data(std::move(data)) {} + + explicit UnwrappedBatchData(std::exception_ptr e) + : exception(std::move(e)) {} + + /// batch data to return + UnwrappedBatchType batch_data; + + /// exception pointer which captures any abnormal exceptions while creating + /// the batch. + std::exception_ptr exception; + }; + + /// local cache to store example batches from loaded chunk + std::queue batch_queue_; + + // sync batch_queue_ update. + std::mutex queue_mutex_; + + std::condition_variable cv_read_; + std::condition_variable cv_write_; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + ExampleSampler& example_sampler_; + + // configurable maximum number of elements the queue can hold at one time. + size_t queue_capacity_; + + // When set to true, it wakes the writer threads from the wait and exit + // current function call. This is needed when ChunkDataSet.Reset is called + // while the previous epoch is not exhausted yet. When ChunkDataset is waiting + // its preloader to finish previous work before tearing down the thread, the + // preloader could be still waiting for the conditional variable, thus cause + // the program to hang. This boolean is used to break this waiting condition. + bool stop_ = false; +}; +} // namespace detail + +/// Options to configure a `ChunkDataset`. +struct ChunkDatasetOptions { + ChunkDatasetOptions() = delete; + ChunkDatasetOptions( + size_t preloader_count, + size_t batch_size, + size_t cache_size = 2048, + size_t cross_chunk_shuffle_count = 1) + : preloader_count_(preloader_count), + batch_size_(batch_size), + cache_size_(cache_size), + cross_chunk_shuffle_count_(cross_chunk_shuffle_count) { + TORCH_CHECK( + preloader_count_ > 0, + "Preloader count is 0. At least one preloader needs to be specified."); + TORCH_CHECK( + batch_size_ > 0, + "Batch size is 0. 
A positive batch size needs to be specified."); + TORCH_CHECK( + cache_size_ > 0, + "Cache size is 0. A positive cache size needs to be specified."); + TORCH_CHECK( + cache_size_ >= batch_size_, + "Cache size is less than batch size. Cache needs to be large enough to " + "hold at least one batch."); + TORCH_CHECK( + cross_chunk_shuffle_count_ > 0, + "cross_chunk_shuffle_count needs to be greater than 0."); + } + + /// The number of worker thread to preload chunk data. + TORCH_ARG(size_t, preloader_count); + + /// The size of each batch. + TORCH_ARG(size_t, batch_size); + + /// The capacity of the queue for batch caching. + TORCH_ARG(size_t, cache_size) = 2048; + + // The number of chunks to perform cross-chunk shuffling. Default to 1 meaning + // no cross-chunk shuffling. When it is equal to n (n > 1), n random + // chunks will be loaded at once and example shuffling will be performed + // across all those n chunks. + // Note: Usually the default config (1 chunk shuffle + example shuffle) is + // good enough to generate random distributed data. Use this parameter only if + // you know cross-shuffle is needed in your case. Also there is a performance + // penalty when this value is greater than 1, as we need to do extra merge + // between multiple chunks before performing example sampling. + TORCH_ARG(size_t, cross_chunk_shuffle_count) = 1; +}; + +/// A stateful dataset that support hierarchical sampling and prefetching of +/// entre chunks. +/// +/// Unlike regular dataset, chunk dataset require two samplers to operate and +/// keeps an internal state. `ChunkSampler` selects, which chunk to load next, +/// while the `ExampleSampler` determines the order of Examples that are +/// returned in each `get_batch` call. 
The hierarchical sampling approach used +/// here is inspired by this paper +/// http://martin.zinkevich.org/publications/nips2010.pdf +template < + typename ChunkReader, + typename ChunkSampler = samplers::RandomSampler, + typename ExampleSampler = samplers::RandomSampler> +class ChunkDataset final + : public StatefulDataset< + ChunkDataset, + typename ChunkReader::BatchType, + size_t> { + public: + using BatchType = std::optional; + using UnwrappedBatchType = typename ChunkReader::BatchType; + using BatchRequestType = size_t; + using ChunkSamplerType = ChunkSampler; + using ExampleSamplerType = ExampleSampler; + + ChunkDataset( + ChunkReader chunk_reader, + ChunkSampler chunk_sampler, + ExampleSampler example_sampler, + ChunkDatasetOptions options, + std::function preprocessing_policy = + std::function()) + : chunk_reader_(std::move(chunk_reader)), + chunk_sampler_(std::move(chunk_sampler)), + example_sampler_(std::move(example_sampler)), + options_(options), + preprocessing_policy_(std::move(preprocessing_policy)), + quit_worker_(false), + running_preloaders_(0) {} + + ~ChunkDataset() override { + // stop batch buffer first. + if (batch_buffer_) { + batch_buffer_->stop(); + } + free_workers(); + } + + /// Default get_batch method of BatchDataset. This method returns + /// Example batches created from the preloaded chunks. The implementation + /// is dataset agnostic and does not need overriding in different chunk + /// datasets. 
+ BatchType get_batch(size_t batch_size) override { + TORCH_CHECK( + batch_buffer_ != nullptr, + "Dataset needs to call reset() before calling get_batch()."); + + TORCH_CHECK( + batch_size == options_.batch_size(), + "The requested batch size does not match with the initialized batch size.\n" + " The requested batch size is ", + batch_size, + ", while the dataset is created with batch size equal to ", + options_.batch_size()); + return batch_buffer_->get_batch(); + } + + /// Helper method around get_batch as `batch_size` is not strictly necessary + BatchType get_batch() { + return get_batch(options_.batch_size()); + } + + /// This will clear any internal state and starts the internal prefetching + /// mechanism for the chunk dataset. + void reset() override { + // We need this to support partial data reads via dataloader iterator. + if (batch_buffer_) { + batch_buffer_->stop(); + } + // free workers from previous reset if there is any. + free_workers(); + preload_threads_.clear(); + + if (!load_checkpoint_) { + chunk_reader_.reset(); + chunk_sampler_.reset(chunk_reader_.chunk_count()); + load_checkpoint_ = false; + } + + // Throw out any existing cached batch in the buffer and re-creates a new + // chunk buffer. + batch_buffer_ = std::make_unique< + detail::BatchDataBuffer>( + options_.batch_size(), example_sampler_, options_.cache_size()); + + // create new workers for this new epoch. + quit_worker_ = false; + + AT_ASSERT(running_preloaders_ == 0); + running_preloaders_ = options_.preloader_count(); + for (const auto i : c10::irange(options_.preloader_count())) { + preload_threads_.emplace_back([this, i]() { this->preloader(i); }); + } + } + + /// size is not used for chunk dataset. + std::optional size() const override { + return std::nullopt; + } + + // provide a references to chunk sampler. Used mainly in distributed data + // loading to set the epoch number for the sampler. 
+ ChunkSamplerType& chunk_sampler() { + return chunk_sampler_; + } + + void save(serialize::OutputArchive& archive) const override { + std::lock_guard lock(chunk_index_guard_); + chunk_sampler_.save(archive); + } + + void load(serialize::InputArchive& archive) override { + std::lock_guard lock(chunk_index_guard_); + chunk_sampler_.load(archive); + load_checkpoint_ = true; + } + + private: + /// running on worker thread to preload chunk data. + void preloader(size_t id) { + while (!quit_worker_.load()) { + try { + std::vector chunk_idx; + { + std::lock_guard lock(chunk_index_guard_); + if (auto chunk_sampler_result = chunk_sampler_.next( + this->options_.cross_chunk_shuffle_count())) { + chunk_idx = chunk_sampler_result.value(); + } else { + break; + } + } + UnwrappedBatchType data = chunk_reader_.read_chunk(chunk_idx[0]); + for (const auto i : c10::irange(1, chunk_idx.size())) { + auto chunk_data = chunk_reader_.read_chunk(chunk_idx[i]); + std::move( + chunk_data.begin(), chunk_data.end(), std::back_inserter(data)); + } + if (preprocessing_policy_) { + preprocessing_policy_(data); + } + if (!data.empty()) { // skip empty chunks. + batch_buffer_->add_chunk_data(std::move(data)); + } + } catch (...) { + batch_buffer_->add_chunk_data(std::current_exception()); + } + } + AT_ASSERT(running_preloaders_.load() > 0); + --running_preloaders_; + if (running_preloaders_.load() == 0) { + // all preloaders are completed, so we can notify the batch_buffer. + batch_buffer_->stop(); + } + } + + /// Block the current thread until the workers finish execution and exit. + void free_workers() { + if (!quit_worker_.load()) { + quit_worker_ = true; + for (auto& worker_thread : preload_threads_) { + worker_thread.join(); + } + } + } + + private: + // Templated class that defines what is a chunk and how to read chunk data. + // When a chunk is returned by chunk_reader_, ChunkDataset split it into + // batches and caches them in batch_buffer_. 
+ ChunkReader chunk_reader_; + + // chunk sampler to shuffle different chunks + ChunkSamplerType chunk_sampler_; + + // example sampler to shuffle examples in a specific chunk + ExampleSamplerType example_sampler_; + + // batch data buffer which holds chunk data from preloading thread. + std::shared_ptr< + detail::BatchDataBuffer> + batch_buffer_; + + // worker thread pool + std::vector preload_threads_; + + /// The options the Dataset was configured with. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ChunkDatasetOptions options_; + + // function pointer wrapper to apply custom processing over chunk data. This + // is considered an advanced parameter for developers who want to apply a + // pre-process to the chunk data before sampling into minibatch. + // Different than the collate function, this policy is applied on the chunk + // level, instead of minibatch level. When a chunk of data is loaded (multiple + // chunks if cross_chunk_shuffle_count_ is greater than 1), this policy is + // applied to the full loaded data. It is useful if developers want to + // perform pre-processing (like bucketing) to the chunk data before + // example sampler samples the data. By default it's an empty pointer and no + // action will be taken. + std::function preprocessing_policy_; + + // indicate whether the worker thread can be teared down + std::atomic quit_worker_; + + // keep track of running preloaders to notify batch buffer. A value 0 + // indicates that the chunk loading is completed. + std::atomic running_preloaders_; + + // mutex to synchronize chunk sampler next() call. + mutable std::mutex chunk_index_guard_; + + // boolean value to indicate whether we need to load the checkpoint for + // chunk_sampler_. + bool load_checkpoint_{false}; +}; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h new file mode 100644 index 0000000000000000000000000000000000000000..7f763b199d610139b62c55abdfd908472dd806c6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/map.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include +#include + +namespace torch::data::datasets { +namespace detail { +template +using optional_if_t = std::conditional_t, T>; +} // namespace detail + +/// A `MapDataset` is a dataset that applies a transform to a source dataset. +template +class MapDataset : public BatchDataset< + MapDataset, + detail::optional_if_t< + SourceDataset::is_stateful, + typename AppliedTransform::OutputBatchType>, + typename SourceDataset::BatchRequestType> { + public: + using DatasetType = SourceDataset; + using TransformType = AppliedTransform; + using BatchRequestType = typename SourceDataset::BatchRequestType; + using OutputBatchType = detail::optional_if_t< + SourceDataset::is_stateful, + typename AppliedTransform::OutputBatchType>; + + MapDataset(DatasetType dataset, TransformType transform) + : dataset_(std::move(dataset)), transform_(std::move(transform)) {} + + /// Gets a batch from the source dataset and applies the transform to it, + /// returning the result. + OutputBatchType get_batch(BatchRequestType indices) override { + return get_batch_impl(std::move(indices)); + } + + /// Returns the size of the source dataset. 
+ // NOLINTNEXTLINE(bugprone-exception-escape) + std::optional size() const noexcept override { + return dataset_.size(); + } + + /// Calls `reset()` on the underlying dataset. + /// NOTE: Stateless datasets do not have a reset() method, so a call to this + /// method will only compile for stateful datasets (which have a reset() + /// method). + void reset() { + dataset_.reset(); + } + + /// Returns the underlying dataset. + const SourceDataset& dataset() noexcept { + return dataset_; + } + + /// Returns the transform being applied. + const AppliedTransform& transform() noexcept { + return transform_; + } + + private: + /// The implementation of `get_batch()` for the stateless case, which simply + /// applies the transform to the output of `get_batch()` from the dataset. + template < + typename D = SourceDataset, + typename = std::enable_if_t> + OutputBatchType get_batch_impl(BatchRequestType indices) { + return transform_.apply_batch(dataset_.get_batch(std::move(indices))); + } + + /// The implementation of `get_batch()` for the stateful case. Here, we follow + /// the semantics of `Optional.map()` in many functional languages, which + /// applies a transformation to the optional's content when the optional + /// contains a value, and returns a new optional (of a different type) if the + /// original optional returned by `get_batch()` was empty. + template + std::enable_if_t get_batch_impl( + BatchRequestType indices) { + if (auto batch = dataset_.get_batch(std::move(indices))) { + return transform_.apply_batch(std::move(*batch)); + } + return std::nullopt; + } + + /// The underlying dataset being transformed. + SourceDataset dataset_; + + // The transformation that is applied to batches received from the dataset. + AppliedTransform transform_; +}; + +/// Creates a `MapDataset` with the given dataset and transform. 
+template +MapDataset map( + DatasetType dataset, + TransformType transform) { + static_assert( + std::is_same_v< + std::conditional_t< + DatasetType::is_stateful, + typename DatasetType::BatchType::value_type, + typename DatasetType::BatchType>, + typename TransformType::InputBatchType>, + "BatchType type of dataset does not match input type of transform"); + return {std::move(dataset), std::move(transform)}; +} + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h new file mode 100644 index 0000000000000000000000000000000000000000..1e55d9ed51d5c15c4780b50832aee8cc1a72f721 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/mnist.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#include +#include + +namespace torch::data::datasets { +/// The MNIST dataset. +class TORCH_API MNIST : public Dataset { + public: + /// The mode in which the dataset is loaded. + enum class Mode { kTrain, kTest }; + + /// Loads the MNIST dataset from the `root` path. + /// + /// The supplied `root` path should contain the *content* of the unzipped + /// MNIST dataset, available from http://yann.lecun.com/exdb/mnist. + explicit MNIST(const std::string& root, Mode mode = Mode::kTrain); + + /// Returns the `Example` at the given `index`. + Example<> get(size_t index) override; + + /// Returns the size of the dataset. 
+ std::optional size() const override; + + /// Returns true if this is the training subset of MNIST. + // NOLINTNEXTLINE(bugprone-exception-escape) + bool is_train() const noexcept; + + /// Returns all images stacked into a single tensor. + const Tensor& images() const; + + /// Returns all targets stacked into a single tensor. + const Tensor& targets() const; + + private: + Tensor images_, targets_; +}; +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h new file mode 100644 index 0000000000000000000000000000000000000000..ee516362464826e3a6b4640ceb1dbf1487ed2aec --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/shared.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::data::datasets { + +/// A dataset that wraps another dataset in a shared pointer and implements the +/// `BatchDataset` API, delegating all calls to the shared instance. This is +/// useful when you want all worker threads in the dataloader to access the same +/// dataset instance. The dataset must take care of synchronization and +/// thread-safe access itself. +/// +/// Use `torch::data::datasets::make_shared_dataset()` to create a new +/// `SharedBatchDataset` like you would a `std::shared_ptr`. 
+template +class SharedBatchDataset : public BatchDataset< + SharedBatchDataset, + typename UnderlyingDataset::BatchType, + typename UnderlyingDataset::BatchRequestType> { + public: + using BatchType = typename UnderlyingDataset::BatchType; + using BatchRequestType = typename UnderlyingDataset::BatchRequestType; + + /// Constructs a new `SharedBatchDataset` from a `shared_ptr` to the + /// `UnderlyingDataset`. + /* implicit */ SharedBatchDataset( + std::shared_ptr shared_dataset) + : dataset_(std::move(shared_dataset)) {} + + /// Calls `get_batch` on the underlying dataset. + BatchType get_batch(BatchRequestType request) override { + return dataset_->get_batch(std::move(request)); + } + + /// Returns the `size` from the underlying dataset. + std::optional size() const override { + return dataset_->size(); + } + + /// Accesses the underlying dataset. + UnderlyingDataset& operator*() { + return *dataset_; + } + + /// Accesses the underlying dataset. + const UnderlyingDataset& operator*() const { + return *dataset_; + } + + /// Accesses the underlying dataset. + UnderlyingDataset* operator->() { + return dataset_.get(); + } + + /// Accesses the underlying dataset. + const UnderlyingDataset* operator->() const { + return dataset_.get(); + } + + /// Calls `reset()` on the underlying dataset. + void reset() { + dataset_->reset(); + } + + private: + std::shared_ptr dataset_; +}; + +/// Constructs a new `SharedBatchDataset` by creating a +/// `shared_ptr`. All arguments are forwarded to +/// `make_shared`. +template +SharedBatchDataset make_shared_dataset(Args&&... args) { + return std::make_shared(std::forward(args)...); +} +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h new file mode 100644 index 0000000000000000000000000000000000000000..23720e40db66a4bbf4fa73a2e1ae8707b534e7ff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/stateful.h @@ -0,0 +1,69 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::datasets { + +/// A stateful dataset is a dataset that maintains some internal state, which +/// will be `reset()` at the beginning of each epoch. Subclasses can override +/// the `reset()` method to configure this behavior. Further, the return type of +/// a stateful dataset's `get_batch()` method is always an `optional`. When the +/// stateful dataset wants to indicate to the dataloader that its epoch has +/// ended, it should return an empty optional. The dataloader knows to modify +/// its implementation based on whether the dataset is stateless or stateful. +/// +/// Note that when subclassing a from `StatefulDataset`, the return +/// type of `get_batch()`, which the subclass must override, will be +/// `optional` (i.e. the type specified in the `StatefulDataset` +/// specialization is automatically boxed into an `optional` for the dataset's +/// `BatchType`). +template < + typename Self, + typename Batch = std::vector>, + typename BatchRequest = size_t> +class StatefulDataset + : public BatchDataset, BatchRequest> { + public: + /// Resets internal state of the dataset. 
+ virtual void reset() = 0; + + /// Saves the statefulDataset's state to OutputArchive. + virtual void save(serialize::OutputArchive& archive) const = 0; + + /// Deserializes the statefulDataset's state from the `archive`. + virtual void load(serialize::InputArchive& archive) = 0; +}; + +/// Serializes a statefulDataset to `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const StatefulDataset& statefulDataset) { + statefulDataset.save(archive); + return archive; +} + +/// Deserializes a statefulDataset from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + StatefulDataset& statefulDataset) { + statefulDataset.load(archive); + return archive; +} + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..213be9b6e45c4bd20c8adceff8676f6843772c60 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/datasets/tensor.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::datasets { + +/// A dataset of tensors. +/// Stores a single tensor internally, which is then indexed inside `get()`. +struct TensorDataset : public Dataset { + /// Creates a `TensorDataset` from a vector of tensors. 
+ explicit TensorDataset(const std::vector& tensors) + : TensorDataset(torch::stack(tensors)) {} + + explicit TensorDataset(torch::Tensor tensor) : tensor(std::move(tensor)) {} + + /// Returns a single `TensorExample`. + TensorExample get(size_t index) override { + return tensor[static_cast(index)]; + } + + /// Returns the number of tensors in the dataset. + std::optional size() const override { + return tensor.size(0); + } + + Tensor tensor; +}; + +} // namespace torch::data::datasets + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h new file mode 100644 index 0000000000000000000000000000000000000000..433ed49aab5bf2506e4fcea421f683f8c2fb5e12 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/data_shuttle.h @@ -0,0 +1,88 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +#include +#include + +namespace torch::data::detail { + +/// Encapsulates the full life cycle of DataLoader jobs. +/// +/// When a new job is enqueued to the `DataShuttle`, a counter for in-flight +/// jobs is bumped. This job is said to be "in-flight" until its result is +/// popped. Worker threads dequeue jobs as soon as they are available. When a +/// worker finishes a job, it enqueues the result. Only when the main thread +/// dequeues a result is the count of in-flight jobs decremented. 
When the main +/// thread attempts to dequeue a job but no jobs are in-flight, that means the +/// epoch is complete and `pop_result` returns an empty optional. +template +class DataShuttle { + public: + /// Pushes a new job. Called by the main thread. + void push_job(Job job) { + new_jobs_.push(std::move(job)); + ++in_flight_jobs_; + } + + /// Pushes the result of a job. Called by worker threads. + void push_result(Result result) { + results_.push(std::move(result)); + } + + /// Returns the next job, blocking until there is one available. Called by + /// worker threads. + Job pop_job() { + return new_jobs_.pop(); + } + + /// Returns the result of a job, or nullopt if all jobs were exhausted. Called + /// by the main thread. + std::optional pop_result( + std::optional timeout = std::nullopt) { + if (in_flight_jobs_ > 0) { + auto result = results_.pop(timeout); + --in_flight_jobs_; + return result; + } + return std::nullopt; + } + + /// Discards any jobs that are not yet in flight, and waits for all in-flight + /// jobs to finish, discarding their result. + void drain() { + // Clear all inputs so that no further jobs are scheduled. + auto number_cleared = new_jobs_.clear(); + in_flight_jobs_ -= number_cleared; + // Remove any outstanding results. + while (in_flight_jobs_ > 0) { + pop_result(); + } + } + + /// Returns the number of jobs that are still in progress. + /// When this number is zero, an epoch is finished. + size_t in_flight_jobs() const noexcept { + return in_flight_jobs_; + } + + private: + /// The queue for jobs that are not yet in flight. + Queue new_jobs_; + /// The number of in-flight jobs. + /// NOTE: Not atomic because only manipulated by the main thread. + size_t in_flight_jobs_ = 0; + /// The queue for results of finished jobs. + Queue results_; +}; + +} // namespace torch::data::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h new file mode 100644 index 0000000000000000000000000000000000000000..d369014923cbacf1d0f3a98ef830e864f1b852ee --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/queue.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include +#include +#include +#include +#include + +namespace torch::data::detail { + +/// A basic locked, blocking MPMC queue. +/// +/// Every `push` and `pop` is guarded by a mutex. A condition variable is used +/// to communicate insertion of new elements, such that waiting threads will be +/// woken up if they are currently waiting inside a call to `pop()`. +/// +/// Note that this data structure is written specifically for use with the +/// `DataLoader`. Its behavior is tailored to this use case and may not be +/// applicable to more general uses. +template +class Queue { + public: + /// Pushes a new value to the back of the `Queue` and notifies one thread on + /// the waiting side about this event. + void push(T value) { + { + std::lock_guard lock(mutex_); + queue_.push(std::move(value)); + } + cv_.notify_one(); + } + + /// Blocks until at least one element is ready to be popped from the front of + /// the queue. An optional `timeout` in seconds can be used to limit the time + /// spent waiting for an element. If the wait times out, an exception is + /// raised. 
+ T pop(std::optional timeout = std::nullopt) { + std::unique_lock lock(mutex_); + if (timeout) { + if (!cv_.wait_for( + lock, *timeout, [this] { return !this->queue_.empty(); })) { + // clang-format off + TORCH_CHECK(false, + "Timeout in DataLoader queue while waiting for next batch" + " (timeout was ", timeout->count(), " ms)"); + // clang-format on + } + } else { + cv_.wait(lock, [this] { return !this->queue_.empty(); }); + } + AT_ASSERT(!queue_.empty()); + T value = queue_.front(); + queue_.pop(); + lock.unlock(); + return value; + } + + /// Empties the queue and returns the number of elements that were present at + /// the start of the function. No threads are notified about this event as it + /// is assumed to be used to drain the queue during shutdown of a + /// `DataLoader`. + size_t clear() { + std::lock_guard lock(this->mutex_); + const auto size = queue_.size(); + while (!queue_.empty()) { + queue_.pop(); + } + return size; + } + + private: + std::queue queue_; + std::mutex mutex_; + std::condition_variable cv_; +}; +} // namespace torch::data::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h new file mode 100644 index 0000000000000000000000000000000000000000..02aa79dc20b234a2fa97055a10a9626f7214aa7b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/detail/sequencers.h @@ -0,0 +1,112 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::data::detail::sequencers { +namespace detail { +template +bool buffer_contains_result(const std::vector>& buffer) { + return std::any_of( + buffer.begin(), buffer.end(), [](const std::optional& result) { + return result.has_value(); + }); +} +} // namespace detail + +/// A `Sequencer` accepts a function that yields the next result of a +/// `DataLoader` and then has the opportunity to influence the order in which +/// these results are returned. The `NoSequencer` does not enforce any +/// sequencing and returns any result directly. The `OrderedSequencer` instead +/// buffers results internally to return them in order of their sequence number. +template +struct Sequencer { + using ResultProducer = std::function()>; + virtual ~Sequencer() = default; + virtual std::optional next(ResultProducer next_result) = 0; +}; + +/// A `Sequencer` that does not enforce any ordering. It is effectively the +/// identity function. +template +struct NoSequencer final : public Sequencer { + using typename Sequencer::ResultProducer; + std::optional next(ResultProducer next_result) override { + return next_result(); + } +}; + +/// A `Sequencer` that buffers results and returns them in order of their +/// sequence number. 
The `OrderedSequencer` maintains an internal, monotonically +/// incrementing counter for the next sequence number it expects. If it receives +/// a result with a higher sequence number, it will buffer it for later (when +/// the sequence number reaches that of this result). Otherwise, if the sequence +/// numbers match, the result is returned. +/// +/// Implementation note: The `OrderedSequencer` is implemented with a fixed-size +/// buffer. Let `m` be the maximum number of jobs in the data loader's queue and +/// `s` be the current sequence number. Assume `m` jobs are scheduled in the +/// `DataLoader`. Any new result is stored at index `job.sqn mod m` in the +/// `OrderedSequencer`. Why are we sure sequence numbers of new jobs will not +/// collide with sequence numbers of buffered jobs? The `OrderedSequencer` will +/// not return from `next()` until it receives the result with sqn `s`. This +/// means no new jobs can be scheduled in the `DataLoader` in the meantime, +/// which enforces that as long as sqn `s` has not been received, `s + m` (which +/// would cause a collision in the fixed-size buffer) will not yet be scheduled. +template +struct OrderedSequencer : public Sequencer { + using typename Sequencer::ResultProducer; + + /// Constructs the `OrderedSequencer` with the maximum number of results it + /// will ever hold at one point in time. + explicit OrderedSequencer(size_t max_jobs) : buffer_(max_jobs) {} + + /// Buffers results until the next one in the expected order is received. + std::optional next(ResultProducer next_result) override { + // If we already have the result for the next sqn, return it. + if (auto& maybe_result = buffer(next_sequence_number_)) { + auto result = std::move(*maybe_result); + buffer(next_sequence_number_++).reset(); + return result; + } + // Otherwise wait for the next result. 
+ while (true) { + auto result = next_result(); + if (!result) { + AT_ASSERT(!detail::buffer_contains_result(buffer_)); + break; + } + // If it was not nullopt and the sequence numbers match, return it + // directly and bump the sequence number. + if (result->sequence_number == next_sequence_number_) { + ++next_sequence_number_; + return result; + } + // Stash the result for later. + AT_ASSERT(!buffer(result->sequence_number).has_value()); + buffer(result->sequence_number) = std::move(result); + } + // The result was an empty optional, so we are done with this epoch. + return std::nullopt; + } + + /// Accesses the buffer at the `index` modulo the buffer size. + std::optional& buffer(size_t index) { + return buffer_.at(index % buffer_.size()); + } + + /// The monotonically increasing sequence number we expect. + size_t next_sequence_number_ = 0; + + /// A fixed-size buffer (after construction). + std::vector> buffer_; +}; +} // namespace torch::data::detail::sequencers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/example.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/example.h new file mode 100644 index 0000000000000000000000000000000000000000..cfb331a9f064a8b50aa2684d127309f62f3007be --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/example.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::data { + +/// An `Example` from a dataset. +/// +/// A dataset consists of data and an associated target (label). 
+template +struct Example { + using DataType = Data; + using TargetType = Target; + + Example() = default; + Example(Data data, Target target) + : data(std::move(data)), target(std::move(target)) {} + + Data data; + Target target; +}; + +namespace example { +using NoTarget = void; +} // namespace example + +/// A specialization for `Example` that does not have a target. +/// +/// This class exists so that code can be written for a templated `Example` +/// type, and work both for labeled and unlabeled datasets. +template +struct Example { + using DataType = Data; + using TargetType = example::NoTarget; + + Example() = default; + /* implicit */ Example(Data data) : data(std::move(data)) {} + + // When a DataLoader returns an Example like this, that example should be + // implicitly convertible to the underlying data type. + + operator Data&() { + return data; + } + operator const Data&() const { + return data; + } + + Data data; +}; + +using TensorExample = Example; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/iterator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/iterator.h new file mode 100644 index 0000000000000000000000000000000000000000..1e87fea1878dfc484e7ef79eb295fec6479bc3c8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/iterator.h @@ -0,0 +1,183 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include +#include +#include +#include + +namespace torch::data { +namespace detail { +// For increased safety and more separated logic, this implementation of +// `Iterator` consists of a `ValidIterator` and a `SentinelIterator`. A +// `ValidIterator` yields new batches until the `DataLoader` is exhausted. While +// the `DataLoader` is not exhausted, `ValidIterator`s compare equal if they are +// the same object. When the `ValidIterator` becomes exhausted, it compares +// equal to the `SentinelIterator`, but not before. Half the code here is to +// implement double dispatch for the comparison. Got damnit, C++. 
+ +template +struct ValidIterator; + +template +struct SentinelIterator; + +/// Base class for the `ValidIterator` and `SentinelIterator` +template +struct IteratorImpl { + virtual ~IteratorImpl() = default; + virtual void next() = 0; + virtual Batch& get() = 0; + virtual bool operator==(const IteratorImpl& other) const = 0; + virtual bool operator==(const ValidIterator& other) const = 0; + virtual bool operator==(const SentinelIterator& other) const = 0; +}; + +template +struct ValidIterator : public IteratorImpl { + using BatchProducer = std::function()>; + + explicit ValidIterator(BatchProducer next_batch) + : next_batch_(std::move(next_batch)) {} + + /// Fetches the next batch. + void next() override { + // If we didn't get the very first batch yet, get it now. + lazy_initialize(); + TORCH_CHECK( + batch_.has_value(), "Attempted to increment iterator past the end"); + // Increment to the next batch. + batch_ = next_batch_(); + } + + /// Returns the current batch. The precondition for this operation to not + /// throw an exception is that it has been compared to the `SentinelIterator` + /// and did not compare equal. + Batch& get() override { + // If we didn't get the very first batch yet, get it now. + lazy_initialize(); + TORCH_CHECK( + batch_.has_value(), + "Attempted to dereference iterator that was past the end"); + return batch_.value(); + } + + /// Does double dispatch. + bool operator==(const IteratorImpl& other) const override { + return other == *this; + } + + /// A `ValidIterator` is equal to the `SentinelIterator` iff. the + /// `ValidIterator` has reached the end of the dataloader. + bool operator==(const SentinelIterator& /* unused */) const override { + lazy_initialize(); + return !batch_; + } + + /// Returns true if the memory address of `other` equals that of `this`. + bool operator==(const ValidIterator& other) const override { + return &other == this; + } + + /// Gets the very first batch if it has not yet been fetched. 
+ void lazy_initialize() const { + if (!initialized_) { + batch_ = next_batch_(); + initialized_ = true; + } + } + + BatchProducer next_batch_; + mutable std::optional batch_; + mutable bool initialized_ = false; +}; + +template +struct SentinelIterator : public IteratorImpl { + void next() override { + TORCH_CHECK( + false, + "Incrementing the DataLoader's past-the-end iterator is not allowed"); + } + + Batch& get() override { + TORCH_CHECK( + false, + "Dereferencing the DataLoader's past-the-end iterator is not allowed"); + } + + /// Does double dispatch. + bool operator==(const IteratorImpl& other) const override { + return other == *this; + } + + /// Calls the comparison operator between `ValidIterator` and + /// `SentinelIterator`. + bool operator==(const ValidIterator& other) const override { + return other == *this; + } + + /// Sentinel iterators always compare equal. + bool operator==(const SentinelIterator& other) const override { + return true; + } +}; +} // namespace detail + +template +class Iterator { + public: + // Type aliases to make the class recognized as a proper iterator. + using difference_type = std::ptrdiff_t; + using value_type = Batch; + using pointer = Batch*; + using reference = Batch&; + using iterator_category = std::input_iterator_tag; + + explicit Iterator(std::unique_ptr> impl) + : impl_(std::move(impl)) {} + + /// Increments the iterator. + /// Only permitted for valid iterators (not past the end). + Iterator& operator++() { + impl_->next(); + return *this; + } + + /// Returns the current batch. + /// Only permitted for valid iterators (not past the end). + Batch& operator*() { + return impl_->get(); + } + + /// Returns a pointer to the current batch. + /// Only permitted for valid iterators (not past the end). + Batch* operator->() { + return &impl_->get(); + } + + /// Compares two iterators for equality. 
+ bool operator==(const Iterator& other) const { + return *impl_ == *other.impl_; + } + + /// Compares two iterators for inequality. + bool operator!=(const Iterator& other) const { + return !(*this == other); + } + + private: + /// Points either to a `ValidIterator` or to a `SentinelIterator`. + std::shared_ptr> impl_; +}; +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers.h new file mode 100644 index 0000000000000000000000000000000000000000..02532bc75c74c2cff35313dd9a63fd8188ce5e9e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h new file mode 100644 index 0000000000000000000000000000000000000000..9d253649072d50dcae35dd67d40c08d7716b518b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/base.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { +/// A `Sampler` is an object that yields an index with which to access a +/// dataset. +template > +class Sampler { + public: + using BatchRequestType = BatchRequest; + + virtual ~Sampler() = default; + + /// Resets the `Sampler`'s internal state. + /// Typically called before a new epoch. + /// Optionally, accepts a new size when resetting the sampler. + virtual void reset(std::optional new_size) = 0; + + /// Returns the next index if possible, or an empty optional if the + /// sampler is exhausted for this epoch. + virtual std::optional next(size_t batch_size) = 0; + + /// Serializes the `Sampler` to the `archive`. + virtual void save(serialize::OutputArchive& archive) const = 0; + + /// Deserializes the `Sampler` from the `archive`. + virtual void load(serialize::InputArchive& archive) = 0; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h new file mode 100644 index 0000000000000000000000000000000000000000..637ef93b2902905cb9d6125af973345993838dc7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/custom_batch_request.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::data::samplers { +/// A base class for custom index types. +struct TORCH_API CustomBatchRequest { + CustomBatchRequest() = default; + CustomBatchRequest(const CustomBatchRequest&) = default; + CustomBatchRequest(CustomBatchRequest&&) noexcept = default; + virtual ~CustomBatchRequest() = default; + + /// The number of elements accessed by this index. + virtual size_t size() const = 0; +}; +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h new file mode 100644 index 0000000000000000000000000000000000000000..1435621eb41c1f776e3e607d45bfa320a471e795 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/distributed.h @@ -0,0 +1,138 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that selects a subset of indices to sample from and defines a +/// sampling behavior. In a distributed setting, this selects a subset of the +/// indices depending on the provided num_replicas and rank parameters. The +/// `Sampler` performs a rounding operation based on the `allow_duplicates` +/// parameter to decide the local sample count. +template > +class DistributedSampler : public Sampler { + public: + DistributedSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true) + : size_(size), + num_replicas_(num_replicas), + rank_(rank), + + allow_duplicates_(allow_duplicates) {} + + /// Set the epoch for the current enumeration. This can be used to alter the + /// sample selection and shuffling behavior. 
+ void set_epoch(size_t epoch) { + epoch_ = epoch; + } + + size_t epoch() const { + return epoch_; + } + + protected: + size_t local_sample_count() { + if (allow_duplicates_) { + return (size_ + num_replicas_ - 1) / num_replicas_; + } else { + return size_ / num_replicas_; + } + } + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t size_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t num_replicas_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t rank_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t epoch_{0}; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool allow_duplicates_; +}; + +/// Select samples randomly. The sampling order is shuffled at each `reset()` +/// call. +class TORCH_API DistributedRandomSampler : public DistributedSampler<> { + public: + DistributedRandomSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true); + + /// Resets the `DistributedRandomSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `DistributedRandomSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `DistributedRandomSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `DistributedRandomSampler`. + size_t index() const noexcept; + + private: + void populate_indices(); + + size_t begin_index_; + size_t end_index_; + size_t sample_index_; + std::vector all_indices_; +}; + +/// Select samples sequentially. 
+class TORCH_API DistributedSequentialSampler : public DistributedSampler<> { + public: + DistributedSequentialSampler( + size_t size, + size_t num_replicas = 1, + size_t rank = 0, + bool allow_duplicates = true); + + /// Resets the `DistributedSequentialSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `DistributedSequentialSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `DistributedSequentialSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `DistributedSequentialSampler`. + size_t index() const noexcept; + + private: + void populate_indices(); + + size_t begin_index_; + size_t end_index_; + size_t sample_index_; + std::vector all_indices_; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h new file mode 100644 index 0000000000000000000000000000000000000000..a671b92c54fb1687527c5a6a781ef93c2bf5ad18 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/random.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that returns random indices. +class TORCH_API RandomSampler : public Sampler<> { + public: + /// Constructs a `RandomSampler` with a size and dtype for the stored indices. + /// + /// The constructor will eagerly allocate all required indices, which is the + /// sequence `0 ... size - 1`. `index_dtype` is the data type of the stored + /// indices. You can change it to influence memory usage. + explicit RandomSampler(int64_t size, Dtype index_dtype = torch::kInt64); + + ~RandomSampler() override; + + /// Resets the `RandomSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `RandomSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `RandomSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `RandomSampler`. 
+ size_t index() const noexcept; + + private: + at::Tensor indices_; + int64_t index_ = 0; +}; +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h new file mode 100644 index 0000000000000000000000000000000000000000..ed2a5c936da892a4a354546dc696d5ed075023e8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/sequential.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A `Sampler` that returns indices sequentially. +class TORCH_API SequentialSampler : public Sampler<> { + public: + /// Creates a `SequentialSampler` that will return indices in the range + /// `0...size - 1`. + explicit SequentialSampler(size_t size); + + /// Resets the `SequentialSampler` to zero. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `SequentialSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `SequentialSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `SequentialSampler`. 
+ size_t index() const noexcept; + + private: + size_t size_; + size_t index_{0}; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h new file mode 100644 index 0000000000000000000000000000000000000000..bc1bed5f38dc3ae924119ee44db9c8e2f1c2a997 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/serialize.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::data::samplers { +/// Serializes a `Sampler` into an `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const Sampler& sampler) { + sampler.save(archive); + return archive; +} + +/// Deserializes a `Sampler` from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + Sampler& sampler) { + sampler.load(archive); + return archive; +} +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h new file mode 100644 index 0000000000000000000000000000000000000000..c0ff4614aefe85516fac03f88cbd9b2d622abb83 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/samplers/stream.h @@ -0,0 +1,62 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::serialize { +class InputArchive; +class OutputArchive; +} // namespace torch::serialize + +namespace torch::data::samplers { + +/// A wrapper around a batch size value, which implements the +/// `CustomBatchRequest` interface. +struct TORCH_API BatchSize : public CustomBatchRequest { + explicit BatchSize(size_t size); + size_t size() const noexcept override; + operator size_t() const noexcept; + size_t size_; +}; + +/// A sampler for (potentially infinite) streams of data. +/// +/// The major feature of the `StreamSampler` is that it does not return +/// particular indices, but instead only the number of elements to fetch from +/// the dataset. The dataset has to decide how to produce those elements. +class TORCH_API StreamSampler : public Sampler { + public: + /// Constructs the `StreamSampler` with the number of individual examples that + /// should be fetched until the sampler is exhausted. + explicit StreamSampler(size_t epoch_size); + + /// Resets the internal state of the sampler. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns a `BatchSize` object with the number of elements to fetch in the + /// next batch. 
This number is the minimum of the supplied `batch_size` and + /// the difference between the `epoch_size` and the current index. If the + /// `epoch_size` has been reached, returns an empty optional. + std::optional next(size_t batch_size) override; + + /// Serializes the `StreamSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `StreamSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + private: + size_t examples_retrieved_so_far_ = 0; + size_t epoch_size_; +}; + +} // namespace torch::data::samplers + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms.h new file mode 100644 index 0000000000000000000000000000000000000000..49473859bd0f820cd71d04e0e389f93d950ed41d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h new file mode 100644 index 0000000000000000000000000000000000000000..1681070363cecfe3685c45b31626ebdf6a6e8812 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/base.h @@ -0,0 +1,54 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::data::transforms { + +/// A transformation of a batch to a new batch. +template +class BatchTransform { + public: + using InputBatchType = InputBatch; + using OutputBatchType = OutputBatch; + + virtual ~BatchTransform() = default; + + /// Applies the transformation to the given `input_batch`. + virtual OutputBatch apply_batch(InputBatch input_batch) = 0; +}; + +/// A transformation of individual input examples to individual output examples. +/// +/// Just like a `Dataset` is a `BatchDataset`, a `Transform` is a +/// `BatchTransform` that can operate on the level of individual examples rather +/// than entire batches. The batch-level transform is implemented (by default) +/// in terms of the example-level transform, though this can be customized. +template +class Transform + : public BatchTransform, std::vector> { + public: + using InputType = Input; + using OutputType = Output; + + /// Applies the transformation to the given `input`. + virtual OutputType apply(InputType input) = 0; + + /// Applies the `transformation` over the entire `input_batch`. 
+ std::vector apply_batch(std::vector input_batch) override { + std::vector output_batch; + output_batch.reserve(input_batch.size()); + for (auto&& input : input_batch) { + output_batch.push_back(apply(std::move(input))); + } + return output_batch; + } +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h new file mode 100644 index 0000000000000000000000000000000000000000..10aa60d70be40ed8b06c88318e94ad28fd274c88 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/collate.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::data::transforms { + +/// A `Collation` is a transform that reduces a batch into a single value. +/// The result is a `BatchDataset` that has the type of the single value as its +/// `BatchType`. +template > +using Collation = BatchTransform; + +/// A `Collate` allows passing a custom function to reduce/collate a batch +/// into a single value. It's effectively the lambda version of `Collation`, +/// which you could subclass and override `operator()` to achieve the same. +/// +/// \rst +/// .. 
code-block:: cpp +/// using namespace torch::data; +/// +/// auto dataset = datasets::MNIST("path/to/mnist") +/// .map(transforms::Collate>([](std::vector> e) { +/// return std::move(e.front()); +/// })); +/// \endrst +template > +using Collate = BatchLambda; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h new file mode 100644 index 0000000000000000000000000000000000000000..9bb9e3e986224f0618cdfe5cf65a9f5896b765df --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/lambda.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::data::transforms { + +/// A `BatchTransform` that applies a user-provided functor to a batch. +template +class BatchLambda : public BatchTransform { + public: + using typename BatchTransform::InputBatchType; + using typename BatchTransform::OutputBatchType; + using FunctionType = std::function; + + /// Constructs the `BatchLambda` from the given `function` object. + explicit BatchLambda(FunctionType function) + : function_(std::move(function)) {} + + /// Applies the user-provided function object to the `input_batch`. + OutputBatchType apply_batch(InputBatchType input_batch) override { + return function_(std::move(input_batch)); + } + + private: + FunctionType function_; +}; + +// A `Transform` that applies a user-provided functor to individual examples. 
+template +class Lambda : public Transform { + public: + using typename Transform::InputType; + using typename Transform::OutputType; + using FunctionType = std::function; + + /// Constructs the `Lambda` from the given `function` object. + explicit Lambda(FunctionType function) : function_(std::move(function)) {} + + /// Applies the user-provided function object to the `input`. + OutputType apply(InputType input) override { + return function_(std::move(input)); + } + + private: + FunctionType function_; +}; + +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h new file mode 100644 index 0000000000000000000000000000000000000000..c0f5db9b43ba3d41ddb64dbb4482aefe6c774d77 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/stack.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::transforms { + +template > +struct Stack; + +/// A `Collation` for `Example` types that stacks all data +/// tensors into one tensor, and all target (label) tensors into one tensor. 
+template <> +struct Stack> : public Collation> { + Example<> apply_batch(std::vector> examples) override { + std::vector data, targets; + data.reserve(examples.size()); + targets.reserve(examples.size()); + for (auto& example : examples) { + data.push_back(std::move(example.data)); + targets.push_back(std::move(example.target)); + } + return {torch::stack(data), torch::stack(targets)}; + } +}; + +/// A `Collation` for `Example` types that stacks all data +/// tensors into one tensor. +template <> +struct Stack + : public Collation> { + TensorExample apply_batch(std::vector examples) override { + std::vector data; + data.reserve(examples.size()); + for (auto& example : examples) { + data.push_back(std::move(example.data)); + } + return torch::stack(data); + } +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..cba46ead0d39671b9358d78d22c639a8585ffcf4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/transforms/tensor.h @@ -0,0 +1,78 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::data::transforms { + +/// A `Transform` that is specialized for the typical `Example` +/// combination. It exposes a single `operator()` interface hook (for +/// subclasses), and calls this function on input `Example` objects. 
+template +class TensorTransform + : public Transform, Example> { + public: + using E = Example; + using typename Transform::InputType; + using typename Transform::OutputType; + + /// Transforms a single input tensor to an output tensor. + virtual Tensor operator()(Tensor input) = 0; + + /// Implementation of `Transform::apply` that calls `operator()`. + OutputType apply(InputType input) override { + input.data = (*this)(std::move(input.data)); + return input; + } +}; + +/// A `Lambda` specialized for the typical `Example` input type. +template +class TensorLambda : public TensorTransform { + public: + using FunctionType = std::function; + + /// Creates a `TensorLambda` from the given `function`. + explicit TensorLambda(FunctionType function) + : function_(std::move(function)) {} + + /// Applies the user-provided functor to the input tensor. + Tensor operator()(Tensor input) override { + return function_(std::move(input)); + } + + private: + FunctionType function_; +}; + +/// Normalizes input tensors by subtracting the supplied mean and dividing by +/// the given standard deviation. +template +struct Normalize : public TensorTransform { + /// Constructs a `Normalize` transform. The mean and standard deviation can be + /// anything that is broadcastable over the input tensors (like single + /// scalars). + Normalize(ArrayRef mean, ArrayRef stddev) + : mean(torch::tensor(mean, torch::kFloat32) + .unsqueeze(/*dim=*/1) + .unsqueeze(/*dim=*/2)), + stddev(torch::tensor(stddev, torch::kFloat32) + .unsqueeze(/*dim=*/1) + .unsqueeze(/*dim=*/2)) {} + + torch::Tensor operator()(Tensor input) override { + return input.sub(mean).div(stddev); + } + + torch::Tensor mean, stddev; +}; +} // namespace torch::data::transforms + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/worker_exception.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/worker_exception.h new file mode 100644 index 0000000000000000000000000000000000000000..f68702007c761a3e77959476220039f97476fbc4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/data/worker_exception.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::data { + +/// An exception thrown when a DataLoader's worker thread throws an exception, +/// which is caught. A `WorkerException` stores an `exception_ptr` to the +/// original exception thrown in the worker thread. +struct WorkerException : public std::exception { + /// Constructs a `WorkerException` from an `exception_ptr`. + explicit WorkerException(std::exception_ptr original) + // NOLINTNEXTLINE(bugprone-throw-keyword-missing) + : original_exception(std::move(original)), + message("Caught exception in DataLoader worker thread.") { + try { + std::rethrow_exception(original_exception); + } catch (std::exception& e) { + message += " Original message: "; + message += e.what(); + } + } + + const char* what() const noexcept override { + return message.c_str(); + } + + /// The original exception thrown in the worker thread. + std::exception_ptr original_exception; + + /// This exception's message (not the original exception's message). + std::string message; +}; + +} // namespace torch::data + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/TensorDataContainer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/TensorDataContainer.h new file mode 100644 index 0000000000000000000000000000000000000000..0a41b0394c56d0c2f997859ca9a60621594a11b5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/TensorDataContainer.h @@ -0,0 +1,354 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +#ifndef AT_PER_OPERATOR_HEADERS +#include +#else +#include +#include +#endif + +#include + +namespace torch::detail { + +enum class TensorDataContainerType { Scalar, InitList, Tensor }; + +struct TensorDataContainer; + +inline std::ostream& operator<<( + std::ostream& stream, + const TensorDataContainer& tensor_data_container); + +inline c10::ScalarType compute_desired_dtype(c10::ScalarType scalar_type) { + if (scalar_type == at::kInt || scalar_type == at::kLong) { + // C++ `torch::tensor` with an integer type or an `at::ArrayRef` / + // `std::vector` / (nested) braced-init-list of integer types always + // produces a tensor of dtype `at::kLong` (aka. int64_t), matching Python + // `torch.tensor` behavior. + return at::kLong; + } else if (scalar_type == at::kFloat || scalar_type == at::kDouble) { + // C++ `torch::tensor` with a floating-point type or an `at::ArrayRef` / + // `std::vector` / (nested) braced-init-list of floating-point types always + // produces a tensor of dtype `torch::get_default_dtype()`, matching Python + // `torch.tensor` behavior. 
+ return at::typeMetaToScalarType(at::get_default_dtype()); + } else { + return scalar_type; + } +} + +// We use `TensorDataContainer` to support converting the following data +// container types into the equivalent Tensor: +// +// 1. Arbitrarily nested braced-init-list (e.g. `{{1, 2}, {3, 4}}`). +// 2. `at::ArrayRef` of supported tensor data types. +// 3. `std::vector` of supported tensor data types. +// +// At any time, a `TensorDataContainer` object represents one of the following: +// +// 1. A scalar with value `scalar()` and type `scalar_type()`. +// 2. A Tensor represented in `std::initializer_list` form, +// with value `init_list()`, Tensor scalar type `scalar_type()`, and Tensor +// sizes `sizes()`. +// 3. A Tensor represented in `at::Tensor` form, with value `tensor()`, scalar +// type `scalar_type()`, +// and Tensor sizes `sizes()`. +// +// All the infrastructure here is mostly to support converting an arbitrarily +// nested braced-init-list to the equivalent Tensor successfully. Consider the +// following example: +// +// `torch::tensor({{1}, {2}})` +// +// this will call into the `torch::tensor` function: +// +// `at::Tensor tensor(detail::TensorDataContainer tensor_data_container, const +// at::TensorOptions& options = {})` +// +// the compiler will first try to convert `{{1}, {2}}` to `TensorDataContainer` +// type: +// +// `TensorDataContainer({{1}, {2}})` +// +// which matches to the +// `TensorDataContainer(std::initializer_list)` +// constructor, and in an attempt to convert `{1}` and `{2}` to +// `TensorDataContainer`, it calls the following: +// +// `TensorDataContainer({1})` (same call path happens for `{2}`, and we'll just +// focus on `{1}` here) +// +// At this point, theoretically there are two plausible ways for `{1}` to be +// matched to one of the constructors of `TensorDataContainer`: +// +// 1. It can be a list-initialization of a scalar value, thus matching +// `TensorDataContainer(int value)`. +// 2. 
It can be converted to `std::initializer_list`, thus +// matching +// `TensorDataContainer(std::initializer_list)`. +// +// How does the compiler decide which one to choose? According to +// `https://en.cppreference.com/w/cpp/language/list_initialization`, +// braced-init-list always prefers the constructor that takes +// `std::initializer_list`. Hence we happily move forward with constructor #2, +// and it calls the following: +// +// `TensorDataContainer(1)` +// +// Now it matches `TensorDataContainer(int value)`, which stores `1` as a scalar +// value. All is good. +struct TensorDataContainer { + // NOTE: For tensors with zero-size dimensions (e.g. `torch::tensor({{}, + // {}})`), the innermost empty braced-init-list `{}` matches the default + // constructor of the innermost `TensorDataContainer`. + TensorDataContainer() + : sizes_({0}), + // NOTE: In Python, the dtype of tensors with zero-size dimensions (e.g. + // `torch.tensor([[], []])`) depends on the value of + // `torch.get_default_dtype()`, and we should do the same for the C++ + // equivalent. + scalar_type_(at::typeMetaToScalarType(at::get_default_dtype())), + type_(TensorDataContainerType::InitList) {} +#define TENSOR(T, S) \ + TensorDataContainer(T value) \ + : scalar_type_(at::k##S), \ + type_(TensorDataContainerType::Scalar), \ + scalar_(value) {} + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TENSOR) + AT_FORALL_COMPLEX_TYPES(TENSOR) +#undef TENSOR + TensorDataContainer(std::initializer_list init_list) + : scalar_type_(init_list.begin()->scalar_type()), + type_(TensorDataContainerType::InitList), + init_list_(init_list) { + const TensorDataContainer& first_elem = *(init_list.begin()); + for (const auto& elem : init_list) { + TORCH_CHECK( + elem.sizes() == first_elem.sizes(), + "Expected all sub-lists to have sizes: ", + first_elem.sizes(), + " (e.g. 
", + first_elem, + "), ", + "but got sub-list ", + elem, + " with sizes: ", + elem.sizes()); + TORCH_CHECK( + elem.scalar_type() == first_elem.scalar_type(), + "Expected all elements of the tensor to have the same scalar type: ", + first_elem.scalar_type(), + ", but got element of scalar type: ", + elem.scalar_type()); + } + sizes_.reserve(first_elem.sizes().size() + 1); + sizes_.push_back(static_cast(init_list.size())); + sizes_.insert( + sizes_.end(), first_elem.sizes().begin(), first_elem.sizes().end()); + } + +#define TENSOR(T, S) \ + TensorDataContainer(at::ArrayRef values) \ + : sizes_({(int64_t)values.size()}), \ + scalar_type_(at::k##S), \ + type_(TensorDataContainerType::Tensor) { \ + at::AutoDispatchBelowAutograd mode; \ + if (scalar_type_ == at::kBool) { \ + tensor_ = at::tensor(values, at::TensorOptions().device(at::kCPU)); \ + } else { \ + tensor_ = at::tensor(values, at::dtype(scalar_type_).device(at::kCPU)); \ + } \ + } + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TENSOR) + AT_FORALL_COMPLEX_TYPES(TENSOR) +#undef TENSOR + + // NOTE: We need to handle `std::vector` explicitly instead of relying on an + // implicit conversion to `at::ArrayRef`, otherwise the following error can be + // thrown when calling `torch::tensor(std::vector({1, 2}))`: + // ``` + // error: no matching function for call to 'tensor(const std::vector&)' + // no known conversion for argument 1 from 'const std::vector' to + // 'torch::detail::TensorDataContainer' + // ``` + // + // NOTE: `torch::tensor(std::vector)` is not supported for now, because + // ArrayRef cannot be constructed from a std::vector bitfield. 
+#define TENSOR(T, S) \ + TensorDataContainer(const std::vector& values) \ + : TensorDataContainer(at::ArrayRef(values)) {} + AT_FORALL_SCALAR_TYPES_AND2(Half, BFloat16, TENSOR) + AT_FORALL_COMPLEX_TYPES(TENSOR) +#undef TENSOR + + bool is_scalar() const { + return type_ == TensorDataContainerType::Scalar; + } + + const c10::Scalar& scalar() const { + TORCH_CHECK( + is_scalar(), + "Can only call `scalar()` on a TensorDataContainer that has `is_scalar() == true`"); + return scalar_; + } + + bool is_init_list() const { + return type_ == TensorDataContainerType::InitList; + } + + const std::initializer_list& init_list() const { + TORCH_CHECK( + is_init_list(), + "Can only call `init_list()` on a TensorDataContainer that has `is_init_list() == true`"); + return init_list_; + } + + bool is_tensor() const { + return type_ == TensorDataContainerType::Tensor; + } + + const at::Tensor& tensor() const { + TORCH_CHECK( + is_tensor(), + "Can only call `tensor()` on a TensorDataContainer that has `is_tensor() == true`"); + return tensor_; + } + + const std::vector& sizes() const { + return sizes_; + } + + const c10::ScalarType& scalar_type() const { + return scalar_type_; + } + + at::Tensor convert_to_tensor(at::TensorOptions options) const { + if (!options.has_dtype()) { + options = options.dtype(compute_desired_dtype(scalar_type_)); + } + + if (is_scalar()) { + at::AutoDispatchBelowAutograd mode; + return at::scalar_tensor(scalar_, options); + } else if (is_init_list()) { + // NOTE: Here we explicitly choose to initialize the tensor on CPU first, + // fill each element of the tensor, and then move the tensor to the + // desired device. For CUDA device, this approach only involves 1 CUDA + // kernel launch, and is much faster than initializing the tensor on CUDA + // first and then filling each element of it (which involves `N` CUDA + // kernel launches where `N` is the number of the elements in the tensor). 
+ at::Tensor tensor = ([&]() { + at::AutoDispatchBelowAutograd mode; + return at::empty(sizes_, options.device(at::kCPU)); + })(); + fill_tensor(tensor); + return tensor.to(options.device()); + } else if (is_tensor()) { + auto output = tensor_.to(options); + TORCH_CHECK( + !tensor_.is_complex() || output.is_complex(), + "can not do torch::tensor(complex, dtype=non-complex) because complex can not be casted to real number without loss of information"); + return output; + } else { + TORCH_INTERNAL_ASSERT(false, "Invalid TensorDataContainer type"); + } + } + + void pretty_print_recursive(std::ostream& stream) const { + if (is_scalar()) { + AT_DISPATCH_ALL_TYPES_AND3( + at::kBool, + at::kHalf, + at::kBFloat16, + scalar_type_, + "TensorDataContainer_pretty_print_scalar", + [&] { stream << scalar_.to(); }); + } else if (is_init_list()) { + stream << '{'; + for (const TensorDataContainer* it = init_list_.begin(); + it != init_list_.end(); + it++) { + stream << *it; + if (std::next(it) != init_list_.end()) + stream << ", "; + } + stream << '}'; + } else if (is_tensor()) { + stream << '{'; + for (const auto i : c10::irange(tensor_.sizes()[0])) { + AT_DISPATCH_ALL_TYPES_AND3( + at::kBool, + at::kHalf, + at::kBFloat16, + scalar_type_, + "TensorDataContainer_pretty_print_tensor_item", + [&] { stream << tensor_[i].item(); }); + if (i != tensor_.sizes()[0] - 1) + stream << ", "; + } + stream << '}'; + } else { + TORCH_INTERNAL_ASSERT(false, "Invalid TensorDataContainer type"); + } + } + + private: + void fill_tensor(at::Tensor& tensor) const { + if (is_scalar()) { + TORCH_INTERNAL_ASSERT( + tensor.dim() == 0, + "Expected a 0-dim Tensor, but got Tensor with dimensions: ", + tensor.dim()); + at::NoGradGuard guard; + tensor.fill_(scalar_); + } else if (is_init_list()) { + TORCH_INTERNAL_ASSERT( + tensor.sizes()[0] == (int64_t)init_list_.size(), + "Expected a Tensor with size ", + init_list_.size(), + " in its first dimension, but got Tensor with size ", + tensor.sizes()[0], + " in 
its first dimension"); + int64_t index = 0; + for (const auto& elem : init_list_) { + at::Tensor slice = tensor[index]; + elem.fill_tensor(slice); + index++; + } + } else if (is_tensor()) { + TORCH_INTERNAL_ASSERT( + false, + "TensorDataContainer is already a Tensor type, `fill_tensor` should not be called"); + } else { + TORCH_INTERNAL_ASSERT(false, "Invalid TensorDataContainer type"); + } + } + + std::vector sizes_; + c10::ScalarType scalar_type_; + TensorDataContainerType type_; + c10::Scalar scalar_; + std::initializer_list init_list_; + at::Tensor tensor_; +}; + +inline std::ostream& operator<<( + std::ostream& stream, + const TensorDataContainer& tensor_data_container) { + tensor_data_container.pretty_print_recursive(stream); + return stream; +} + +} // namespace torch::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/static.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/static.h new file mode 100644 index 0000000000000000000000000000000000000000..0701bc0776063af5ca1dc73a74937e40e8854ef9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/detail/static.h @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::nn { +class Module; +} // namespace torch::nn + +namespace torch::detail { +/// Detects if a type T has a forward() method. +template +struct has_forward { + // Declare two types with differing size. + using yes = int8_t; + using no = int16_t; + + // Here we declare two functions. 
The first is only enabled if `&U::forward` + // is well-formed and returns the `yes` type. In C++, the ellipsis parameter + // type (`...`) always puts the function at the bottom of overload resolution. + // This is specified in the standard as: 1) A standard conversion sequence is + // always better than a user-defined conversion sequence or an ellipsis + // conversion sequence. 2) A user-defined conversion sequence is always better + // than an ellipsis conversion sequence This means that if the first overload + // is viable, it will be preferred over the second as long as we pass any + // convertible type. The type of `&U::forward` is a pointer type, so we can + // pass e.g. 0. + template + static yes test(decltype(&U::forward)); + template + static no test(...); + + // Finally we test statically whether the size of the type returned by the + // selected overload is the size of the `yes` type. + static constexpr bool value = (sizeof(test(nullptr)) == sizeof(yes)); +}; + +template +constexpr bool check_not_lvalue_references() { + return (!std::is_lvalue_reference_v || + std::is_const_v>) && + check_not_lvalue_references(); +} + +template <> +inline constexpr bool check_not_lvalue_references() { + return true; +} + +/// A type trait whose `value` member is true if `M` derives from `Module`. +template +using is_module = std::is_base_of>; + +template +using enable_if_module_t = std::enable_if_t::value, T>; +} // namespace torch::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/enum.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/enum.h new file mode 100644 index 0000000000000000000000000000000000000000..acc0f0a4d15ae1dfedf26f2fd1b6352cb97e9225 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/enum.h @@ -0,0 +1,215 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +#define TORCH_ENUM_DECLARE(name) \ + namespace torch { \ + namespace enumtype { \ + /* \ + NOTE: We need to provide the default constructor for each struct, \ + otherwise Clang 3.8 would complain: \ + ``` \ + error: default initialization of an object of const type 'const \ + enumtype::Enum1' without a user-provided default constructor \ + ``` \ + */ \ + struct k##name { \ + k##name() {} \ + }; \ + } \ + TORCH_API extern const enumtype::k##name k##name; \ + } + +#define TORCH_ENUM_DEFINE(name) \ + namespace torch { \ + const enumtype::k##name k##name; \ + } + +#define TORCH_ENUM_PRETTY_PRINT(name) \ + std::string operator()(const enumtype::k##name& v [[maybe_unused]]) const { \ + std::string k("k"); \ + return k + #name; \ + } + +// NOTE: Backstory on why we need the following two macros: +// +// Consider the following options class: +// +// ``` +// struct TORCH_API SomeOptions { +// typedef std::variant +// reduction_t; SomeOptions(reduction_t reduction = torch::kMean) : +// reduction_(reduction) {} +// +// TORCH_ARG(reduction_t, reduction); +// }; +// ``` +// +// and the functional that uses it: +// +// ``` +// Tensor some_functional( +// const Tensor& input, +// SomeOptions options = {}) { +// ... 
+// } +// ``` +// +// Normally, we would expect this to work: +// +// `F::some_functional(input, torch::kNone)` +// +// However, it throws the following error instead: +// +// ``` +// error: could not convert `torch::kNone` from `const torch::enumtype::kNone` +// to `torch::nn::SomeOptions` +// ``` +// +// To get around this problem, we explicitly provide the following constructors +// for `SomeOptions`: +// +// ``` +// SomeOptions(torch::enumtype::kNone reduction) : reduction_(torch::kNone) {} +// SomeOptions(torch::enumtype::kMean reduction) : reduction_(torch::kMean) {} +// SomeOptions(torch::enumtype::kSum reduction) : reduction_(torch::kSum) {} +// ``` +// +// so that the conversion from `torch::kNone` to `SomeOptions` would work. +// +// Note that we also provide the default constructor `SomeOptions() {}`, so that +// `SomeOptions options = {}` can work. +#define TORCH_OPTIONS_CTOR_VARIANT_ARG3( \ + OPTIONS_NAME, ARG_NAME, TYPE1, TYPE2, TYPE3) \ + OPTIONS_NAME() = default; \ + OPTIONS_NAME(torch::enumtype::TYPE1 ARG_NAME) : ARG_NAME##_(torch::TYPE1) {} \ + OPTIONS_NAME(torch::enumtype::TYPE2 ARG_NAME) : ARG_NAME##_(torch::TYPE2) {} \ + OPTIONS_NAME(torch::enumtype::TYPE3 ARG_NAME) : ARG_NAME##_(torch::TYPE3) {} + +#define TORCH_OPTIONS_CTOR_VARIANT_ARG4( \ + OPTIONS_NAME, ARG_NAME, TYPE1, TYPE2, TYPE3, TYPE4) \ + OPTIONS_NAME() = default; \ + OPTIONS_NAME(torch::enumtype::TYPE1 ARG_NAME) : ARG_NAME##_(torch::TYPE1) {} \ + OPTIONS_NAME(torch::enumtype::TYPE2 ARG_NAME) : ARG_NAME##_(torch::TYPE2) {} \ + OPTIONS_NAME(torch::enumtype::TYPE3 ARG_NAME) : ARG_NAME##_(torch::TYPE3) {} \ + OPTIONS_NAME(torch::enumtype::TYPE4 ARG_NAME) : ARG_NAME##_(torch::TYPE4) {} + +TORCH_ENUM_DECLARE(Linear) +TORCH_ENUM_DECLARE(Conv1D) +TORCH_ENUM_DECLARE(Conv2D) +TORCH_ENUM_DECLARE(Conv3D) +TORCH_ENUM_DECLARE(ConvTranspose1D) +TORCH_ENUM_DECLARE(ConvTranspose2D) +TORCH_ENUM_DECLARE(ConvTranspose3D) +TORCH_ENUM_DECLARE(Sigmoid) +TORCH_ENUM_DECLARE(Tanh) +TORCH_ENUM_DECLARE(ReLU) 
+TORCH_ENUM_DECLARE(GELU) +TORCH_ENUM_DECLARE(SiLU) +TORCH_ENUM_DECLARE(Mish) +TORCH_ENUM_DECLARE(LeakyReLU) +TORCH_ENUM_DECLARE(FanIn) +TORCH_ENUM_DECLARE(FanOut) +TORCH_ENUM_DECLARE(Constant) +TORCH_ENUM_DECLARE(Reflect) +TORCH_ENUM_DECLARE(Replicate) +TORCH_ENUM_DECLARE(Circular) +TORCH_ENUM_DECLARE(Nearest) +TORCH_ENUM_DECLARE(Bilinear) +TORCH_ENUM_DECLARE(Bicubic) +TORCH_ENUM_DECLARE(Trilinear) +TORCH_ENUM_DECLARE(Area) +TORCH_ENUM_DECLARE(NearestExact) +TORCH_ENUM_DECLARE(Sum) +TORCH_ENUM_DECLARE(Mean) +TORCH_ENUM_DECLARE(Max) +TORCH_ENUM_DECLARE(None) +TORCH_ENUM_DECLARE(BatchMean) +TORCH_ENUM_DECLARE(Zeros) +TORCH_ENUM_DECLARE(Border) +TORCH_ENUM_DECLARE(Reflection) +TORCH_ENUM_DECLARE(RNN_TANH) +TORCH_ENUM_DECLARE(RNN_RELU) +TORCH_ENUM_DECLARE(LSTM) +TORCH_ENUM_DECLARE(GRU) +TORCH_ENUM_DECLARE(Valid) +TORCH_ENUM_DECLARE(Same) + +namespace torch::enumtype { + +struct _compute_enum_name { + TORCH_ENUM_PRETTY_PRINT(Linear) + TORCH_ENUM_PRETTY_PRINT(Conv1D) + TORCH_ENUM_PRETTY_PRINT(Conv2D) + TORCH_ENUM_PRETTY_PRINT(Conv3D) + TORCH_ENUM_PRETTY_PRINT(ConvTranspose1D) + TORCH_ENUM_PRETTY_PRINT(ConvTranspose2D) + TORCH_ENUM_PRETTY_PRINT(ConvTranspose3D) + TORCH_ENUM_PRETTY_PRINT(Sigmoid) + TORCH_ENUM_PRETTY_PRINT(Tanh) + TORCH_ENUM_PRETTY_PRINT(ReLU) + TORCH_ENUM_PRETTY_PRINT(GELU) + TORCH_ENUM_PRETTY_PRINT(SiLU) + TORCH_ENUM_PRETTY_PRINT(Mish) + TORCH_ENUM_PRETTY_PRINT(LeakyReLU) + TORCH_ENUM_PRETTY_PRINT(FanIn) + TORCH_ENUM_PRETTY_PRINT(FanOut) + TORCH_ENUM_PRETTY_PRINT(Constant) + TORCH_ENUM_PRETTY_PRINT(Reflect) + TORCH_ENUM_PRETTY_PRINT(Replicate) + TORCH_ENUM_PRETTY_PRINT(Circular) + TORCH_ENUM_PRETTY_PRINT(Nearest) + TORCH_ENUM_PRETTY_PRINT(Bilinear) + TORCH_ENUM_PRETTY_PRINT(Bicubic) + TORCH_ENUM_PRETTY_PRINT(Trilinear) + TORCH_ENUM_PRETTY_PRINT(Area) + TORCH_ENUM_PRETTY_PRINT(NearestExact) + TORCH_ENUM_PRETTY_PRINT(Sum) + TORCH_ENUM_PRETTY_PRINT(Mean) + TORCH_ENUM_PRETTY_PRINT(Max) + TORCH_ENUM_PRETTY_PRINT(None) + TORCH_ENUM_PRETTY_PRINT(BatchMean) + 
TORCH_ENUM_PRETTY_PRINT(Zeros) + TORCH_ENUM_PRETTY_PRINT(Border) + TORCH_ENUM_PRETTY_PRINT(Reflection) + TORCH_ENUM_PRETTY_PRINT(RNN_TANH) + TORCH_ENUM_PRETTY_PRINT(RNN_RELU) + TORCH_ENUM_PRETTY_PRINT(LSTM) + TORCH_ENUM_PRETTY_PRINT(GRU) + TORCH_ENUM_PRETTY_PRINT(Valid) + TORCH_ENUM_PRETTY_PRINT(Same) +}; + +template +std::string get_enum_name(V variant_enum) { + return std::visit(enumtype::_compute_enum_name{}, variant_enum); +} + +template +at::Reduction::Reduction reduction_get_enum(V variant_enum) { + if (std::holds_alternative(variant_enum)) { + return at::Reduction::None; + } else if (std::holds_alternative(variant_enum)) { + return at::Reduction::Mean; + } else if (std::holds_alternative(variant_enum)) { + return at::Reduction::Sum; + } else { + TORCH_CHECK( + false, + get_enum_name(variant_enum), + " is not a valid value for reduction"); + return at::Reduction::END; + } +} + +} // namespace torch::enumtype + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/expanding_array.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/expanding_array.h new file mode 100644 index 0000000000000000000000000000000000000000..fa1591860a6f590919442d87d99d5055aebe436b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/expanding_array.h @@ -0,0 +1,187 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace torch { + +/// A utility class that accepts either a container of `D`-many values, or a +/// single value, which is internally repeated `D` times. 
This is useful to +/// represent parameters that are multidimensional, but often equally sized in +/// all dimensions. For example, the kernel size of a 2D convolution has an `x` +/// and `y` length, but `x` and `y` are often equal. In such a case you could +/// just pass `3` to an `ExpandingArray<2>` and it would "expand" to `{3, 3}`. +template +class ExpandingArray { + public: + /// Constructs an `ExpandingArray` from an `initializer_list`. The extent of + /// the length is checked against the `ExpandingArray`'s extent parameter `D` + /// at runtime. + /*implicit*/ ExpandingArray(std::initializer_list list) + : ExpandingArray(c10::ArrayRef(list)) {} + + /// Constructs an `ExpandingArray` from an `std::vector`. The extent of + /// the length is checked against the `ExpandingArray`'s extent parameter `D` + /// at runtime. + /*implicit*/ ExpandingArray(std::vector vec) + : ExpandingArray(c10::ArrayRef(vec)) {} + + /// Constructs an `ExpandingArray` from an `c10::ArrayRef`. The extent of + /// the length is checked against the `ExpandingArray`'s extent parameter `D` + /// at runtime. + /*implicit*/ ExpandingArray(c10::ArrayRef values) { + // clang-format off + TORCH_CHECK( + values.size() == D, + "Expected ", D, " values, but instead got ", values.size()); + // clang-format on + std::copy(values.begin(), values.end(), values_.begin()); + } + + /// Constructs an `ExpandingArray` from a single value, which is repeated `D` + /// times (where `D` is the extent parameter of the `ExpandingArray`). + /*implicit*/ ExpandingArray(T single_size) { + values_.fill(single_size); + } + + /// Constructs an `ExpandingArray` from a correctly sized `std::array`. + /*implicit*/ ExpandingArray(const std::array& values) + : values_(values) {} + + /// Accesses the underlying `std::array`. + std::array& operator*() { + return values_; + } + + /// Accesses the underlying `std::array`. + const std::array& operator*() const { + return values_; + } + + /// Accesses the underlying `std::array`. 
+ std::array* operator->() { + return &values_; + } + + /// Accesses the underlying `std::array`. + const std::array* operator->() const { + return &values_; + } + + /// Returns an `ArrayRef` to the underlying `std::array`. + operator c10::ArrayRef() const { + return values_; + } + + /// Returns the extent of the `ExpandingArray`. + size_t size() const noexcept { + return D; + } + + protected: + /// The backing array. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::array values_; +}; + +template +std::ostream& operator<<( + std::ostream& stream, + const ExpandingArray& expanding_array) { + if (expanding_array.size() == 1) { + return stream << expanding_array->at(0); + } + return stream << static_cast>(expanding_array); +} + +/// A utility class that accepts either a container of `D`-many +/// `std::optional` values, or a single `std::optional` value, which is +/// internally repeated `D` times. It has the additional ability to accept +/// containers of the underlying type `T` and convert them to a container of +/// `std::optional`. +template +class ExpandingArrayWithOptionalElem + : public ExpandingArray> { + public: + using ExpandingArray>::ExpandingArray; + + /// Constructs an `ExpandingArrayWithOptionalElem` from an `initializer_list` + /// of the underlying type `T`. The extent of the length is checked against + /// the `ExpandingArrayWithOptionalElem`'s extent parameter `D` at runtime. + /*implicit*/ ExpandingArrayWithOptionalElem(std::initializer_list list) + : ExpandingArrayWithOptionalElem(c10::ArrayRef(list)) {} + + /// Constructs an `ExpandingArrayWithOptionalElem` from an `std::vector` of + /// the underlying type `T`. The extent of the length is checked against the + /// `ExpandingArrayWithOptionalElem`'s extent parameter `D` at runtime. 
+ /*implicit*/ ExpandingArrayWithOptionalElem(std::vector vec) + : ExpandingArrayWithOptionalElem(c10::ArrayRef(vec)) {} + + /// Constructs an `ExpandingArrayWithOptionalElem` from an `c10::ArrayRef` of + /// the underlying type `T`. The extent of the length is checked against the + /// `ExpandingArrayWithOptionalElem`'s extent parameter `D` at runtime. + /*implicit*/ ExpandingArrayWithOptionalElem(c10::ArrayRef values) + : ExpandingArray>(0) { + // clang-format off + TORCH_CHECK( + values.size() == D, + "Expected ", D, " values, but instead got ", values.size()); + // clang-format on + for (const auto i : c10::irange(this->values_.size())) { + this->values_[i] = values[i]; + } + } + + /// Constructs an `ExpandingArrayWithOptionalElem` from a single value of the + /// underlying type `T`, which is repeated `D` times (where `D` is the extent + /// parameter of the `ExpandingArrayWithOptionalElem`). + /*implicit*/ ExpandingArrayWithOptionalElem(T single_size) + : ExpandingArray>(0) { + for (const auto i : c10::irange(this->values_.size())) { + this->values_[i] = single_size; + } + } + + /// Constructs an `ExpandingArrayWithOptionalElem` from a correctly sized + /// `std::array` of the underlying type `T`. + /*implicit*/ ExpandingArrayWithOptionalElem(const std::array& values) + : ExpandingArray>(0) { + for (const auto i : c10::irange(this->values_.size())) { + this->values_[i] = values[i]; + } + } +}; + +template +std::ostream& operator<<( + std::ostream& stream, + const ExpandingArrayWithOptionalElem& expanding_array_with_opt_elem) { + if (expanding_array_with_opt_elem.size() == 1) { + const auto& elem = expanding_array_with_opt_elem->at(0); + stream << (elem.has_value() ? c10::str(elem.value()) : "None"); + } else { + std::vector str_array; + for (const auto& elem : *expanding_array_with_opt_elem) { + str_array.emplace_back( + elem.has_value() ? 
c10::str(elem.value()) : "None"); + } + stream << c10::ArrayRef(str_array); + } + return stream; +} + +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/fft.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/fft.h new file mode 100644 index 0000000000000000000000000000000000000000..dbeaa66d573f79dbcc694b458343c31a98fbfaab --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/fft.h @@ -0,0 +1,395 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::fft { + +/// Computes the 1 dimensional fast Fourier transform over a given dimension. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.fft. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kComplexDouble); +/// torch::fft::fft(t); +/// ``` +inline Tensor fft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_fft_symint(self, std::move(n), dim, norm); +} + +/// Computes the 1 dimensional inverse Fourier transform over a given dimension. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.ifft. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kComplexDouble); +/// torch::fft::ifft(t); +/// ``` +inline Tensor ifft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_ifft_symint(self, std::move(n), dim, norm); +} + +/// Computes the 2-dimensional fast Fourier transform over the given dimensions. 
+/// See https://pytorch.org/docs/main/fft.html#torch.fft.fft2. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::fft2(t); +/// ``` +inline Tensor fft2( + const Tensor& self, + OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_fft2(self, s, dim, norm); +} + +/// Computes the inverse of torch.fft.fft2 +/// See https://pytorch.org/docs/main/fft.html#torch.fft.ifft2. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::ifft2(t); +/// ``` +inline Tensor ifft2( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_ifft2(self, s, dim, norm); +} + +/// Computes the N dimensional fast Fourier transform over given dimensions. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.fftn. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::fftn(t); +/// ``` +inline Tensor fftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + at::OptionalIntArrayRef dim = std::nullopt, + std::optional norm = std::nullopt) { + return torch::fft_fftn(self, s, dim, norm); +} + +/// Computes the N dimensional inverse Fourier transform over given dimensions. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.ifftn. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::ifftn(t); +/// ``` +inline Tensor ifftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + at::OptionalIntArrayRef dim = std::nullopt, + std::optional norm = std::nullopt) { + return torch::fft_ifftn(self, s, dim, norm); +} + +/// Computes the 1 dimensional FFT of real input with onesided Hermitian output. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.rfft.
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128); +/// auto T = torch::fft::rfft(t); +/// assert(T.is_complex() && T.numel() == 128 / 2 + 1); +/// ``` +inline Tensor rfft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_rfft_symint(self, std::move(n), dim, norm); +} + +/// Computes the inverse of torch.fft.rfft +/// +/// The input is a onesided Hermitian Fourier domain signal, with real-valued +/// output. See https://pytorch.org/docs/main/fft.html#torch.fft.irfft +/// +/// Example: +/// ``` +/// auto T = torch::randn(128 / 2 + 1, torch::kComplexDouble); +/// auto t = torch::fft::irfft(T, /*n=*/128); +/// assert(t.is_floating_point() && t.numel() == 128); +/// ``` +inline Tensor irfft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_irfft_symint(self, std::move(n), dim, norm); +} + +/// Computes the 2-dimensional FFT of real input. Returns a onesided Hermitian +/// output. See https://pytorch.org/docs/main/fft.html#torch.fft.rfft2 +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kDouble); +/// torch::fft::rfft2(t); +/// ``` +inline Tensor rfft2( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_rfft2(self, s, dim, norm); +} + +/// Computes the inverse of torch.fft.rfft2. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.irfft2. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::irfft2(t); +/// ``` +inline Tensor irfft2( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_irfft2(self, s, dim, norm); +} + +/// Computes the N dimensional FFT of real input with onesided Hermitian output.
+/// See https://pytorch.org/docs/main/fft.html#torch.fft.rfftn +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kDouble); +/// torch::fft::rfftn(t); +/// ``` +inline Tensor rfftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + at::OptionalIntArrayRef dim = std::nullopt, + std::optional norm = std::nullopt) { + return torch::fft_rfftn(self, s, dim, norm); +} + +/// Computes the inverse of torch.fft.rfftn. +/// See https://pytorch.org/docs/main/fft.html#torch.fft.irfftn. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 128}, torch::kComplexDouble); +/// torch::fft::irfftn(t); +/// ``` +inline Tensor irfftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + at::OptionalIntArrayRef dim = std::nullopt, + std::optional norm = std::nullopt) { + return torch::fft_irfftn(self, s, dim, norm); +} + +/// Computes the 1 dimensional FFT of a onesided Hermitian signal +/// +/// The input represents a Hermitian symmetric time domain signal. The returned +/// Fourier domain representation of such a signal is real-valued. See +/// https://pytorch.org/docs/main/fft.html#torch.fft.hfft +/// +/// Example: +/// ``` +/// auto t = torch::randn(128 / 2 + 1, torch::kComplexDouble); +/// auto T = torch::fft::hfft(t, /*n=*/128); +/// assert(T.is_floating_point() && T.numel() == 128); +/// ``` +inline Tensor hfft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_hfft_symint(self, std::move(n), dim, norm); +} + +/// Computes the inverse FFT of a real-valued Fourier domain signal. +/// +/// The output is a onesided representation of the Hermitian symmetric time +/// domain signal. See https://pytorch.org/docs/main/fft.html#torch.fft.ihfft.
+/// +/// Example: +/// ``` +/// auto T = torch::randn(128, torch::kDouble); +/// auto t = torch::fft::ihfft(T); +/// assert(t.is_complex() && t.numel() == 128 / 2 + 1); +/// ``` +inline Tensor ihfft( + const Tensor& self, + std::optional n = std::nullopt, + int64_t dim = -1, + std::optional norm = std::nullopt) { + return torch::fft_ihfft_symint(self, std::move(n), dim, norm); +} + +/// Computes the 2-dimensional FFT of a Hermitian symmetric input signal. +/// +/// The input is a onesided representation of the Hermitian symmetric time +/// domain signal. See https://pytorch.org/docs/main/fft.html#torch.fft.hfft2. +/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 65}, torch::kComplexDouble); +/// auto T = torch::fft::hfft2(t, /*s=*/{128, 128}); +/// assert(T.is_floating_point() && T.numel() == 128 * 128); +/// ``` +inline Tensor hfft2( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_hfft2(self, s, dim, norm); +} + +/// Computes the 2-dimensional IFFT of a real input signal. +/// +/// The output is a onesided representation of the Hermitian symmetric time +/// domain signal. See +/// https://pytorch.org/docs/main/fft.html#torch.fft.ihfft2. +/// +/// Example: +/// ``` +/// auto T = torch::randn({128, 128}, torch::kDouble); +/// auto t = torch::fft::ihfft2(T); +/// assert(t.is_complex() && t.size(1) == 65); +/// ``` +inline Tensor ihfft2( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_ihfft2(self, s, dim, norm); +} + +/// Computes the N-dimensional FFT of a Hermitian symmetric input signal. +/// +/// The input is a onesided representation of the Hermitian symmetric time +/// domain signal. See https://pytorch.org/docs/main/fft.html#torch.fft.hfftn.
+/// +/// Example: +/// ``` +/// auto t = torch::randn({128, 65}, torch::kComplexDouble); +/// auto T = torch::fft::hfftn(t, /*s=*/{128, 128}); +/// assert(T.is_floating_point() && T.numel() == 128 * 128); +/// ``` +inline Tensor hfftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_hfftn(self, s, dim, norm); +} + +/// Computes the N-dimensional IFFT of a real input signal. +/// +/// The output is a onesided representation of the Hermitian symmetric time +/// domain signal. See +/// https://pytorch.org/docs/main/fft.html#torch.fft.ihfftn. +/// +/// Example: +/// ``` +/// auto T = torch::randn({128, 128}, torch::kDouble); +/// auto t = torch::fft::ihfftn(T); +/// assert(t.is_complex() && t.size(1) == 65); +/// ``` +inline Tensor ihfftn( + const Tensor& self, + at::OptionalIntArrayRef s = std::nullopt, + IntArrayRef dim = {-2, -1}, + std::optional norm = std::nullopt) { + return torch::fft_ihfftn(self, s, dim, norm); +} + +/// Computes the discrete Fourier Transform sample frequencies for a signal of +/// size n. +/// +/// See https://pytorch.org/docs/main/fft.html#torch.fft.fftfreq +/// +/// Example: +/// ``` +/// auto frequencies = torch::fft::fftfreq(128, torch::kDouble); +/// ``` +inline Tensor fftfreq(int64_t n, double d, const TensorOptions& options = {}) { + return torch::fft_fftfreq(n, d, options); +} + +inline Tensor fftfreq(int64_t n, const TensorOptions& options = {}) { + return torch::fft_fftfreq(n, /*d=*/1.0, options); +} + +/// Computes the sample frequencies for torch.fft.rfft with a signal of size n. +/// +/// Like torch.fft.rfft, only the positive frequencies are included.
+/// See https://pytorch.org/docs/main/fft.html#torch.fft.rfftfreq +/// +/// Example: +/// ``` +/// auto frequencies = torch::fft::rfftfreq(128, torch::kDouble); +/// ``` +inline Tensor rfftfreq(int64_t n, double d, const TensorOptions& options) { + return torch::fft_rfftfreq(n, d, options); +} + +inline Tensor rfftfreq(int64_t n, const TensorOptions& options) { + return torch::fft_rfftfreq(n, /*d=*/1.0, options); +} + +/// Reorders n-dimensional FFT output to have negative frequency terms first, by +/// a torch.roll operation. +/// +/// See https://pytorch.org/docs/main/fft.html#torch.fft.fftshift +/// +/// Example: +/// ``` +/// auto x = torch::randn({127, 4}); +/// auto centred_fft = torch::fft::fftshift(torch::fft::fftn(x)); +/// ``` +inline Tensor fftshift( + const Tensor& x, + at::OptionalIntArrayRef dim = std::nullopt) { + return torch::fft_fftshift(x, dim); +} + +/// Inverse of torch.fft.fftshift +/// +/// See https://pytorch.org/docs/main/fft.html#torch.fft.ifftshift +/// +/// Example: +/// ``` +/// auto x = torch::randn({127, 4}); +/// auto shift = torch::fft::fftshift(x); +/// auto unshift = torch::fft::ifftshift(shift); +/// assert(torch::allclose(x, unshift)); +/// ``` +inline Tensor ifftshift( + const Tensor& x, + at::OptionalIntArrayRef dim = std::nullopt) { + return torch::fft_ifftshift(x, dim); +} + +} // namespace torch::fft + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/imethod.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/imethod.h new file mode 100644 index 0000000000000000000000000000000000000000..cd174fd4d9d0f8bdb0dee94104bbc9f985a8022f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/imethod.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace torch { + +class TORCH_API IMethod { + /* + IMethod provides a portable interface for torch methods, whether + they are backed by torchscript or python/deploy. + + This is helpful since torchscript methods provide additional information + (e.g. FunctionSchema, Graph) which aren't available in pure python methods. + + Higher level APIs should prefer depending on this interface rather + than a specific implementation of it, to promote portability and reuse, and + avoid unintentional dependencies on e.g. script methods. + + Note: This API is experimental, and may evolve. + */ + public: + using IValueList = std::vector; + using IValueMap = std::unordered_map; + + IMethod() = default; + IMethod(const IMethod&) = default; + IMethod& operator=(const IMethod&) = default; + IMethod(IMethod&&) noexcept = default; + IMethod& operator=(IMethod&&) noexcept = default; + virtual ~IMethod() = default; + + virtual c10::IValue operator()( + std::vector args, + const IValueMap& kwargs = IValueMap()) const = 0; + + virtual const std::string& name() const = 0; + + // Returns an ordered list of argument names, possible in both + // script and python methods. 
This is a more portable dependency + // than a ScriptMethod FunctionSchema, which has more information + // than can be generally expected from a python method. + const std::vector& getArgumentNames() const; + + protected: + virtual void setArgumentNames( + std::vector& argumentNames) const = 0; + + private: + mutable bool isArgumentNamesInitialized_{false}; + mutable std::vector argumentNames_; +}; + +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/jit.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/jit.h new file mode 100644 index 0000000000000000000000000000000000000000..8965894e4bfb11e15a6560dbe34db8c48ef21a97 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/jit.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::jit { + +/// Compiles script code into an executable graph. +/// +/// Takes a string containing functions in script syntax and compiles them into +/// a module (graph). The returned module provides a `run_method` function +/// that may be used to invoke the compiled functions. +/// +/// For example: +/// \rst +/// .. 
code-block:: cpp +/// +/// auto module = torch::jit::compile(R"JIT( +/// def relu_script(a, b): +/// return torch.relu(a + b) +/// def test_while(a, i): +/// while i < 10: +/// a += a +/// i += 1 +/// return a +/// )JIT"); +/// IValue output = module->run_method("relu_script", a, b); +/// \endrst +TORCH_API std::shared_ptr compile(const std::string& source); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/mps.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/mps.h new file mode 100644 index 0000000000000000000000000000000000000000..79f97ede8c5a4b669f8764c8e5c90741d559163b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/mps.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#ifdef __OBJC__ +#include +#include +using MTLCommandBuffer_t = id; +using DispatchQueue_t = dispatch_queue_t; +#else +using MTLCommandBuffer_t = void*; +using DispatchQueue_t = void*; +#endif + +namespace torch::mps { + +/// Returns true if MPS device is available. +bool TORCH_API is_available(); + +/// Sets the RNG seed for the MPS device. +void TORCH_API manual_seed(uint64_t seed); + +/// Waits for all streams on the MPS device to complete. +/// This blocks the calling CPU thread by using the 'waitUntilCompleted()' +/// method to wait for Metal command buffers finish executing all the +/// encoded GPU operations before returning. +void TORCH_API synchronize(); + +/// Submits the currently active command buffer to run on the MPS device. 
+void TORCH_API commit(); + +/// Get the current command buffer to encode the Metal commands. +MTLCommandBuffer_t TORCH_API get_command_buffer(); + +/// Get the dispatch_queue_t to synchronize encoding the custom kernels +/// with the PyTorch MPS backend. +DispatchQueue_t TORCH_API get_dispatch_queue(); + +} // namespace torch::mps + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nativert/ModelRunnerHandle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nativert/ModelRunnerHandle.h new file mode 100644 index 0000000000000000000000000000000000000000..3ff9b6097752e200e0b6c6e6ec30e23b85809ed3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nativert/ModelRunnerHandle.h @@ -0,0 +1,51 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::nativert { + +// We don't want to forward declare in general but including ModelRunner will +// pollute the public API namespace too much. Therefore, we just use pimpl an +// incomplete ModelRunner here. 
+class ModelRunner; + +class TORCH_API ModelRunnerHandle { + public: + ModelRunnerHandle( + const std::string& packagePath, + const std::string& modelName); + + ModelRunnerHandle(ModelRunnerHandle&&) = default; + ModelRunnerHandle& operator=(ModelRunnerHandle&&) = default; + ModelRunnerHandle(const ModelRunnerHandle&) = delete; + ModelRunnerHandle& operator=(const ModelRunnerHandle&) = delete; + ~ModelRunnerHandle(); + + c10::IValue run( + const std::vector& args, + const std::unordered_map& kwargs); + + /** + * A low level API which expects user to always pass in flattened inputs. + * The ownership of the entire input list must be transferred to the + * executor via std::move or in-place construction. + */ + std::vector runWithFlatInputsAndOutputs( + std::vector flatInputs); + + private: + std::unique_ptr impl_; +}; + +} // namespace torch::nativert + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nested.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nested.h new file mode 100644 index 0000000000000000000000000000000000000000..9a6aecc0c63b937d03082b21ad57215b40325303 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nested.h @@ -0,0 +1,98 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nested { + +/// Nested tensor +/// +/// See +/// https://pytorch.org/docs/main/nested.html#torch.nested.nested_tensor +/// +/// ``` +// implemented on python object to allow torch.nested.nested_tensor to be +// constructed with arbitrarily nested python objects - for now, only arbitrary +// python lists 
and lists of Tensors +// See torch/csrc/autograd/python_nested_functions_manual.cpp for Python +// implementation +// See here for C++ implementation +inline at::Tensor nested_tensor( + at::TensorList nested_tensor_data, + const at::TensorOptions& options = {}) { + auto out = at::_nested_tensor_from_tensor_list( + nested_tensor_data, + c10::typeMetaToScalarType(options.dtype()), + std::nullopt, + options.device(), + options.pinned_memory()); + if (options.has_requires_grad() && options.requires_grad()) { + out.requires_grad_(true); + } + return out; +} + +inline at::Tensor nested_tensor( + at::ArrayRef nested_tensor_data, + const at::TensorOptions& options = {}) { + for (const auto& tdc : nested_tensor_data) { + TORCH_CHECK( + tdc.is_init_list(), + "nested_tensor() not implemented for these parameters"); + } + // Construct a TensorList using nested_tensor_data + std::vector tensor_list(nested_tensor_data.size()); + std::transform( + nested_tensor_data.begin(), + nested_tensor_data.end(), + tensor_list.begin(), + [&](const detail::TensorDataContainer& tdc) { + return tdc.convert_to_tensor(options); + }); + auto out = at::_nested_tensor_from_tensor_list( + tensor_list, + c10::typeMetaToScalarType(options.dtype()), + std::nullopt, + options.device(), + options.pinned_memory()); + if (options.has_requires_grad() && options.requires_grad()) { + out.requires_grad_(true); + } + return out; +} + +/// As Nested Tensor +/// +/// See +/// https://pytorch.org/docs/main/nested.html#torch.nested.as_nested_tensor +/// +/// ``` +inline at::Tensor as_nested_tensor( + at::TensorList list, + std::optional dtype = std::nullopt, + std::optional device = std::nullopt) { + return at::_nested_tensor_from_tensor_list( + list, dtype, std::nullopt, device, std::nullopt); +} + +/// Nested to padded tensor +/// +/// See +/// https://pytorch.org/docs/main/nested.html#torch.nested.to_padded_tensor +/// +/// ``` +inline at::Tensor to_padded_tensor( + const at::Tensor& self, + double padding, + 
at::OptionalIntArrayRef output_size = std::nullopt) { + return at::nested_to_padded_tensor(self, padding, output_size); +} + +} // namespace torch::nested + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn.h new file mode 100644 index 0000000000000000000000000000000000000000..58c215310e2c9369666e4e338e9b0169a6402982 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h new file mode 100644 index 0000000000000000000000000000000000000000..108063740fdcb3a53067b3b380de36b018790070 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/cloneable.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include + +namespace torch::nn { +/// The `clone()` method in the base `Module` class does not have knowledge of +/// the concrete runtime type of its subclasses. Therefore, `clone()` must +/// either be called from within the subclass, or from a base class that has +/// knowledge of the concrete type. `Cloneable` uses the CRTP to gain +/// knowledge of the subclass' static type and provide an implementation of the +/// `clone()` method. We do not want to use this pattern in the base class, +/// because then storing a module would always require templatizing it. +template +// NOLINTNEXTLINE(bugprone-exception-escape) +class Cloneable : public Module { + public: + using Module::Module; + + /// `reset()` must perform initialization of all members with reference + /// semantics, most importantly parameters, buffers and submodules. + virtual void reset() = 0; + + /// Performs a recursive "deep copy" of the `Module`, such that all parameters + /// and submodules in the cloned module are different from those in the + /// original module. 
+ std::shared_ptr clone( + const std::optional& device = std::nullopt) const override { + NoGradGuard no_grad; + + const auto& self = static_cast(*this); + auto copy = std::make_shared(self); + copy->parameters_.clear(); + copy->buffers_.clear(); + copy->children_.clear(); + copy->reset(); + TORCH_CHECK( + copy->parameters_.size() == parameters_.size(), + "The cloned module does not have the same number of " + "parameters as the original module after calling reset(). " + "Are you sure you called register_parameter() inside reset() " + "and not the constructor?"); + for (const auto& parameter : named_parameters(/*recurse=*/false)) { + auto& tensor = *parameter; + auto data = device && tensor.device() != *device ? tensor.to(*device) + : tensor.clone(); + copy->parameters_[parameter.key()].set_data(data); + } + TORCH_CHECK( + copy->buffers_.size() == buffers_.size(), + "The cloned module does not have the same number of " + "buffers as the original module after calling reset(). " + "Are you sure you called register_buffer() inside reset() " + "and not the constructor?"); + for (const auto& buffer : named_buffers(/*recurse=*/false)) { + auto& tensor = *buffer; + auto data = device && tensor.device() != *device ? tensor.to(*device) + : tensor.clone(); + copy->buffers_[buffer.key()].set_data(data); + } + TORCH_CHECK( + copy->children_.size() == children_.size(), + "The cloned module does not have the same number of " + "child modules as the original module after calling reset(). 
" + "Are you sure you called register_module() inside reset() " + "and not the constructor?"); + for (const auto& child : children_) { + copy->children_[child.key()]->clone_(*child.value(), device); + } + return copy; + } + + private: + void clone_(Module& other, const std::optional& device) final { + // Here we are *pretty* certain that `other's` type is `Derived` (because it + // was registered under the same name as `this`), but you never know what + // crazy things `reset()` does, so `dynamic_cast` just to be safe. + auto clone = std::dynamic_pointer_cast(other.clone(device)); + TORCH_CHECK( + clone != nullptr, + "Attempted to clone submodule, but it is of a " + "different type than the submodule it was to be cloned into"); + static_cast(*this) = *clone; + } +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional.h new file mode 100644 index 0000000000000000000000000000000000000000..b7cf2633b0c5816c52156fdc9036dc3c611b39dc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/activation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/activation.h new file mode 100644 index 0000000000000000000000000000000000000000..83b5ca946a9d36be53d77f94bab5819e81ca00e3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/activation.h @@ -0,0 +1,966 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor elu(Tensor input, double alpha, bool inplace) { + if (inplace) { + return torch::elu_(input, alpha); + } else { + return torch::elu(input, alpha); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.elu +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::ELUFuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::elu(x, F::ELUFuncOptions().alpha(0.42).inplace(true)); +/// ``` +inline Tensor elu(Tensor input, const ELUFuncOptions& options = {}) { + return detail::elu(std::move(input), options.alpha(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor selu(Tensor input, bool inplace) { + if (inplace) { + return torch::selu_(input); + } else { + return torch::selu(input); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.selu +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::SELUFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::selu(input, F::SELUFuncOptions(false)); +/// ``` +inline Tensor selu(Tensor input, const SELUFuncOptions& options = {}) { + return detail::selu(std::move(input), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor hardshrink(const Tensor& input, double lambda) { + return torch::hardshrink(input, lambda); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.hardshrink +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::HardshrinkFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hardshrink(x, F::HardshrinkFuncOptions().lambda(0.42)); +/// ``` +inline Tensor hardshrink( + const Tensor& input, + const HardshrinkFuncOptions& options = {}) { + return detail::hardshrink(input, options.lambda()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor hardtanh( + Tensor input, + double min_val, + double max_val, + bool inplace) { + if (inplace) { + return torch::hardtanh_(input, min_val, max_val); + } else { + return torch::hardtanh(input, min_val, max_val); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.hardtanh +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::HardtanhFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hardtanh(x, +/// F::HardtanhFuncOptions().min_val(-1.0).max_val(1.0).inplace(true)); +/// ``` +inline Tensor hardtanh(Tensor input, const HardtanhFuncOptions& options = {}) { + return detail::hardtanh( + std::move(input), + options.min_val(), + options.max_val(), + options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor leaky_relu(Tensor input, double negative_slope, bool inplace) { + if (inplace) { + return torch::leaky_relu_(input, negative_slope); + } else { + return torch::leaky_relu(input, negative_slope); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.leaky_relu +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::LeakyReLUFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::leaky_relu(x, +/// F::LeakyReLUFuncOptions().negative_slope(0.42).inplace(true)); +/// ``` +inline Tensor leaky_relu( + Tensor input, + const LeakyReLUFuncOptions& options = {}) { + return detail::leaky_relu( + std::move(input), options.negative_slope(), options.inplace()); +} + +// ============================================================================ + +inline Tensor logsigmoid(const Tensor& input) { + return torch::log_sigmoid(input); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor gumbel_softmax( + const Tensor& logits, + double tau, + bool hard, + int dim) { + auto gumbels = + -torch::empty_like(logits).exponential_().log(); // ~Gumbel(0,1) + gumbels = (logits + gumbels) / tau; // ~Gumbel(logits, tau) + auto y_soft = gumbels.softmax(dim); + + torch::Tensor ret; + if (hard) { + // Straight through. + auto index = std::get<1>(y_soft.max(dim, /*keepdim=*/true)); + auto y_hard = torch::zeros_like(logits).scatter_(dim, index, 1.0); + ret = y_hard - y_soft.detach() + y_soft; + } else { + ret = y_soft; + } + return ret; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.gumbel_softmax +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::GumbelSoftmaxFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true).dim(-1)); +/// ``` +inline Tensor gumbel_softmax( + const Tensor& logits, + const GumbelSoftmaxFuncOptions& options = {}) { + return detail::gumbel_softmax( + logits, options.tau(), options.hard(), options.dim()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor softmax( + const Tensor& input, + int64_t dim, + std::optional dtype) { + Tensor ret; + + if (dtype == std::nullopt) { + ret = input.softmax(dim); + } else { + ret = input.softmax(dim, dtype); + } + + return ret; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.softmax +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::SoftmaxFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softmax(input, F::SoftmaxFuncOptions(1)); +/// ``` +inline Tensor softmax(const Tensor& input, const SoftmaxFuncOptions& options) { + return detail::softmax(input, options.dim(), options.dtype()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor softmin( + const Tensor& input, + int64_t dim, + std::optional dtype) { + Tensor ret; + + if (dtype == std::nullopt) { + ret = (-input).softmax(dim); + } else { + ret = (-input).softmax(dim, dtype); + } + + return ret; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.softmin +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::SoftminFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softmin(input, F::SoftminFuncOptions(1)); +/// ``` +inline Tensor softmin(const Tensor& input, const SoftminFuncOptions& options) { + return detail::softmin(input, options.dim(), options.dtype()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor log_softmax( + const Tensor& input, + int64_t dim, + std::optional dtype) { + Tensor ret; + + if (dtype == std::nullopt) { + ret = input.log_softmax(dim); + } else { + ret = input.log_softmax(dim, dtype); + } + + return ret; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.log_softmax +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::LogSoftmaxFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::log_softmax(input, LogSoftmaxFuncOptions(1)); +/// ``` +inline Tensor log_softmax( + const Tensor& input, + const LogSoftmaxFuncOptions& options) { + return detail::log_softmax(input, options.dim(), options.dtype()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor glu(const Tensor& input, int64_t dim) { + TORCH_CHECK( + input.dim() != 0, + "glu does not support scalars because halving size must be even"); + return torch::glu(input, dim); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.glu +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::GLUFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::glu(input, GLUFuncOptions(1)); +/// ``` +inline Tensor glu(const Tensor& input, const GLUFuncOptions& options = {}) { + return detail::glu(input, options.dim()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor gelu(const Tensor& input, const std::string& approximate) { + return torch::gelu(input, approximate); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +inline Tensor gelu(const Tensor& input, const GELUFuncOptions& options = {}) { + return detail::gelu(input, options.approximate()); +} + +// ============================================================================ + +inline Tensor silu(const Tensor& input) { + return torch::silu(input); +} + +// ============================================================================ + +inline Tensor mish(const Tensor& input) { + return 
torch::mish(input); +} + +// ============================================================================ + +inline Tensor prelu(const Tensor& input, const Tensor& weight) { + return torch::prelu(input, weight); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor relu(Tensor input, bool inplace) { + if (inplace) { + return torch::relu_(input); + } else { + return torch::relu(input); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.relu +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::ReLUFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::relu(x, F::ReLUFuncOptions().inplace(true)); +/// ``` +inline Tensor relu(Tensor input, const ReLUFuncOptions& options = {}) { + return detail::relu(std::move(input), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor relu6(Tensor input, bool inplace) { + if (inplace) { + return torch::relu6_(input); + } else { + return torch::relu6(input); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.relu6 +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::ReLU6FuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::relu6(x, F::ReLU6FuncOptions().inplace(true)); +/// ``` +inline Tensor relu6(Tensor input, const ReLU6FuncOptions& options = {}) { + return detail::relu6(std::move(input), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor rrelu( + Tensor input, + double lower, + double upper, + bool training, + bool inplace) { + if (inplace) { + return torch::rrelu_(input, lower, upper, training); + } else { + return torch::rrelu(input, lower, upper, training); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.rrelu +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::RReLUFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::rrelu(x, F::RReLUFuncOptions().lower(0.1).upper(0.4).inplace(true)); +/// ``` +inline Tensor rrelu(Tensor input, const RReLUFuncOptions& options = {}) { + return detail::rrelu( + std::move(input), + options.lower(), + options.upper(), + options.training(), + options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor celu(Tensor input, double alpha, bool inplace) { + if (inplace) { + return torch::celu_(input, alpha); + } else { + return torch::celu(input, alpha); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.celu +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::CELUFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::celu(x, F::CELUFuncOptions().alpha(0.42).inplace(true)); +/// ``` +inline Tensor celu(Tensor input, const CELUFuncOptions& options = {}) { + return detail::celu(std::move(input), options.alpha(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor softplus(const Tensor& input, double beta, double threshold) { + return torch::softplus(input, beta, threshold); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.softplus +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::SoftplusFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softplus(x, F::SoftplusFuncOptions().beta(0.5).threshold(3.0)); +/// ``` +inline Tensor softplus( + const Tensor& input, + const SoftplusFuncOptions& options = {}) { + return detail::softplus(input, options.beta(), options.threshold()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor softshrink(const Tensor& input, double lambda) { + return torch::softshrink(input, lambda); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.softshrink +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::SoftshrinkFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softshrink(x, F::SoftshrinkFuncOptions(0.42)); +/// ``` +inline Tensor softshrink( + const Tensor& input, + const SoftshrinkFuncOptions& options = {}) { + return detail::softshrink(input, options.lambda()); +} + +// ============================================================================ + +inline Tensor softsign(const Tensor& input) { + return input / (input.abs() + 1); +} + +// ============================================================================ + +inline Tensor tanhshrink(const Tensor& input) { + return input - input.tanh(); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor threshold( + Tensor input, + double threshold, + double value, + bool inplace) { + if (inplace) { + return torch::threshold_(input, threshold, value); + } else { + return torch::threshold(input, threshold, value); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.threshold +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::ThresholdFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::threshold(x, F::ThresholdFuncOptions(0.5, 0.5).inplace(true)); +/// ``` +inline Tensor threshold(Tensor input, const ThresholdFuncOptions& options) { + return detail::threshold( + std::move(input), + options.threshold(), + options.value(), + options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple multi_head_attention_forward( + const Tensor& query, + const Tensor& key, + const Tensor& value, + int64_t embed_dim_to_check, + int64_t num_heads, + const Tensor& in_proj_weight, + const Tensor& in_proj_bias, + const Tensor& bias_k, + const Tensor& bias_v, + bool add_zero_attn, + double dropout_p, + const Tensor& out_proj_weight, + const Tensor& out_proj_bias, + bool training = true, + const Tensor& key_padding_mask = {}, + bool need_weights = true, + const Tensor& attn_mask = {}, + bool use_separate_proj_weight = false, + const Tensor& q_proj_weight = {}, + const Tensor& k_proj_weight = {}, + const Tensor& v_proj_weight = {}, + const Tensor& static_k = {}, + const Tensor& static_v = {}, + bool average_attn_weights = true) { + namespace F = torch::nn::functional; + + const auto query_sizes = query.sizes(); + const auto& tgt_len = query_sizes[0]; + const auto& bsz = query_sizes[1]; + const auto& embed_dim = query_sizes[2]; + TORCH_INTERNAL_ASSERT(embed_dim == embed_dim_to_check); + TORCH_INTERNAL_ASSERT(key.sizes() == value.sizes()); + + const auto head_dim = embed_dim / num_heads; + TORCH_CHECK( + head_dim * num_heads == embed_dim, + "embed_dim must be divisible by num_heads"); + const auto scaling = 1 / std::sqrt(head_dim); + + Tensor q, k, v; + if (!use_separate_proj_weight) { + if (torch::equal(query, key) && torch::equal(key, value)) { + // self-attention + const auto chunks = + F::linear(query, in_proj_weight, in_proj_bias).chunk(3, /*dim=*/-1); + q = chunks[0]; + k = 
chunks[1]; + v = chunks[2]; + } else if (torch::equal(key, value)) { + // encoder-decoder attention + // This is inline in_proj function with in_proj_weight and in_proj_bias + auto _b = in_proj_bias; + int64_t _start = 0; + auto _end = embed_dim; + auto _w = in_proj_weight.slice(/*dim=*/0, _start, _end); + if (_b.defined()) { + _b = _b.slice(/*dim=*/0, _start, _end); + } + q = F::linear(query, _w, _b); + + if (!key.defined()) { + TORCH_INTERNAL_ASSERT(!value.defined()); + k.reset(); + v.reset(); + } else { + // This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias; + _start = embed_dim; + _w = in_proj_weight.slice(/*dim=*/0, _start); + if (_b.defined()) { + _b = _b.slice(/*dim=*/0, _start); + } + const auto chunks = F::linear(key, _w, _b).chunk(2, /*dim=*/-1); + k = chunks[0]; + v = chunks[1]; + } + } else { + // This is inline in_proj function with in_proj_weight and in_proj_bias + auto _b = in_proj_bias; + int64_t _start = 0; + auto _end = embed_dim; + auto _w = in_proj_weight.slice(/*dim=*/0, _start, _end); + if (_b.defined()) { + _b = _b.slice(/*dim=*/0, _start, _end); + } + q = F::linear(query, _w, _b); + + // This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias; + _start = embed_dim; + _end = embed_dim * 2; + _w = in_proj_weight.slice(/*dim=*/0, _start, _end); + if (_b.defined()) { + _b = _b.slice(/*dim=*/0, _start, _end); + } + k = F::linear(key, _w, _b); + + // This is inline in_proj function with in_proj_weight and in_proj_bias + _b = in_proj_bias; + _start = embed_dim * 2; + _w = in_proj_weight.slice(/*dim=*/0, _start); + if (_b.defined()) { + _b = _b.slice(0, _start); + } + v = F::linear(value, _w, _b); + } + } else { + const auto& q_proj_weight_non_opt = q_proj_weight; + { + const auto sizes = q_proj_weight_non_opt.sizes(); + const auto len1 = sizes[0]; + const auto len2 = sizes[1]; + TORCH_CHECK(len1 == embed_dim && len2 == query.size(-1)); + } + + const auto& k_proj_weight_non_opt 
= k_proj_weight; + { + const auto sizes = k_proj_weight_non_opt.sizes(); + const auto len1 = sizes[0]; + const auto len2 = sizes[1]; + TORCH_CHECK(len1 == embed_dim && len2 == key.size(-1)); + } + + const auto& v_proj_weight_non_opt = v_proj_weight; + { + const auto sizes = v_proj_weight_non_opt.sizes(); + const auto len1 = sizes[0]; + const auto len2 = sizes[1]; + TORCH_CHECK(len1 == embed_dim && len2 == value.size(-1)); + } + + if (in_proj_bias.defined()) { + q = F::linear( + query, + q_proj_weight_non_opt, + in_proj_bias.slice(/*dim=*/0, 0, embed_dim)); + k = F::linear( + key, + k_proj_weight_non_opt, + in_proj_bias.slice(/*dim=*/0, embed_dim, (embed_dim * 2))); + v = F::linear( + value, + v_proj_weight_non_opt, + in_proj_bias.slice(/*dim=*/0, (embed_dim * 2))); + } else { + q = F::linear(query, q_proj_weight_non_opt, in_proj_bias); + k = F::linear(key, k_proj_weight_non_opt, in_proj_bias); + v = F::linear(value, v_proj_weight_non_opt, in_proj_bias); + } + } + q = q * scaling; + Tensor attn_mask_ = attn_mask; + Tensor key_padding_mask_ = key_padding_mask; + if (bias_k.defined() && bias_v.defined()) { + if (!static_k.defined() && !static_v.defined()) { + k = torch::cat({k, bias_k.repeat({1, bsz, 1})}); + v = torch::cat({v, bias_v.repeat({1, bsz, 1})}); + if (attn_mask_.defined()) { + attn_mask_ = torch::cat( + {attn_mask_, + torch::zeros( + {attn_mask_.size(0), 1}, + at::TensorOptions(attn_mask_.dtype()) + .device(attn_mask_.device()))}, + /*dim=*/1); + } + if (key_padding_mask_.defined()) { + key_padding_mask_ = torch::cat( + {key_padding_mask_, + torch::zeros( + {key_padding_mask_.size(0), 1}, + at::TensorOptions(key_padding_mask_.dtype()) + .device(key_padding_mask_.device()))}, + /*dim=*/1); + } + } else { + TORCH_CHECK(!static_k.defined(), "bias cannot be added to static key."); + TORCH_CHECK(!static_v.defined(), "bias cannot be added to static value."); + } + } else { + TORCH_CHECK(!bias_k.defined()); + TORCH_CHECK(!bias_v.defined()); + } + q = 
q.contiguous().view({tgt_len, bsz * num_heads, head_dim}).transpose(0, 1); + if (k.defined()) { + k = k.contiguous().view({-1, bsz * num_heads, head_dim}).transpose(0, 1); + } + if (v.defined()) { + v = v.contiguous().view({-1, bsz * num_heads, head_dim}).transpose(0, 1); + } + if (static_k.defined()) { + TORCH_CHECK(static_k.size(0) == bsz * num_heads); + TORCH_CHECK(static_k.size(2) == head_dim); + k = static_k; + } + if (static_v.defined()) { + TORCH_CHECK(static_v.size(0) == bsz * num_heads); + TORCH_CHECK(static_v.size(2) == head_dim); + v = static_v; + } + auto src_len = k.size(1); + if (key_padding_mask_.defined()) { + TORCH_CHECK(key_padding_mask_.size(0) == bsz); + TORCH_CHECK(key_padding_mask_.size(1) == src_len); + } + if (add_zero_attn) { + src_len += 1; + auto k_sizes = k.sizes().vec(); + k_sizes[1] = 1; + k = torch::cat( + {k, + torch::zeros( + k_sizes, at::TensorOptions(k.dtype()).device(k.device()))}, + /*dim=*/1); + auto v_sizes = v.sizes().vec(); + v_sizes[1] = 1; + v = torch::cat( + {v, + torch::zeros( + v_sizes, at::TensorOptions(v.dtype()).device(v.device()))}, + /*dim=*/1); + if (attn_mask_.defined()) { + attn_mask_ = torch::cat( + {attn_mask_, + torch::zeros( + {attn_mask_.size(0), 1}, + at::TensorOptions(attn_mask_.dtype()) + .device(attn_mask_.device()))}, + /*dim=*/1); + } + if (key_padding_mask_.defined()) { + key_padding_mask_ = torch::cat( + {key_padding_mask_, + torch::zeros( + {key_padding_mask_.size(0), 1}, + at::TensorOptions(key_padding_mask_.dtype()) + .device(key_padding_mask_.device()))}, + /*dim=*/1); + } + } + auto attn_output_weights = torch::bmm(q, k.transpose(1, 2)); + TORCH_CHECK( + attn_output_weights.sizes() == + IntArrayRef({bsz * num_heads, tgt_len, src_len})); + if (attn_mask_.defined()) { + attn_mask_ = attn_mask_.unsqueeze(0); + attn_output_weights += attn_mask_; + } + if (key_padding_mask_.defined()) { + attn_output_weights = + attn_output_weights.view({bsz, num_heads, tgt_len, src_len}); + attn_output_weights = 
AT_DISPATCH_FLOATING_TYPES( + attn_output_weights.scalar_type(), + "attn_output_weights.masked_fill", + [&]() { + return attn_output_weights.masked_fill( + key_padding_mask_.unsqueeze(1).unsqueeze(2), + -std::numeric_limits::infinity()); + }); + attn_output_weights = + attn_output_weights.view({bsz * num_heads, tgt_len, src_len}); + } + attn_output_weights = F::softmax(attn_output_weights, /*options=*/-1); + attn_output_weights = F::dropout( + attn_output_weights, + F::DropoutFuncOptions().p(dropout_p).training(training)); + auto attn_output = torch::bmm(attn_output_weights, v); + TORCH_CHECK( + attn_output.sizes() == IntArrayRef({bsz * num_heads, tgt_len, head_dim})); + attn_output = + attn_output.transpose(0, 1).contiguous().view({tgt_len, bsz, embed_dim}); + attn_output = F::linear(attn_output, out_proj_weight, out_proj_bias); + if (need_weights) { + attn_output_weights = + attn_output_weights.view({bsz, num_heads, tgt_len, src_len}); + if (average_attn_weights) { + // average attention weights over heads + attn_output_weights = attn_output_weights.sum(/*dim=*/1) / num_heads; + } + return std::make_tuple(attn_output, attn_output_weights); + } else { + return std::make_tuple(attn_output, Tensor()); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +inline std::tuple multi_head_attention_forward( + const Tensor& query, + const Tensor& key, + const Tensor& value, + const MultiheadAttentionForwardFuncOptions& options) { + return detail::multi_head_attention_forward( + query, + key, + value, + options.embed_dim_to_check(), + options.num_heads(), + options.in_proj_weight(), + options.in_proj_bias(), + options.bias_k(), + options.bias_v(), + options.add_zero_attn(), + options.dropout_p(), + options.out_proj_weight(), + options.out_proj_bias(), + options.training(), + options.key_padding_mask(), + options.need_weights(), + options.attn_mask(), + options.use_separate_proj_weight(), + options.q_proj_weight(), + options.k_proj_weight(), + 
options.v_proj_weight(), + options.static_k(), + options.static_v(), + options.average_attn_weights()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/batchnorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/batchnorm.h new file mode 100644 index 0000000000000000000000000000000000000000..32217cb21ee7743f4052d9c72a0d0c920dcab662 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/batchnorm.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor batch_norm( + const Tensor& input, + const Tensor& running_mean, + const Tensor& running_var, + Tensor weight, + Tensor bias, + bool training, + double momentum, + double eps) { + TORCH_CHECK( + input.dim() >= 2, + "Expected at least 2 input dimensions, but got ", + input.dim()); + if (training) { + auto size = input.sizes(); + int64_t size_prods = size[0]; + for (const auto i : c10::irange(size.size() - 2)) { + size_prods *= size[i + 2]; + } + TORCH_CHECK( + size_prods != 1, + "Expected more than 1 value per channel when training, got input size ", + size); + } + + return torch::batch_norm( + input, + weight, + bias, + running_mean, + running_var, + training, + momentum, + eps, + at::globalContext().userEnabledCuDNN()); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.batch_norm 
+/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::BatchNormFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::batch_norm(input, mean, variance, +/// F::BatchNormFuncOptions().weight(weight).bias(bias).momentum(0.1).eps(1e-05).training(false)); +/// ``` +inline Tensor batch_norm( + const Tensor& input, + const Tensor& running_mean, + const Tensor& running_var, + const BatchNormFuncOptions& options = {}) { + return detail::batch_norm( + input, + running_mean, + running_var, + options.weight(), + options.bias(), + options.training(), + options.momentum(), + options.eps()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/conv.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/conv.h new file mode 100644 index 0000000000000000000000000000000000000000..e439f43638721b495eb8effeaa66258158527b02 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/conv.h @@ -0,0 +1,302 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +inline std::string padding_unwrap(enumtype::kValid /*unused*/) { + return "valid"; +} + +inline std::string padding_unwrap(enumtype::kSame /*unused*/) { + return "same"; +} + +template +IntArrayRef padding_unwrap(const ExpandingArray& array) { + return array; +} + +inline 
Tensor conv1d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + ExpandingArray<1> stride, + const Conv1dFuncOptions::padding_t& padding, + ExpandingArray<1> dilation, + int64_t groups) { + return std::visit( + [&](const auto& pad) { + return torch::conv1d( + input, weight, bias, stride, padding_unwrap(pad), dilation, groups); + }, + padding); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv1d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::Conv1dFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv1d(x, weight, F::Conv1dFuncOptions().stride(1)); +/// ``` +inline Tensor conv1d( + const Tensor& input, + const Tensor& weight, + const Conv1dFuncOptions& options = {}) { + return detail::conv1d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.dilation(), + options.groups()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor conv2d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + ExpandingArray<2> stride, + const Conv2dFuncOptions::padding_t& padding, + ExpandingArray<2> dilation, + int64_t groups) { + return std::visit( + [&](const auto& pad) { + return torch::conv2d( + input, weight, bias, stride, padding_unwrap(pad), dilation, groups); + }, + padding); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv2d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::Conv2dFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv2d(x, weight, F::Conv2dFuncOptions().stride(1)); +/// ``` +inline Tensor conv2d( + const Tensor& input, + const Tensor& weight, + const Conv2dFuncOptions& options = {}) { + return detail::conv2d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.dilation(), + options.groups()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor conv3d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + ExpandingArray<3> stride, + const Conv3dFuncOptions::padding_t& padding, + ExpandingArray<3> dilation, + int64_t groups) { + return std::visit( + [&](const auto& pad) { + return torch::conv3d( + input, weight, bias, stride, padding_unwrap(pad), dilation, groups); + }, + padding); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::Conv3dFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv3d(x, weight, F::Conv3dFuncOptions().stride(1)); +/// ``` +inline Tensor conv3d( + const Tensor& input, + const Tensor& weight, + const Conv3dFuncOptions& options = {}) { + return detail::conv3d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.dilation(), + options.groups()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor conv_transpose1d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + int64_t groups, + IntArrayRef dilation) { + return torch::conv_transpose1d( + input, weight, bias, stride, padding, output_padding, groups, dilation); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv_transpose1d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::ConvTranspose1dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose1d(x, weight, F::ConvTranspose1dFuncOptions().stride(1)); +/// ``` +inline Tensor conv_transpose1d( + const Tensor& input, + const Tensor& weight, + const ConvTranspose1dFuncOptions& options = {}) { + return detail::conv_transpose1d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.output_padding(), + options.groups(), + options.dilation()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor conv_transpose2d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + int64_t groups, + IntArrayRef dilation) { + return torch::conv_transpose2d( + input, weight, bias, stride, padding, output_padding, groups, dilation); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv_transpose2d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::ConvTranspose2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose2d(x, weight, F::ConvTranspose2dFuncOptions().stride(1)); +/// ``` +inline Tensor conv_transpose2d( + const Tensor& input, + const Tensor& weight, + const ConvTranspose2dFuncOptions& options = {}) { + return detail::conv_transpose2d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.output_padding(), + options.groups(), + options.dilation()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor conv_transpose3d( + const Tensor& input, + const Tensor& weight, + const Tensor& bias, + IntArrayRef stride, + IntArrayRef padding, + IntArrayRef output_padding, + int64_t groups, + IntArrayRef dilation) { + return torch::conv_transpose3d( + input, weight, bias, stride, padding, output_padding, groups, dilation); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.conv_transpose3d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::ConvTranspose3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose3d(x, weight, F::ConvTranspose3dFuncOptions().stride(1)); +/// ``` +inline Tensor conv_transpose3d( + const Tensor& input, + const Tensor& weight, + const ConvTranspose3dFuncOptions& options = {}) { + return detail::conv_transpose3d( + input, + weight, + options.bias(), + options.stride(), + options.padding(), + options.output_padding(), + options.groups(), + options.dilation()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/distance.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/distance.h new file mode 100644 index 0000000000000000000000000000000000000000..1d1c852b731d29546e24eab524fa169d1ada624c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/distance.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor cosine_similarity( + const Tensor& x1, + const Tensor& x2, + int64_t dim, + double eps) { + return torch::cosine_similarity(x1, x2, dim, eps); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.cosine_similarity +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::CosineSimilarityFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cosine_similarity(input1, input2, +/// F::CosineSimilarityFuncOptions().dim(1)); +/// ``` +inline Tensor cosine_similarity( + const Tensor& x1, + const Tensor& x2, + const CosineSimilarityFuncOptions& options = {}) { + return detail::cosine_similarity(x1, x2, options.dim(), options.eps()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor pairwise_distance( + const Tensor& x1, + const Tensor& x2, + double p, + double eps, + bool keepdim) { + return torch::pairwise_distance(x1, x2, p, eps, keepdim); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.pairwise_distance +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::PairwiseDistanceFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pairwise_distance(input1, input2, F::PairwiseDistanceFuncOptions().p(1)); +/// ``` +inline Tensor pairwise_distance( + const Tensor& x1, + const Tensor& x2, + const PairwiseDistanceFuncOptions& options = {}) { + return detail::pairwise_distance( + x1, x2, options.p(), options.eps(), options.keepdim()); +} + +// ============================================================================ + +/// Computes the p-norm distance between every pair of row vectors in the input. +/// This function will be faster if the rows are contiguous. +inline Tensor pdist(const Tensor& input, double p = 2.0) { + return torch::pdist(input, p); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/dropout.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..8d02a96660a7770d3ba71be62072d37e877094e5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/dropout.h @@ -0,0 +1,235 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +inline Tensor dropout(Tensor input, double p, bool training, bool inplace) { + TORCH_CHECK( + p >= 0. && p <= 1., + "dropout probability has to be between 0 and 1, but got ", + p); + if (inplace) { + return torch::dropout_(input, p, training); + } else { + return torch::dropout(input, p, training); + } +} + +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.dropout +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::DropoutFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout(input, F::DropoutFuncOptions().p(0.5)); +/// ``` +inline Tensor dropout(Tensor input, const DropoutFuncOptions& options = {}) { + return detail::dropout( + std::move(input), options.p(), options.training(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +template +inline Tensor _dropoutNd_helper( + Tensor input, + double p, + bool training, + bool inplace, + const char* fn_name) { + TORCH_CHECK( + p >= 0. && p <= 1., + "dropout probability has to be between 0 and 1, but got ", + p); + + auto inp_dim = input.dim(); + auto is_batched = inp_dim == batched_dim; + if (!is_batched) { + if (inplace) { + input = input.unsqueeze_(0); + } else { + input = input.unsqueeze(0); + } + } + + Tensor result; + if (inplace) { + result = torch::feature_dropout_(input, p, training); + } else { + result = torch::feature_dropout(input, p, training); + } + + if (!is_batched) { + if (inplace) { + result = result.squeeze_(0); + } else { + result = result.squeeze(0); + } + } + return result; +} + +inline Tensor dropout2d(Tensor input, double p, bool training, bool inplace) { + return _dropoutNd_helper<3, 4>( + std::move(input), p, training, inplace, "dropout2d"); +} + +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.dropout2d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::Dropout2dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout2d(input, F::Dropout2dFuncOptions().p(0.5)); +/// ``` +inline Tensor dropout2d( + Tensor input, + const Dropout2dFuncOptions& options = {}) { + return detail::dropout2d( + std::move(input), options.p(), options.training(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +inline Tensor dropout3d(Tensor input, double p, bool training, bool inplace) { + return _dropoutNd_helper<4, 5>( + std::move(input), p, training, inplace, "dropout3d"); +} + +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.dropout3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::Dropout3dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout3d(input, F::Dropout3dFuncOptions().p(0.5)); +/// ``` +inline Tensor dropout3d( + Tensor input, + const Dropout3dFuncOptions& options = {}) { + return detail::dropout3d( + std::move(input), options.p(), options.training(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +inline Tensor alpha_dropout( + Tensor input, + double p, + bool training, + bool inplace) { + if (p < 0. || p > 1.) { + TORCH_CHECK( + false, "dropout probability has to be between 0 and 1, but got ", p); + } + return inplace ? 
torch::alpha_dropout_(input, p, training) + : torch::alpha_dropout(input, p, training); +} + +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.alpha_dropout +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::AlphaDropoutFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::alpha_dropout(input, +/// F::AlphaDropoutFuncOptions().p(0.5).training(false)); +/// ``` +inline Tensor alpha_dropout( + Tensor input, + const AlphaDropoutFuncOptions& options = {}) { + return detail::alpha_dropout( + std::move(input), options.p(), options.training(), options.inplace()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { + +inline Tensor feature_alpha_dropout( + Tensor input, + double p, + bool training, + bool inplace) { + if (p < 0. || p > 1.) { + TORCH_CHECK( + false, "dropout probability has to be between 0 and 1, but got ", p); + } + return inplace ? torch::feature_alpha_dropout_(input, p, training) + : torch::feature_alpha_dropout(input, p, training); +} + +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.feature_alpha_dropout +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::FeatureAlphaDropoutFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::feature_alpha_dropout(input, +/// F::FeatureAlphaDropoutFuncOptions().p(0.5).training(false)); +/// ``` +inline Tensor feature_alpha_dropout( + Tensor input, + const FeatureAlphaDropoutFuncOptions& options = {}) { + return detail::feature_alpha_dropout( + std::move(input), options.p(), options.training(), options.inplace()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/embedding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/embedding.h new file mode 100644 index 0000000000000000000000000000000000000000..a0db4e457eccfbc7d58abc76f5e26573be70e140 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/embedding.h @@ -0,0 +1,211 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +inline Tensor one_hot(const Tensor& tensor, int64_t num_classes = -1) { + return torch::one_hot(tensor, num_classes); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline void _no_grad_embedding_renorm_( + Tensor weight, + const Tensor& input, + float max_norm, + float norm_type) { + torch::NoGradGuard no_grad; + torch::embedding_renorm_(weight, input, max_norm, norm_type); +} + +inline Tensor embedding( + const Tensor& input, + const Tensor& weight, + std::optional padding_idx, + std::optional max_norm, + double norm_type, + bool scale_grad_by_freq, + bool sparse) { + auto input_ = input; + + if (padding_idx != std::nullopt) { + if (*padding_idx > 0) { + 
TORCH_CHECK( + *padding_idx < weight.size(0), + "Padding_idx must be within num_embeddings"); + } else if (*padding_idx < 0) { + TORCH_CHECK( + *padding_idx >= -weight.size(0), + "Padding_idx must be within num_embedding"); + padding_idx = weight.size(0) + *padding_idx; + } + } else { + padding_idx = -1; + } + + if (max_norm != std::nullopt) { + input_ = input_.contiguous(); + // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) + _no_grad_embedding_renorm_(weight, input_, *max_norm, norm_type); + } + return torch::embedding( + weight, input_, *padding_idx, scale_grad_by_freq, sparse); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.embedding +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::EmbeddingFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::embedding(input, weight, +/// F::EmbeddingFuncOptions().norm_type(2.5).scale_grad_by_freq(true).sparse(true)); +/// ``` +inline Tensor embedding( + const Tensor& input, + const Tensor& weight, + const EmbeddingFuncOptions& options = {}) { + return detail::embedding( + input, + weight, + options.padding_idx(), + options.max_norm(), + options.norm_type(), + options.scale_grad_by_freq(), + options.sparse()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor embedding_bag( + const Tensor& input, + const Tensor& weight, + const Tensor& offsets, + std::optional max_norm, + double norm_type, + bool scale_grad_by_freq, + EmbeddingBagMode mode, + bool sparse, + const Tensor& per_sample_weights, + bool include_last_offset, + std::optional padding_idx) { + auto input_ = input; + auto offsets_ = offsets; + auto per_sample_weights_ = per_sample_weights; + TORCH_CHECK( + 
!per_sample_weights_.defined() || + input_.sizes() == per_sample_weights_.sizes(), + "embedding_bag: If per_sample_weights (", + per_sample_weights_.sizes(), + ") is not null, then it must have the same shape as the input (", + input_.sizes(), + ")"); + if (input_.dim() == 2) { + TORCH_CHECK( + !offsets_.defined(), + "If input is 2D, then offsets has to be null, as input is treated is a mini-batch of fixed length sequences. However, found offsets of type Tensor"); + offsets_ = torch::arange( + 0, + input_.numel(), + input_.size(1), + torch::TensorOptions().dtype(torch::kLong).device(input_.device())); + input_ = input_.reshape(-1); + if (per_sample_weights_.defined()) { + per_sample_weights_ = per_sample_weights_.reshape(-1); + } + } else if (input_.dim() == 1) { + TORCH_CHECK( + offsets_.defined(), "offsets has to be a 1D Tensor but got null"); + TORCH_CHECK(offsets_.dim() == 1, "offsets has to be a 1D Tensor"); + } else { + TORCH_CHECK( + false, + "input has to be 1D or 2D Tensor, but got Tensor of dimension ", + input_.dim()); + } + + int mode_enum = 0; + if (std::holds_alternative(mode)) { + mode_enum = 0; + } else if (std::holds_alternative(mode)) { + mode_enum = 1; + } else if (std::holds_alternative(mode)) { + mode_enum = 2; + TORCH_CHECK( + !scale_grad_by_freq, + "max mode does not support scaling the gradient by the frequency"); + TORCH_CHECK(!sparse, "max mode does not support sparse weights"); + } else { + TORCH_CHECK(false, "mode has to be one of sum, mean or max"); + } + + if (max_norm != std::nullopt) { + // NOLINTNEXTLINE(cppcoreguidelines-narrowing-conversions,bugprone-narrowing-conversions) + _no_grad_embedding_renorm_(weight, input_, *max_norm, norm_type); + } + + TORCH_CHECK( + !per_sample_weights_.defined() || std::get_if(&mode), + "embedding_bag: per_sample_weights was not null. 
", + "per_sample_weights is only supported for mode='kSum' (got mode='", + torch::enumtype::get_enum_name(mode), + "').Please open a feature request on GitHub."); + + return std::get<0>(torch::embedding_bag( + weight, + input_, + offsets_, + scale_grad_by_freq, + mode_enum, + sparse, + per_sample_weights_, + include_last_offset, + padding_idx)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.embedding_bag +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::EmbeddingBagFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::embedding_bag(input, weight, +/// F::EmbeddingBagFuncOptions().mode(torch::kSum).offsets(offsets)); +/// ``` +inline Tensor embedding_bag( + const Tensor& input, + const Tensor& weight, + const EmbeddingBagFuncOptions& options = {}) { + return detail::embedding_bag( + input, + weight, + options.offsets(), + options.max_norm(), + options.norm_type(), + options.scale_grad_by_freq(), + options.mode(), + options.sparse(), + options.per_sample_weights(), + options.include_last_offset(), + options.padding_idx()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/fold.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/fold.h new file mode 100644 index 0000000000000000000000000000000000000000..1a2f563ca5dcbb5571b515270c1f70a415c52d63 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/fold.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor fold( + const Tensor& input, + ExpandingArray<2> output_size, + ExpandingArray<2> kernel_size, + ExpandingArray<2> dilation, + ExpandingArray<2> padding, + ExpandingArray<2> stride) { + if (input.dim() == 3 || input.dim() == 2) { + return torch::col2im( + input, output_size, kernel_size, dilation, padding, stride); + } else { + TORCH_CHECK( + false, + "Input Error: Only unbatched (2D) or batched (3D) input Tensors are supported " + "(got ", + input.dim(), + "D)"); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.fold +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::FoldFuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fold(input, F::FoldFuncOptions({3, 2}, {2, 2})); +/// ``` +inline Tensor fold(const Tensor& input, const FoldFuncOptions& options) { + return detail::fold( + input, + options.output_size(), + options.kernel_size(), + options.dilation(), + options.padding(), + options.stride()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor unfold( + const Tensor& input, + ExpandingArray<2> kernel_size, + ExpandingArray<2> dilation, + ExpandingArray<2> padding, + ExpandingArray<2> stride) { + if (input.dim() == 4) { + return torch::im2col(input, kernel_size, dilation, padding, stride); + } else { + TORCH_CHECK( + false, + "Input Error: Only 4D input Tensors are supported " + "(got ", + input.dim(), + "D)"); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.unfold +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::UnfoldFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::unfold(input, F::UnfoldFuncOptions({2, 2}).padding(1).stride(2)); +/// ``` +inline Tensor unfold(const Tensor& input, const UnfoldFuncOptions& options) { + return detail::unfold( + input, + options.kernel_size(), + options.dilation(), + options.padding(), + options.stride()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/instancenorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/instancenorm.h new file mode 100644 index 0000000000000000000000000000000000000000..3c892f8626bf25feea5dcbe8dea5caf1c73cea64 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/instancenorm.h @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor instance_norm( + const Tensor& input, + const Tensor& running_mean, + const Tensor& running_var, + const Tensor& weight, + const Tensor& bias, + bool use_input_stats, + double momentum, + double eps) { + return torch::instance_norm( + input, + weight, + bias, + running_mean, + running_var, + use_input_stats, + momentum, + eps, + at::globalContext().userEnabledCuDNN()); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.instance_norm +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::InstanceNormFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::instance_norm(input, +/// F::InstanceNormFuncOptions().running_mean(mean).running_var(variance).weight(weight).bias(bias).momentum(0.1).eps(1e-5)); +/// ``` +inline Tensor instance_norm( + const Tensor& input, + const InstanceNormFuncOptions& options = {}) { + return detail::instance_norm( + input, + options.running_mean(), + options.running_var(), + options.weight(), + options.bias(), + options.use_input_stats(), + options.momentum(), + options.eps()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/linear.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/linear.h new file mode 100644 index 0000000000000000000000000000000000000000..04c7ffaafa8d3c1f6e23252facfb59b0dea910c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/linear.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +inline Tensor bilinear( + const Tensor& input1, + const Tensor& input2, + const Tensor& weight, + const Tensor& bias = Tensor()) { + return torch::bilinear(input1, input2, weight, bias); +} + +// ============================================================================ + +inline Tensor linear( + const Tensor& input, + const Tensor& weight, + const Tensor& bias = {}) { + if (input.dim() == 2 && bias.defined()) { + // fused op is marginally faster + return torch::addmm(bias, input, weight.t()); + } else { + auto output = input.matmul(weight.t()); + if 
(bias.defined()) { + output += bias; + } + return output; + } +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/loss.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/loss.h new file mode 100644 index 0000000000000000000000000000000000000000..17a553809973d9c2d43c3150ea0b8f400a6cfb20 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/loss.h @@ -0,0 +1,1044 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor l1_loss( + const Tensor& input, + const Tensor& target, + L1LossFuncOptions::reduction_t reduction) { + return torch::l1_loss(input, target, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.l1_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::L1LossFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::l1_loss(input, target, F::L1LossFuncOptions(torch::kNone)); +/// ``` +inline Tensor l1_loss( + const Tensor& input, + const Tensor& target, + const L1LossFuncOptions& options = {}) { + return detail::l1_loss(input, target, options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor kl_div( + const Tensor& input, + const Tensor& target, + KLDivFuncOptions::reduction_t reduction, + bool log_target = false) { + torch::Reduction::Reduction reduction_enum{}; + + if (std::holds_alternative(reduction)) { + TORCH_WARN( + "reduction: 'mean' divides the total loss by both the batch size and the support size." + "'batchmean' divides only by the batch size, and aligns with the KL div math definition." + "'mean' will be changed to behave the same as 'batchmean' in the next major release."); + } + + // special case for batchmean + if (std::holds_alternative(reduction)) { + reduction_enum = torch::Reduction::Sum; + } else { + reduction_enum = enumtype::reduction_get_enum(reduction); + } + + auto reduced = torch::kl_div(input, target, reduction_enum, log_target); + + if (std::holds_alternative(reduction) && + input.dim() != 0) { + reduced = reduced / input.sizes()[0]; + } + + return reduced; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.kl_div +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::KLDivFuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::kl_div(input, target, +/// F::KLDivFuncOptions.reduction(torch::kNone).log_target(false)); +/// ``` +inline Tensor kl_div( + const Tensor& input, + const Tensor& target, + const KLDivFuncOptions& options = {}) { + return detail::kl_div( + input, target, options.reduction(), options.log_target()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor mse_loss( + const Tensor& input, + const Tensor& target, + MSELossFuncOptions::reduction_t reduction) { + if (!(target.sizes() == input.sizes())) { + TORCH_WARN( + "Using a target size (", + target.sizes(), + ") that is different to the input size (", + input.sizes(), + "). ", + "This will likely lead to incorrect results due to broadcasting. ", + "Please ensure they have the same size."); + } + std::vector broadcast_tensors = + torch::broadcast_tensors({input, target}); + auto expanded_input = broadcast_tensors[0]; + auto expanded_target = broadcast_tensors[1]; + return torch::mse_loss( + expanded_input, expanded_target, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.mse_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MSELossFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::mse_loss(input, target, F::MSELossFuncOptions(torch::kNone)); +/// ``` +inline Tensor mse_loss( + const Tensor& input, + const Tensor& target, + const MSELossFuncOptions& options = {}) { + return detail::mse_loss(input, target, options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor binary_cross_entropy( + const Tensor& input, + const Tensor& target, + const Tensor& weight, + BinaryCrossEntropyFuncOptions::reduction_t reduction) { + auto reduction_enum = enumtype::reduction_get_enum(reduction); + + if (target.sizes() != input.sizes()) { + TORCH_CHECK( + false, + "Using a target size (", + target.sizes(), + ") ", + "that is different to the input size (", + input.sizes(), + ") is deprecated. ", + "Please ensure they have the same size."); + } + + auto weight_ = weight; + if (weight_.defined()) { + auto new_size = at::infer_size(target.sizes(), weight_.sizes()); + weight_ = weight_.expand(new_size); + } + + return torch::binary_cross_entropy(input, target, weight_, reduction_enum); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.binary_cross_entropy +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::BinaryCrossEntropyFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::binary_cross_entropy(input, target, +/// F::BinaryCrossEntropyFuncOptions().weight(weight)); +/// ``` +inline Tensor binary_cross_entropy( + const Tensor& input, + const Tensor& target, + const BinaryCrossEntropyFuncOptions& options = {}) { + return detail::binary_cross_entropy( + input, target, options.weight(), options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor hinge_embedding_loss( + const Tensor& input, + const Tensor& target, + double margin, + HingeEmbeddingLossFuncOptions::reduction_t reduction) { + return torch::hinge_embedding_loss( + input, target, margin, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.hinge_embedding_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::HingeEmbeddingLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hinge_embedding_loss(input, target, +/// F::HingeEmbeddingLossFuncOptions().margin(2)); +/// ``` +inline Tensor hinge_embedding_loss( + const Tensor& input, + const Tensor& target, + const HingeEmbeddingLossFuncOptions& options = {}) { + return detail::hinge_embedding_loss( + input, target, options.margin(), options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor multi_margin_loss( + const Tensor& input, + const Tensor& target, + int64_t p, + double margin, + const Tensor& weight, + MultiMarginLossFuncOptions::reduction_t reduction) { + TORCH_CHECK(p == 1 || p == 2, "only p == 1 and p == 2 supported"); + if (weight.defined()) { + TORCH_CHECK(weight.dim() == 1, "weight must be one-dimensional"); + } + + return torch::multi_margin_loss( + input, + target, + p, + margin, + weight, + enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.multi_margin_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::MultiMarginLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multi_margin_loss(input, target, +/// F::MultiMarginLossFuncOptions().margin(2).weight(weight)); +/// ``` +inline Tensor multi_margin_loss( + const Tensor& input, + const Tensor& target, + const MultiMarginLossFuncOptions& options = {}) { + return detail::multi_margin_loss( + input, + target, + options.p(), + options.margin(), + options.weight(), + options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor cosine_embedding_loss( + const Tensor& input1, + const Tensor& input2, + const Tensor& target, + double margin, + CosineEmbeddingLossFuncOptions::reduction_t reduction) { + return torch::cosine_embedding_loss( + input1, input2, target, margin, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.cosine_embedding_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::CosineEmbeddingLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cosine_embedding_loss(input1, input2, target, +/// F::CosineEmbeddingLossFuncOptions().margin(0.5)); +/// ``` +inline Tensor cosine_embedding_loss( + const Tensor& input1, + const Tensor& input2, + const Tensor& target, + const CosineEmbeddingLossFuncOptions& options = {}) { + return detail::cosine_embedding_loss( + input1, input2, target, options.margin(), options.reduction()); +} + +// ============================================================================ + +inline Tensor _smooth_l1_loss( + const Tensor& input, + const Tensor& target, + double beta = 1.) 
{ + auto t = torch::abs(input - target); + return torch::where(t < beta, 0.5 * torch::pow(t, 2) / beta, t - 0.5 * beta); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor smooth_l1_loss( + const Tensor& input, + const Tensor& target, + SmoothL1LossFuncOptions::reduction_t reduction, + std::optional beta_opt = std::nullopt) { + if (target.sizes() != input.sizes()) { + TORCH_WARN( + "Using a target size (", + target.sizes(), + ") that is different to the input size (", + input.sizes(), + "). ", + "This will likely lead to incorrect results due to broadcasting. ", + "Please ensure they have the same size."); + } + double beta = beta_opt.value_or(1.0); + + std::vector expanded_tensors = + torch::broadcast_tensors({input, target}); + return torch::smooth_l1_loss( + expanded_tensors[0], + expanded_tensors[1], + enumtype::reduction_get_enum(reduction), + beta); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.smooth_l1_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::SmoothL1LossFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::smooth_l1_loss(input, target, F::SmoothL1LossFuncOptions(torch::kNone)); +/// ``` +inline Tensor smooth_l1_loss( + const Tensor& input, + const Tensor& target, + const SmoothL1LossFuncOptions& options = {}) { + return detail::smooth_l1_loss( + input, target, options.reduction(), options.beta()); +} + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.smooth_l1_loss +/// about the exact behavior of this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::smooth_l1_loss(input, target, /*options=*/torch::kNone, /*beta=*/0.5); +/// ``` +inline Tensor smooth_l1_loss( + const Tensor& input, + const Tensor& target, + const SmoothL1LossFuncOptions& options, + double beta) { + TORCH_CHECK( + !options.beta().has_value(), + "expected beta not to be provided in 'options', but got ", + options.beta()); + return detail::smooth_l1_loss(input, target, options.reduction(), beta); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor huber_loss( + const Tensor& input, + const Tensor& target, + HuberLossFuncOptions::reduction_t reduction, + double delta = 1.) { + if (target.sizes() != input.sizes()) { + TORCH_WARN( + "Using a target size (", + target.sizes(), + ") that is different to the input size (", + input.sizes(), + "). ", + "This will likely lead to incorrect results due to broadcasting. ", + "Please ensure they have the same size."); + } + + std::vector expanded_tensors = + torch::broadcast_tensors({input, target}); + return torch::huber_loss( + expanded_tensors[0], + expanded_tensors[1], + enumtype::reduction_get_enum(reduction), + delta); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.huber_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::HuberLossFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::huber_loss(input, target, +/// F::HuberLossFuncOptions().reduction(torch::kNone).delta(0.5)); +/// ``` +inline Tensor huber_loss( + const Tensor& input, + const Tensor& target, + const HuberLossFuncOptions& options = {}) { + return detail::huber_loss( + input, target, options.reduction(), options.delta()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor multilabel_margin_loss( + const Tensor& input, + const Tensor& target, + MultilabelMarginLossFuncOptions::reduction_t reduction) { + return torch::multilabel_margin_loss( + input, target, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.multilabel_margin_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::MultilabelMarginLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multilabel_margin_loss(input, target, +/// F::MultilabelMarginLossFuncOptions(torch::kNone)); +/// ``` +inline Tensor multilabel_margin_loss( + const Tensor& input, + const Tensor& target, + const MultilabelMarginLossFuncOptions& options = {}) { + return detail::multilabel_margin_loss(input, target, options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor soft_margin_loss( + const Tensor& input, + const Tensor& target, + SoftMarginLossFuncOptions::reduction_t reduction) { + return torch::soft_margin_loss( + input, target, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.soft_margin_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::SoftMarginLossFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::soft_margin_loss(input, target, +/// F::SoftMarginLossFuncOptions(torch::kNone)); +/// ``` +inline Tensor soft_margin_loss( + const Tensor& input, + const Tensor& target, + const SoftMarginLossFuncOptions& options = {}) { + return detail::soft_margin_loss(input, target, options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor multilabel_soft_margin_loss( + const Tensor& input, + const Tensor& target, + const Tensor& weight, + MultilabelSoftMarginLossFuncOptions::reduction_t reduction) { + auto loss = + -(target * torch::log_sigmoid(input) + + (1 - target) * torch::log_sigmoid(-input)); + if (weight.defined()) { + loss = loss * weight; + } + + auto class_dim = input.dim() - 1; + auto C = input.size(class_dim); + loss = loss.sum(class_dim) / C; // only return N loss values + + Tensor ret; + + if (std::holds_alternative(reduction)) { + ret = loss; + } else if (std::holds_alternative(reduction)) { + ret = loss.mean(); + } else if (std::holds_alternative(reduction)) { + ret = loss.sum(); + } else { + ret = input; + TORCH_INTERNAL_ASSERT( + false, enumtype::get_enum_name(reduction), " is not valid"); + } + return ret; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.multilabel_soft_margin_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::MultilabelSoftMarginLossFuncOptions` class to learn +/// what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multilabel_soft_margin_loss(input, target, +/// F::MultilabelSoftMarginLossFuncOptions().reduction(torch::kNone).weight(weight)); +/// ``` +inline Tensor multilabel_soft_margin_loss( + const Tensor& input, + const Tensor& target, + const MultilabelSoftMarginLossFuncOptions& options = {}) { + return detail::multilabel_soft_margin_loss( + input, target, options.weight(), options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor triplet_margin_loss( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative, + double margin, + double p, + double eps, + bool swap, + TripletMarginLossFuncOptions::reduction_t reduction) { + return torch::triplet_margin_loss( + anchor, + positive, + negative, + margin, + p, + eps, + swap, + enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.triplet_margin_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::TripletMarginLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::triplet_margin_loss(anchor, positive, negative, +/// F::TripletMarginLossFuncOptions().margin(1.0)); +/// ``` +inline Tensor triplet_margin_loss( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative, + const TripletMarginLossFuncOptions& options = {}) { + return detail::triplet_margin_loss( + anchor, + positive, + negative, + options.margin(), + options.p(), + options.eps(), + options.swap(), + options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor triplet_margin_with_distance_loss( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative, + std::optional + distance_function, + double margin, + bool swap, + TripletMarginWithDistanceLossFuncOptions::reduction_t reduction) { + Tensor dist_pos, dist_neg; + if (distance_function.has_value()) { + auto distance_function_impl = distance_function.value(); + dist_pos = distance_function_impl(anchor, positive); + dist_neg = distance_function_impl(anchor, negative); + } else { + dist_pos = pairwise_distance(anchor, positive); + dist_neg = pairwise_distance(anchor, negative); + } + + if (swap) { + Tensor dist_swap; + if (distance_function.has_value()) { + dist_swap = distance_function.value()(positive, negative); + } else { + dist_swap = pairwise_distance(positive, negative); + } + dist_neg = torch::min(dist_neg, dist_swap); + } + + auto loss = torch::clamp_min(dist_pos - dist_neg + margin, 0); + + Tensor ret; + if (std::holds_alternative(reduction)) { + ret = loss; + } else if (std::holds_alternative(reduction)) { + ret = loss.mean(); + } else if (std::holds_alternative(reduction)) { + ret = loss.sum(); + } else { + ret = anchor; + TORCH_INTERNAL_ASSERT( + false, enumtype::get_enum_name(reduction), " is not valid"); + } + return ret; +} +} // namespace detail +#endif /* 
DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.triplet_margin_with_distance_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::TripletMarginWithDistanceLossFuncOptions` class to +/// learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::triplet_margin_with_distance_loss(anchor, positive, negative, +/// F::TripletMarginWithDistanceLossFuncOptions().margin(1.0)); +/// ``` +inline Tensor triplet_margin_with_distance_loss( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative, + const TripletMarginWithDistanceLossFuncOptions& options = {}) { + return detail::triplet_margin_with_distance_loss( + anchor, + positive, + negative, + options.distance_function(), + options.margin(), + options.swap(), + options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor ctc_loss( + const Tensor& log_probs, + const Tensor& targets, + const Tensor& input_lengths, + const Tensor& target_lengths, + int64_t blank, + CTCLossFuncOptions::reduction_t reduction, + bool zero_infinity) { + return torch::ctc_loss( + log_probs, + targets, + input_lengths, + target_lengths, + blank, + enumtype::reduction_get_enum(reduction), + zero_infinity); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.ctc_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::CTCLossFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::ctc_loss(log_probs, targets, input_lengths, target_lengths, +/// F::CTCLossFuncOptions().reduction(torch::kNone)); +/// ``` +inline Tensor ctc_loss( + const Tensor& log_probs, + const Tensor& targets, + const Tensor& input_lengths, + const Tensor& target_lengths, + const CTCLossFuncOptions& options = {}) { + return detail::ctc_loss( + log_probs, + targets, + input_lengths, + target_lengths, + options.blank(), + options.reduction(), + options.zero_infinity()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor poisson_nll_loss( + const Tensor& input, + const Tensor& target, + bool log_input, + bool full, + double eps, + PoissonNLLLossFuncOptions::reduction_t reduction) { + return torch::poisson_nll_loss( + input, + target, + log_input, + full, + eps, + enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.poisson_nll_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::PoissonNLLLossFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::poisson_nll_loss(input, target, +/// F::PoissonNLLLossFuncOptions().reduction(torch::kNone)); +/// ``` +inline Tensor poisson_nll_loss( + const Tensor& input, + const Tensor& target, + const PoissonNLLLossFuncOptions& options = {}) { + return detail::poisson_nll_loss( + input, + target, + options.log_input(), + options.full(), + options.eps(), + options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor margin_ranking_loss( + const Tensor& input1, + const Tensor& input2, + const Tensor& target, + double margin, + MarginRankingLossFuncOptions::reduction_t reduction) { + TORCH_CHECK( + input1.dim() == input2.dim() && input1.dim() == target.dim(), + "margin_ranking_loss : All input tensors should have same dimension but got sizes: " + "input1: ", + input1.sizes(), + ", input2: ", + input2.sizes(), + ", target: ", + target.sizes()); + return torch::margin_ranking_loss( + input1, input2, target, margin, enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.margin_ranking_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::MarginRankingLossFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::margin_ranking_loss(input1, input2, target, +/// F::MarginRankingLossFuncOptions().margin(0.5).reduction(torch::kSum)); +/// ``` +inline Tensor margin_ranking_loss( + const Tensor& input1, + const Tensor& input2, + const Tensor& target, + const MarginRankingLossFuncOptions& options = {}) { + return detail::margin_ranking_loss( + input1, input2, target, options.margin(), options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor nll_loss( + const Tensor& input, + const Tensor& target, + const Tensor& weight, + int64_t ignore_index, + const NLLLossFuncOptions::reduction_t& reduction) { + if (input.dim() < 2) { + TORCH_CHECK(false, "Expected 2 or more dimensions (got ", input.dim(), ")"); + } + + if (input.sizes()[0] != target.sizes()[0]) { + TORCH_CHECK( + false, + "Expected input batch_size (", + input.sizes()[0], + ") to match target batch_size (", + target.sizes()[0], + ")."); + } + + return torch::nll_loss_nd( + input, + target, + weight, + enumtype::reduction_get_enum(reduction), + ignore_index); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.nll_loss +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::NLLLossFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::nll_loss(input, target, +/// F::NLLLossFuncOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +inline Tensor nll_loss( + const Tensor& input, + const Tensor& target, + const NLLLossFuncOptions& options = {}) { + return detail::nll_loss( + input, + target, + options.weight(), + options.ignore_index(), + options.reduction()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor cross_entropy( + const Tensor& input, + const Tensor& target, + const Tensor& weight, + int64_t ignore_index, + CrossEntropyFuncOptions::reduction_t reduction, + double label_smoothing) { + return torch::cross_entropy_loss( + input, + target, + weight, + enumtype::reduction_get_enum(reduction), + ignore_index, + label_smoothing); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.cross_entropy +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::CrossEntropyFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cross_entropy(input, target, +/// F::CrossEntropyFuncOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +inline Tensor cross_entropy( + const Tensor& input, + const Tensor& target, + const CrossEntropyFuncOptions& options = {}) { + return detail::cross_entropy( + input, + target, + options.weight(), + options.ignore_index(), + options.reduction(), + options.label_smoothing()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor binary_cross_entropy_with_logits( + const Tensor& input, + const Tensor& target, + const Tensor& weight, + BinaryCrossEntropyWithLogitsFuncOptions::reduction_t reduction, + const Tensor& pos_weight) { + TORCH_CHECK( + target.sizes() == input.sizes(), + "Target size (", + target.sizes(), + ") must be the same as input size (", + input.sizes(), + ")"); + + return torch::binary_cross_entropy_with_logits( + input, + target, + weight, + pos_weight, + enumtype::reduction_get_enum(reduction)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.binary_cross_entropy_with_logits +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::BinaryCrossEntropyWithLogitsFuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::binary_cross_entropy_with_logits(input, target, +/// F::BinaryCrossEntropyWithLogitsFuncOptions().pos_weight(pos_weight).reduction(torch::kSum)); +/// ``` +inline Tensor binary_cross_entropy_with_logits( + const Tensor& input, + const Tensor& target, + const BinaryCrossEntropyWithLogitsFuncOptions& options = {}) { + return detail::binary_cross_entropy_with_logits( + input, + target, + options.weight(), + options.reduction(), + options.pos_weight()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/normalization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/normalization.h new file mode 100644 index 0000000000000000000000000000000000000000..78a06019d7ed60ffbc6f1455503be29998a62a7d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/normalization.h @@ -0,0 +1,212 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor normalize( + const Tensor& input, + double p, + int64_t dim, + double eps, + std::optional out) { + if (out == std::nullopt) { + auto denom = input.norm(p, dim, true).clamp_min(eps).expand_as(input); + return input / denom; + } else { + auto denom = input.norm(p, dim, true).clamp_min(eps).expand_as(input); + return torch::div_out(*out, input, denom); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// 
https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.normalize +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::NormalizeFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::normalize(input, F::NormalizeFuncOptions().p(1).dim(-1)); +/// ``` +inline Tensor normalize( + const Tensor& input, + NormalizeFuncOptions options = {}) { + return detail::normalize( + input, options.p(), options.dim(), options.eps(), options.out()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor layer_norm( + const Tensor& input, + const std::vector& normalized_shape, + const Tensor& weight, + const Tensor& bias, + double eps) { + return torch::layer_norm(input, normalized_shape, weight, bias, eps); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.layer_norm +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::LayerNormFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::layer_norm(input, F::LayerNormFuncOptions({2, 2}).eps(2e-5)); +/// ``` +inline Tensor layer_norm( + const Tensor& input, + const LayerNormFuncOptions& options) { + return detail::layer_norm( + input, + options.normalized_shape(), + options.weight(), + options.bias(), + options.eps()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor local_response_norm( + const Tensor& input, + int64_t size, + double alpha, + double beta, + double k) { + auto dim = input.dim(); + TORCH_CHECK( + dim >= 3, + "Expected 3D or higher dimensionality input (got ", + dim, + " dimensions)"); + auto div = input.mul(input).unsqueeze(1); + if (dim == 3) { + div = detail::pad( + div, + /*pad=*/{0, 0, size / 2, (size - 1) / 2}, + /*mode=*/torch::kConstant, + /*value=*/0); + div = detail::avg_pool2d( + div, + /*kernel_size=*/{size, 1}, + /*stride=*/1, + /*padding=*/0, + /*ceil_mode=*/false, + /*count_include_pad=*/true, + /*divisor_override=*/std::nullopt) + .squeeze(1); + } else { + auto sizes = input.sizes(); + div = div.view({sizes[0], 1, sizes[1], sizes[2], -1}); + div = detail::pad( + div, + /*pad=*/{0, 0, 0, 0, size / 2, (size - 1) / 2}, + /*mode=*/torch::kConstant, + /*value=*/0); + div = detail::avg_pool3d( + div, + /*kernel_size=*/{size, 1, 1}, + /*stride=*/1, + /*padding=*/0, + /*ceil_mode=*/false, + /*count_include_pad=*/true, + /*divisor_override=*/std::nullopt) + .squeeze(1); + div = div.view(sizes); + } + div = div.mul(alpha).add(k).pow(beta); + return input / div; +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.local_response_norm +/// about the exact behavior of this functional. 
+/// +/// See the documentation for +/// `torch::nn::functional::LocalResponseNormFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::local_response_norm(x, F::LocalResponseNormFuncOptions(2)); +/// ``` +inline Tensor local_response_norm( + const Tensor& input, + const LocalResponseNormFuncOptions& options) { + return detail::local_response_norm( + input, options.size(), options.alpha(), options.beta(), options.k()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor group_norm( + const Tensor& input, + int64_t num_groups, + const Tensor& weight, + const Tensor& bias, + double eps) { + return torch::group_norm( + input, + num_groups, + weight, + bias, + eps, + at::globalContext().userEnabledCuDNN()); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.group_norm +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::GroupNormFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::group_norm(input, F::GroupNormFuncOptions(2).eps(2e-5)); +/// ``` +inline Tensor group_norm( + const Tensor& input, + const GroupNormFuncOptions& options) { + return detail::group_norm( + input, + options.num_groups(), + options.weight(), + options.bias(), + options.eps()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/padding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/padding.h new file mode 100644 index 0000000000000000000000000000000000000000..01186e4d52fc79dc9deb10e5fa1035da2821412c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/padding.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor pad( + const Tensor& input, + IntArrayRef pad, + PadFuncOptions::mode_t mode, + double value) { + const auto mode_enum = [&] { + if (std::holds_alternative(mode)) { + return at::padding_mode::constant; + } else if (std::holds_alternative(mode)) { + return at::padding_mode::reflect; + } else if (std::holds_alternative(mode)) { + return at::padding_mode::replicate; + } else if (std::holds_alternative(mode)) { + return at::padding_mode::circular; + } + TORCH_CHECK(false, "Unrecognised padding mode"); + }(); + + std::optional fill_value; + if (value != 0.0) { + fill_value = value; + } + return at::_pad_enum(input, pad, static_cast(mode_enum), fill_value); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.pad +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::PadFuncOptions` class to +/// learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pad(input, F::PadFuncOptions({1, 2, 2, 1, 1, +/// 2}).mode(torch::kReplicate)); +/// ``` +inline Tensor pad(const Tensor& input, const PadFuncOptions& options) { + return detail::pad(input, options.pad(), options.mode(), options.value()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pixelshuffle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pixelshuffle.h new file mode 100644 index 0000000000000000000000000000000000000000..83bf797f827b7ab696af03a213fc96c985ec49b0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pixelshuffle.h @@ -0,0 +1,48 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor pixel_shuffle(const Tensor& input, int64_t upscale_factor) { + return torch::pixel_shuffle(input, upscale_factor); +} + +inline Tensor pixel_unshuffle(const Tensor& input, int64_t downscale_factor) { + return torch::pixel_unshuffle(input, downscale_factor); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.pixel_shuffle +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::PixelShuffleFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pixel_shuffle(x, F::PixelShuffleFuncOptions(2)); +/// ``` +inline Tensor pixel_shuffle( + const Tensor& input, + const PixelShuffleFuncOptions& options) { + return detail::pixel_shuffle(input, options.upscale_factor()); +} + +inline Tensor pixel_unshuffle( + const Tensor& input, + const PixelUnshuffleFuncOptions& options) { + return detail::pixel_unshuffle(input, options.downscale_factor()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pooling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..629a91ef803b54935775c3e0292f7bc820707fe3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/pooling.h @@ -0,0 +1,1154 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn::functional { + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor avg_pool1d( + const Tensor& input, + ExpandingArray<1> kernel_size, + ExpandingArray<1> stride, + ExpandingArray<1> padding, + bool ceil_mode, + bool count_include_pad) { + return torch::avg_pool1d( + input, kernel_size, stride, padding, ceil_mode, count_include_pad); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.avg_pool1d +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::AvgPool1dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool1d(x, F::AvgPool1dFuncOptions(3).stride(2)); +/// ``` +inline Tensor avg_pool1d( + const Tensor& input, + const AvgPool1dFuncOptions& options) { + return avg_pool1d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.ceil_mode(), + options.count_include_pad()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor avg_pool2d( + const Tensor& input, + ExpandingArray<2> kernel_size, + ExpandingArray<2> stride, + ExpandingArray<2> padding, + bool ceil_mode, + bool count_include_pad, + std::optional divisor_override) { + return torch::avg_pool2d( + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.avg_pool2d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::AvgPool2dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool2d(x, F::AvgPool2dFuncOptions(3).stride(2)); +/// ``` +inline Tensor avg_pool2d( + const Tensor& input, + const AvgPool2dFuncOptions& options) { + return detail::avg_pool2d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.ceil_mode(), + options.count_include_pad(), + options.divisor_override()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor avg_pool3d( + const Tensor& input, + ExpandingArray<3> kernel_size, + ExpandingArray<3> stride, + ExpandingArray<3> padding, + bool ceil_mode, + bool count_include_pad, + std::optional divisor_override) { + return torch::avg_pool3d( + input, + kernel_size, + stride, + padding, + ceil_mode, + count_include_pad, + divisor_override); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.avg_pool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::AvgPool3dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool3d(x, F::AvgPool3dFuncOptions(3).stride(2)); +/// ``` +inline Tensor avg_pool3d( + const Tensor& input, + const AvgPool3dFuncOptions& options) { + return detail::avg_pool3d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.ceil_mode(), + options.count_include_pad(), + options.divisor_override()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_pool1d( + const Tensor& input, + ExpandingArray<1> kernel_size, + ExpandingArray<1> stride, + ExpandingArray<1> padding, + ExpandingArray<1> dilation, + bool ceil_mode) { + return torch::max_pool1d( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_pool1d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MaxPool1dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool1d(x, F::MaxPool1dFuncOptions(3).stride(2)); +/// ``` +inline Tensor max_pool1d( + const Tensor& input, + const MaxPool1dFuncOptions& options) { + return detail::max_pool1d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple max_pool1d_with_indices( + const Tensor& input, + ExpandingArray<1> kernel_size, + ExpandingArray<1> stride, + ExpandingArray<1> padding, + ExpandingArray<1> dilation, + bool ceil_mode) { + return torch::max_pool1d_with_indices( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for `torch::nn::functional::MaxPool1dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool1d_with_indices(x, F::MaxPool1dFuncOptions(3).stride(2)); +/// ``` +inline std::tuple max_pool1d_with_indices( + const Tensor& input, + const MaxPool1dFuncOptions& options) { + return detail::max_pool1d_with_indices( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_pool2d( + const Tensor& input, + ExpandingArray<2> kernel_size, + ExpandingArray<2> stride, + ExpandingArray<2> padding, + ExpandingArray<2> dilation, + bool ceil_mode) { + return torch::max_pool2d( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_pool2d +/// about the exact behavior of this functional. 
+/// +/// See the documentation for `torch::nn::functional::MaxPool2dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool2d(x, F::MaxPool2dFuncOptions(3).stride(2)); +/// ``` +inline Tensor max_pool2d( + const Tensor& input, + const MaxPool2dFuncOptions& options) { + return detail::max_pool2d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple max_pool2d_with_indices( + const Tensor& input, + ExpandingArray<2> kernel_size, + ExpandingArray<2> stride, + ExpandingArray<2> padding, + ExpandingArray<2> dilation, + bool ceil_mode) { + return torch::max_pool2d_with_indices( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for `torch::nn::functional::MaxPool2dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool2d_with_indices(x, F::MaxPool2dFuncOptions(3).stride(2)); +/// ``` +inline std::tuple max_pool2d_with_indices( + const Tensor& input, + const MaxPool2dFuncOptions& options) { + return detail::max_pool2d_with_indices( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_pool3d( + const Tensor& input, + ExpandingArray<3> kernel_size, + ExpandingArray<3> stride, + ExpandingArray<3> padding, + ExpandingArray<3> dilation, + bool ceil_mode) { + return torch::max_pool3d( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_pool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MaxPool3dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool3d(x, F::MaxPool3dFuncOptions(3).stride(2)); +/// ``` +inline Tensor max_pool3d( + const Tensor& input, + const MaxPool3dFuncOptions& options) { + return detail::max_pool3d( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple max_pool3d_with_indices( + const Tensor& input, + ExpandingArray<3> kernel_size, + ExpandingArray<3> stride, + ExpandingArray<3> padding, + ExpandingArray<3> dilation, + bool ceil_mode) { + return torch::max_pool3d_with_indices( + input, kernel_size, stride, padding, dilation, ceil_mode); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for `torch::nn::functional::MaxPool3dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool3d_with_indices(x, F::MaxPool3dFuncOptions(3).stride(2)); +/// ``` +inline std::tuple max_pool3d_with_indices( + const Tensor& input, + const MaxPool3dFuncOptions& options) { + return detail::max_pool3d_with_indices( + input, + options.kernel_size(), + options.stride(), + options.padding(), + options.dilation(), + options.ceil_mode()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple adaptive_max_pool1d_with_indices( + const Tensor& input, + ExpandingArray<1> output_size) { + return torch::adaptive_max_pool1d(input, output_size); +} +} // namespace detail + +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool1dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool1d_with_indices(x, F::AdaptiveMaxPool1dFuncOptions(3)); +/// ``` +inline std::tuple adaptive_max_pool1d_with_indices( + const Tensor& input, + const AdaptiveMaxPool1dFuncOptions& options) { + return detail::adaptive_max_pool1d_with_indices(input, options.output_size()); +} + +namespace detail { +inline Tensor adaptive_max_pool1d( + const Tensor& input, + ExpandingArray<1> output_size) { + return std::get<0>(adaptive_max_pool1d_with_indices(input, output_size)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_max_pool1d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool1dFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool1d(x, F::AdaptiveMaxPool1dFuncOptions(3)); +/// ``` +inline Tensor adaptive_max_pool1d( + const Tensor& input, + const AdaptiveMaxPool1dFuncOptions& options) { + return detail::adaptive_max_pool1d(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple adaptive_max_pool2d_with_indices( + const Tensor& input, + ExpandingArrayWithOptionalElem<2> output_size) { + auto output_size_ = + torch::nn::modules::utils::_list_with_default(output_size, input.sizes()); + return torch::adaptive_max_pool2d(input, output_size_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool2d_with_indices(x, F::AdaptiveMaxPool2dFuncOptions(3)); +/// ``` +inline std::tuple adaptive_max_pool2d_with_indices( + const Tensor& input, + const AdaptiveMaxPool2dFuncOptions& options) { + return detail::adaptive_max_pool2d_with_indices(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor adaptive_max_pool2d( + const Tensor& input, + ExpandingArrayWithOptionalElem<2> output_size) { + return std::get<0>(adaptive_max_pool2d_with_indices(input, output_size)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_max_pool2d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool2d(x, F::AdaptiveMaxPool2dFuncOptions(3)); +/// ``` +inline Tensor adaptive_max_pool2d( + const Tensor& input, + const AdaptiveMaxPool2dFuncOptions& options) { + return detail::adaptive_max_pool2d(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple adaptive_max_pool3d_with_indices( + const Tensor& input, + ExpandingArrayWithOptionalElem<3> output_size) { + auto output_size_ = + torch::nn::modules::utils::_list_with_default(output_size, input.sizes()); + return torch::adaptive_max_pool3d(input, output_size_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool3d_with_indices(x, F::AdaptiveMaxPool3dFuncOptions(3)); +/// ``` +inline std::tuple adaptive_max_pool3d_with_indices( + const Tensor& input, + const AdaptiveMaxPool3dFuncOptions& options) { + return detail::adaptive_max_pool3d_with_indices(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor adaptive_max_pool3d( + const Tensor& input, + ExpandingArrayWithOptionalElem<3> output_size) { + return std::get<0>(adaptive_max_pool3d_with_indices(input, output_size)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_max_pool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveMaxPool3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool3d(x, F::AdaptiveMaxPool3dFuncOptions(3)); +/// ``` +inline Tensor adaptive_max_pool3d( + const Tensor& input, + const AdaptiveMaxPool3dFuncOptions& options) { + return detail::adaptive_max_pool3d(input, options.output_size()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor adaptive_avg_pool1d( + const Tensor& input, + ExpandingArray<1> output_size) { + return torch::adaptive_avg_pool1d(input, output_size); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_avg_pool1d +/// about the exact behavior of this functional. 
+/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveAvgPool1dFuncOptions` class to learn what +/// optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool1d(x, F::AdaptiveAvgPool1dFuncOptions(3)); +/// ``` +inline Tensor adaptive_avg_pool1d( + const Tensor& input, + const AdaptiveAvgPool1dFuncOptions& options) { + return detail::adaptive_avg_pool1d(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor adaptive_avg_pool2d( + const Tensor& input, + ExpandingArrayWithOptionalElem<2> output_size) { + auto output_size_ = + torch::nn::modules::utils::_list_with_default(output_size, input.sizes()); + return torch::adaptive_avg_pool2d(input, output_size_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_avg_pool2d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveAvgPool2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool2d(x, F::AdaptiveAvgPool2dFuncOptions(3)); +/// ``` +inline Tensor adaptive_avg_pool2d( + const Tensor& input, + const AdaptiveAvgPool2dFuncOptions& options) { + return detail::adaptive_avg_pool2d(input, options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor adaptive_avg_pool3d( + const Tensor& input, + ExpandingArrayWithOptionalElem<3> output_size) { + auto output_size_ = + torch::nn::modules::utils::_list_with_default(output_size, input.sizes()); + return torch::adaptive_avg_pool3d(input, output_size_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.adaptive_avg_pool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for +/// `torch::nn::functional::AdaptiveAvgPool3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool3d(x, F::AdaptiveAvgPool3dFuncOptions(3)); +/// ``` +inline Tensor adaptive_avg_pool3d( + const Tensor& input, + const AdaptiveAvgPool3dFuncOptions& options) { + return detail::adaptive_avg_pool3d(input, options.output_size()); +} + +// ============================================================================ + +inline std::vector _unpool_output_size( + const Tensor& input, + const IntArrayRef& kernel_size, + const IntArrayRef& stride, + const IntArrayRef& padding, + const std::optional>& output_size) { + auto input_size = input.sizes(); + std::vector default_size; + for (const auto d : c10::irange(kernel_size.size())) { + default_size.push_back( + (input_size[input_size.size() - kernel_size.size() + d] - 1) * + stride[d] + + kernel_size[d] - 2 * padding[d]); + } + if (!output_size) { + return default_size; + } else { + std::vector output_size_; + if (output_size->size() == kernel_size.size() + 2) { + output_size_ = IntArrayRef(*output_size).slice(2).vec(); + } + if (output_size_.size() != kernel_size.size()) { + TORCH_CHECK( + false, + "output_size should be a sequence containing ", + kernel_size.size(), + " or ", + kernel_size.size() + 2, + " elements, but it has a length of '", + output_size_.size(), + "'"); + } + for (const auto d : c10::irange(kernel_size.size())) { + const auto min_size = default_size[d] - stride[d]; + const auto max_size = default_size[d] + stride[d]; + if (!(min_size <= output_size_[d] && output_size_[d] <= max_size)) { + TORCH_CHECK( + false, + "invalid output_size ", + output_size_, + " (dim ", + d, + " must be between ", + min_size, + " and ", + max_size, + ")"); + } + } + return output_size_; + } +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_unpool1d( + const Tensor& input, + const Tensor& indices, + ExpandingArray<1> kernel_size, + ExpandingArray<1> stride, + ExpandingArray<1> padding, + const 
std::optional>& output_size) { + auto output_size_ = + _unpool_output_size(input, kernel_size, stride, padding, output_size); + output_size_.push_back(1); + return torch::max_unpool2d( + input.unsqueeze(-1), indices.unsqueeze(-1), output_size_) + .squeeze(-1); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_unpool1d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MaxUnpool1dFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool1d(x, indices, +/// F::MaxUnpool1dFuncOptions(3).stride(2).padding(1)); +/// ``` +inline Tensor max_unpool1d( + const Tensor& input, + const Tensor& indices, + const MaxUnpool1dFuncOptions& options) { + return detail::max_unpool1d( + input, + indices, + options.kernel_size(), + options.stride(), + options.padding(), + options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_unpool2d( + const Tensor& input, + const Tensor& indices, + ExpandingArray<2> kernel_size, + ExpandingArray<2> stride, + ExpandingArray<2> padding, + const std::optional>& output_size) { + auto output_size_ = + _unpool_output_size(input, kernel_size, stride, padding, output_size); + + return torch::max_unpool2d(input, indices, output_size_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_unpool2d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MaxUnpool2dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool2d(x, indices, +/// F::MaxUnpool2dFuncOptions(3).stride(2).padding(1)); +/// ``` +inline Tensor max_unpool2d( + const Tensor& input, + const Tensor& indices, + const MaxUnpool2dFuncOptions& options) { + return detail::max_unpool2d( + input, + indices, + options.kernel_size(), + options.stride(), + options.padding(), + options.output_size()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor max_unpool3d( + const Tensor& input, + const Tensor& indices, + ExpandingArray<3> kernel_size, + ExpandingArray<3> stride, + ExpandingArray<3> padding, + const std::optional>& output_size) { + auto output_size_ = + _unpool_output_size(input, kernel_size, stride, padding, output_size); + + return torch::max_unpool3d(input, indices, output_size_, stride, padding); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.max_unpool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::MaxUnpool3dFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool3d(x, indices, F::MaxUnpool3dFuncOptions(3)); +/// ``` +inline Tensor max_unpool3d( + const Tensor& input, + const Tensor& indices, + const MaxUnpool3dFuncOptions& options) { + return detail::max_unpool3d( + input, + indices, + options.kernel_size(), + options.stride(), + options.padding(), + options.output_size()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple fractional_max_pool2d_with_indices( + const Tensor& input, + const ExpandingArray<2>& kernel_size, + const std::optional>& output_size, + const std::optional>& output_ratio, + const Tensor& _random_samples) { + if (output_size == std::nullopt && output_ratio == std::nullopt) { + TORCH_CHECK( + false, + "fractional_max_pool2d requires specifying either ", + "an output_size or an output_ratio"); + } + std::optional> output_size_ = output_size; + if (output_size_ == std::nullopt) { + TORCH_INTERNAL_ASSERT(output_ratio != std::nullopt); + output_size_ = { + (int64_t)(static_cast(input.size(-2)) * + (*output_ratio.value())[0]), + (int64_t)(static_cast(input.size(-1)) * + (*output_ratio.value())[1])}; + } + + Tensor _random_samples_ = _random_samples; + if (!_random_samples_.defined()) { + auto n_batch = input.dim() == 3 ? 1 : input.size(0); + _random_samples_ = torch::rand( + {n_batch, input.size(-3), 2}, + torch::TensorOptions().dtype(input.dtype()).device(input.device())); + } + return torch::fractional_max_pool2d( + input, kernel_size, *output_size_, _random_samples_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::FractionalMaxPool2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool2d_with_indices(x, +/// F::FractionalMaxPool2dFuncOptions(3).output_size(2)); +/// ``` +inline std::tuple fractional_max_pool2d_with_indices( + const Tensor& input, + const FractionalMaxPool2dFuncOptions& options) { + return detail::fractional_max_pool2d_with_indices( + input, + options.kernel_size(), + options.output_size(), + options.output_ratio(), + options._random_samples()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor fractional_max_pool2d( + const Tensor& input, + ExpandingArray<2> kernel_size, + std::optional> output_size, + std::optional> output_ratio, + const Tensor& _random_samples) { + return std::get<0>(fractional_max_pool2d_with_indices( + input, kernel_size, output_size, output_ratio, _random_samples)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::FractionalMaxPool2dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool2d(x, +/// F::FractionalMaxPool2dFuncOptions(3).output_size(2)); +/// ``` +inline Tensor fractional_max_pool2d( + const Tensor& input, + const FractionalMaxPool2dFuncOptions& options) { + return detail::fractional_max_pool2d( + input, + options.kernel_size(), + options.output_size(), + options.output_ratio(), + options._random_samples()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline std::tuple fractional_max_pool3d_with_indices( + const Tensor& input, + const ExpandingArray<3>& kernel_size, + const std::optional>& output_size, + const std::optional>& output_ratio, + const Tensor& _random_samples) { + if (output_size == std::nullopt && output_ratio == std::nullopt) { + TORCH_CHECK( + false, + "fractional_max_pool3d requires specifying either ", + "an output_size or an output_ratio"); + } + + std::optional> output_size_ = output_size; + if (output_size_ == std::nullopt) { + TORCH_INTERNAL_ASSERT(output_ratio != std::nullopt); + output_size_ = { + (int64_t)(static_cast(input.size(-3)) * + (*output_ratio.value())[0]), + (int64_t)(static_cast(input.size(-2)) * + (*output_ratio.value())[1]), + (int64_t)(static_cast(input.size(-1)) * + (*output_ratio.value())[2])}; + } + + Tensor _random_samples_ = _random_samples; + if (!_random_samples_.defined()) { + auto n_batch = input.dim() == 4 ? 1 : input.size(0); + _random_samples_ = torch::rand( + {n_batch, input.size(-4), 3}, + torch::TensorOptions().dtype(input.dtype()).device(input.device())); + } + return torch::fractional_max_pool3d( + input, kernel_size, *output_size_, _random_samples_); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::FractionalMaxPool3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool3d_with_indices(x, +/// F::FractionalMaxPool3dFuncOptions(3).output_size(2)); +/// ``` +inline std::tuple fractional_max_pool3d_with_indices( + const Tensor& input, + const FractionalMaxPool3dFuncOptions& options) { + return detail::fractional_max_pool3d_with_indices( + input, + options.kernel_size(), + options.output_size(), + options.output_ratio(), + options._random_samples()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor fractional_max_pool3d( + const Tensor& input, + ExpandingArray<3> kernel_size, + std::optional> output_size, + std::optional> output_ratio, + const Tensor& _random_samples) { + return std::get<0>(fractional_max_pool3d_with_indices( + input, kernel_size, output_size, output_ratio, _random_samples)); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See the documentation for +/// `torch::nn::functional::FractionalMaxPool3dFuncOptions` class to learn what +/// optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool3d(x, +/// F::FractionalMaxPool3dFuncOptions(3).output_size(2)); +/// ``` +inline Tensor fractional_max_pool3d( + const Tensor& input, + const FractionalMaxPool3dFuncOptions& options) { + return detail::fractional_max_pool3d( + input, + options.kernel_size(), + options.output_size(), + options.output_ratio(), + options._random_samples()); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor lp_pool1d( + const Tensor& input, + double norm_type, + ExpandingArray<1> kernel_size, + ExpandingArray<1> stride, + bool ceil_mode) { + Tensor out = detail::avg_pool1d( + input.pow(norm_type), + kernel_size, + stride, + /*padding=*/0, + ceil_mode, + /*count_include_pad=*/true); + + return (torch::sign(out) * relu(torch::abs(out))) + .mul((*kernel_size)[0]) + .pow(1. / norm_type); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.lp_pool1d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::LPPool1dFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool1d(x, F::LPPool1dFuncOptions(2, 3).stride(2)); +/// ``` +inline Tensor lp_pool1d( + const Tensor& input, + const LPPool1dFuncOptions& options) { + return detail::lp_pool1d( + input, + options.norm_type(), + options.kernel_size(), + options.stride(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor lp_pool2d( + const Tensor& input, + double norm_type, + ExpandingArray<2> kernel_size, + ExpandingArray<2> stride, + bool ceil_mode) { + auto kw = (*kernel_size)[0]; + auto kh = (*kernel_size)[1]; + Tensor out = detail::avg_pool2d( + input.pow(norm_type), + kernel_size, + stride, + /*padding=*/0, + ceil_mode, + /*count_include_pad=*/true, + /*divisor_override=*/std::nullopt); + + return (torch::sign(out) * relu(torch::abs(out))) + .mul(kw * kh) + .pow(1. / norm_type); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.lp_pool2d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::LPPool2dFuncOptions` class +/// to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool2d(x, F::LPPool2dFuncOptions(2, {2, 3}).stride(2)); +/// ``` +inline Tensor lp_pool2d( + const Tensor& input, + const LPPool2dFuncOptions& options) { + return detail::lp_pool2d( + input, + options.norm_type(), + options.kernel_size(), + options.stride(), + options.ceil_mode()); +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor lp_pool3d( + const Tensor& input, + double norm_type, + ExpandingArray<3> kernel_size, + ExpandingArray<3> stride, + bool ceil_mode) { + auto kd = (*kernel_size)[0]; + auto kw = (*kernel_size)[1]; + auto kh = (*kernel_size)[2]; + Tensor out = detail::avg_pool3d( + input.pow(norm_type), + kernel_size, + stride, + /*padding=*/0, + ceil_mode, + /*count_include_pad=*/true, + /*divisor_override=*/std::nullopt); + + return (torch::sign(out) * relu(torch::abs(out))) + .mul(kd * kw * kh) + .pow(1. / norm_type); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.lp_pool3d +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::LPPool3dFuncOptions` class +/// to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool3d(x, F::LPPool3dFuncOptions(3, {3, 3, 5}).stride(3)); +/// ``` +inline Tensor lp_pool3d( + const Tensor& input, + const LPPool3dFuncOptions& options) { + return detail::lp_pool3d( + input, + options.norm_type(), + options.kernel_size(), + options.stride(), + options.ceil_mode()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/upsampling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/upsampling.h new file mode 100644 index 0000000000000000000000000000000000000000..7c7f80c472f1f9ea73b2655d4433d2b0f34178c7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/upsampling.h @@ -0,0 +1,291 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::nn::functional { + +inline std::vector _interp_output_size( + int64_t dim, + std::tuple< + Tensor, + std::optional>, + std::optional>, + std::optional> closed_over_args) { + auto [input, size, scale_factor, recompute_scale_factor] = + std::move(closed_over_args); + if (size == std::nullopt && scale_factor == std::nullopt) { + TORCH_CHECK(false, "either size or scale_factor should be defined"); + } + if (size != std::nullopt && scale_factor != std::nullopt) { + TORCH_CHECK(false, "only one of size or scale_factor should be defined"); + } + if (scale_factor != std::nullopt) { + if (static_cast(scale_factor.value().size()) != dim) { + TORCH_CHECK( + false, + "scale_factor shape must match input shape. 
", + "Input is ", + dim, + "D, scale_factor size is ", + torch::ArrayRef(*scale_factor)); + } + } + if (size != std::nullopt) { + return *size; + } + + TORCH_INTERNAL_ASSERT(scale_factor != std::nullopt); + auto scale_factors = *scale_factor; + + if (recompute_scale_factor == std::nullopt) { + // only warn when the scales have floating values since + // the result for ints is the same with/without recompute_scale_factor + bool is_float_scale_factor = false; + for (double scale : scale_factors) { + is_float_scale_factor = floor(scale) != scale; + if (is_float_scale_factor) { + break; + } + } + if (is_float_scale_factor) { + TORCH_WARN( + "The default behavior for interpolate/upsample with float scale_factor changed " + "in 1.6.0 to align with other frameworks/libraries, and uses scale_factor directly, " + "instead of relying on the computed output size. " + "If you wish to keep the old behavior, please set recompute_scale_factor=True. " + "See the documentation of nn.Upsample for details. "); + } + } + + std::vector ret; + for (const auto i : c10::irange(dim)) { + ret.emplace_back(static_cast( + floor(static_cast(input.size(i + 2)) * scale_factors[i]))); + } + return ret; +} + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor interpolate( + const Tensor& input, + const std::optional>& size, + const std::optional>& scale_factor, + InterpolateFuncOptions::mode_t mode, + std::optional align_corners, + std::optional recompute_scale_factor, + bool antialias) { + if (std::holds_alternative(mode) || + std::get_if(&mode)) { + if (align_corners != std::nullopt) { + TORCH_CHECK( + false, + "align_corners option can only be set with the " + "interpolating modes: linear | bilinear | bicubic | trilinear"); + } + } else { + if (align_corners == std::nullopt) { + TORCH_WARN( + "Default upsampling behavior when mode=", + enumtype::get_enum_name(mode), + " is changed " + "to align_corners=False since 0.4.0. 
Please specify " + "align_corners=True if the old behavior is desired. " + "See the documentation of nn.Upsample for details."); + align_corners = false; + } + } + + TORCH_CHECK( + input.dim() >= 3 && input.dim() <= 5, + "Input Error: Only 3D, 4D and 5D input Tensors supported " + "(got ", + input.dim(), + "D) for the modes: nearest | linear | bilinear | bicubic | trilinear " + "(got ", + enumtype::get_enum_name(mode), + ")"); + + auto scale_factor_len = input.dim() - 2; + std::vector> scale_factor_list( + scale_factor_len, std::nullopt); + if (scale_factor != std::nullopt && !recompute_scale_factor.value_or(false)) { + auto _scale_factor_repeated = *scale_factor; + scale_factor_list = {}; + for (const auto& elem : _scale_factor_repeated) { + scale_factor_list.emplace_back(elem); + } + } + + if (antialias && + !(input.dim() == 4 && + (std::get_if(&mode) || + std::get_if(&mode)))) { + TORCH_CHECK( + false, + "Anti-alias option is only supported for bilinear and bicubic modes"); + } + + auto closed_over_args = + std::make_tuple(input, size, scale_factor, recompute_scale_factor); + if (input.dim() == 3 && std::get_if(&mode)) { + return torch::upsample_nearest1d( + input, + _interp_output_size(1, std::move(closed_over_args)), + scale_factor_list.at(0)); + } else if (input.dim() == 4 && std::get_if(&mode)) { + return torch::upsample_nearest2d( + input, + _interp_output_size(2, std::move(closed_over_args)), + scale_factor_list.at(0), + scale_factor_list.at(1)); + } else if (input.dim() == 5 && std::get_if(&mode)) { + return torch::upsample_nearest3d( + input, + _interp_output_size(3, std::move(closed_over_args)), + scale_factor_list.at(0), + scale_factor_list.at(1), + scale_factor_list.at(2)); + } else if (input.dim() == 3 && std::get_if(&mode)) { + return torch::_upsample_nearest_exact1d( + input, + _interp_output_size(1, std::move(closed_over_args)), + scale_factor_list.at(0)); + } else if (input.dim() == 4 && std::get_if(&mode)) { + return 
torch::_upsample_nearest_exact2d( + input, + _interp_output_size(2, std::move(closed_over_args)), + scale_factor_list.at(0), + scale_factor_list.at(1)); + } else if (input.dim() == 5 && std::get_if(&mode)) { + return torch::_upsample_nearest_exact3d( + input, + _interp_output_size(3, std::move(closed_over_args)), + scale_factor_list.at(0), + scale_factor_list.at(1), + scale_factor_list.at(2)); + } else if (input.dim() == 3 && std::get_if(&mode)) { + return detail::adaptive_avg_pool1d( + input, _interp_output_size(1, std::move(closed_over_args))); + } else if (input.dim() == 4 && std::get_if(&mode)) { + return detail::adaptive_avg_pool2d( + input, _interp_output_size(2, std::move(closed_over_args))); + } else if (input.dim() == 5 && std::get_if(&mode)) { + return detail::adaptive_avg_pool3d( + input, _interp_output_size(3, std::move(closed_over_args))); + } else if (input.dim() == 3 && std::get_if(&mode)) { + TORCH_CHECK( + align_corners != std::nullopt, "align_corners should be specified."); + return torch::upsample_linear1d( + input, + _interp_output_size(1, std::move(closed_over_args)), + *align_corners, + scale_factor_list.at(0)); + } else if (input.dim() == 3 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 3D input, but bilinear mode needs 4D input"); + } else if (input.dim() == 3 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 3D input, but trilinear mode needs 5D input"); + } else if (input.dim() == 4 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 4D input, but linear mode needs 3D input"); + } else if (input.dim() == 4 && std::get_if(&mode)) { + TORCH_CHECK( + align_corners != std::nullopt, "align_corners should be specified."); + if (antialias) { + return torch::_upsample_bilinear2d_aa( + input, + _interp_output_size(2, std::move(closed_over_args)), + *align_corners, + scale_factor_list.at(0), + scale_factor_list.at(1)); + } + return torch::upsample_bilinear2d( + input, + _interp_output_size(2, std::move(closed_over_args)), + *align_corners, + 
scale_factor_list.at(0), + scale_factor_list.at(1)); + } else if (input.dim() == 4 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 4D input, but trilinear mode needs 5D input"); + } else if (input.dim() == 5 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 5D input, but linear mode needs 3D input"); + } else if (input.dim() == 5 && std::get_if(&mode)) { + TORCH_CHECK(false, "Got 5D input, but bilinear mode needs 4D input"); + } else if (input.dim() == 5 && std::get_if(&mode)) { + TORCH_CHECK( + align_corners != std::nullopt, "align_corners should be specified."); + return torch::upsample_trilinear3d( + input, + _interp_output_size(3, std::move(closed_over_args)), + *align_corners, + scale_factor_list.at(0), + scale_factor_list.at(1), + scale_factor_list.at(2)); + } else if (input.dim() == 4 && std::get_if(&mode)) { + TORCH_CHECK( + align_corners != std::nullopt, "align_corners should be specified."); + if (antialias) { + return torch::_upsample_bicubic2d_aa( + input, + _interp_output_size(2, std::move(closed_over_args)), + *align_corners, + scale_factor_list.at(0), + scale_factor_list.at(1)); + } + return torch::upsample_bicubic2d( + input, + _interp_output_size(2, std::move(closed_over_args)), + *align_corners, + scale_factor_list.at(0), + scale_factor_list.at(1)); + } else { + TORCH_CHECK( + false, + "Input Error: Only 3D, 4D and 5D input Tensors supported " + "(got ", + input.dim(), + "D) for the modes: nearest | linear | bilinear | bicubic | trilinear " + "(got ", + enumtype::get_enum_name(mode), + ")"); + } +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.interpolate +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::InterpolateFuncOptions` +/// class to learn what optional arguments are supported for this functional. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::interpolate(input, +/// F::InterpolateFuncOptions().size({4}).mode(torch::kNearest)); +/// ``` +inline Tensor interpolate( + const Tensor& input, + const InterpolateFuncOptions& options = {}) { + return detail::interpolate( + input, + options.size(), + options.scale_factor(), + options.mode(), + options.align_corners(), + options.recompute_scale_factor(), + options.antialias()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/vision.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/vision.h new file mode 100644 index 0000000000000000000000000000000000000000..67a22be5e28f617fdbd99a3c0d9955d83781264e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/functional/vision.h @@ -0,0 +1,124 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::nn::functional { + +inline Tensor affine_grid( + const Tensor& theta, + const IntArrayRef& size, + bool align_corners = false) { + // enforce floating point dtype on theta + TORCH_CHECK( + theta.is_floating_point(), + "Expected theta to have floating point type, but got ", + theta.dtype()); + + // check that shapes and sizes match + if (size.size() == 4) { + TORCH_CHECK( + theta.dim() == 3 && theta.size(-2) == 2 && theta.size(-1) == 3, + "Expected a batch of 2D affine matrices of shape Nx2x3 for size ", + size, + ". 
Got ", + theta.sizes(), + "."); + } else if (size.size() == 5) { + TORCH_CHECK( + theta.dim() == 3 && theta.size(-2) == 3 && theta.size(-1) == 4, + "Expected a batch of 3D affine matrices of shape Nx3x4 for size ", + size, + ". Got ", + theta.sizes(), + "."); + } else { + TORCH_CHECK( + false, + "affine_grid only supports 4D and 5D sizes, ", + "for 2D and 3D affine transforms, respectively. ", + "Got size ", + size); + } + + if (*std::min_element(size.begin(), size.end()) <= 0) { + TORCH_CHECK(false, "Expected non-zero, positive output size. Got ", size); + } + + return torch::affine_grid_generator(theta, size, align_corners); +} + +// ============================================================================ + +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace detail { +inline Tensor grid_sample( + const Tensor& input, + const Tensor& grid, + GridSampleFuncOptions::mode_t mode, + GridSampleFuncOptions::padding_mode_t padding_mode, + std::optional align_corners) { + int64_t mode_enum = 0, padding_mode_enum = 0; + + if (std::holds_alternative(mode)) { + mode_enum = 0; + } else if (std::holds_alternative(mode)) { + mode_enum = 1; + } else { /// mode == 'bicubic' + mode_enum = 2; + } + + if (std::holds_alternative(padding_mode)) { + padding_mode_enum = 0; + } else if (std::holds_alternative(padding_mode)) { + padding_mode_enum = 1; + } else { /// padding_mode == 'reflection' + padding_mode_enum = 2; + } + + if (!align_corners.has_value()) { + TORCH_WARN( + "Default grid_sample and affine_grid behavior has changed ", + "to align_corners=False since 1.3.0. Please specify ", + "align_corners=True if the old behavior is desired. 
", + "See the documentation of grid_sample for details."); + align_corners = false; + } + + return torch::grid_sampler( + input, grid, mode_enum, padding_mode_enum, align_corners.value()); +} +} // namespace detail +#endif /* DOXYGEN_SHOULD_SKIP_THIS */ + +/// See +/// https://pytorch.org/docs/main/nn.functional.html#torch.nn.functional.grid_sample +/// about the exact behavior of this functional. +/// +/// See the documentation for `torch::nn::functional::GridSampleFuncOptions` +/// class to learn what optional arguments are supported for this functional. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::grid_sample(input, grid, +/// F::GridSampleFuncOptions().mode(torch::kBilinear).padding_mode(torch::kZeros).align_corners(true)); +/// ``` +inline Tensor grid_sample( + const Tensor& input, + const Tensor& grid, + const GridSampleFuncOptions& options = {}) { + return detail::grid_sample( + input, + grid, + options.mode(), + options.padding_mode(), + options.align_corners()); +} + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/init.h new file mode 100644 index 0000000000000000000000000000000000000000..9ca2c916da22db149dda172aa1c2c21851dd9d0f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/init.h @@ -0,0 +1,127 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch { + +namespace nn::init { + +using NonlinearityType = std::variant< + enumtype::kLinear, + enumtype::kConv1D, + enumtype::kConv2D, + enumtype::kConv3D, + enumtype::kConvTranspose1D, + enumtype::kConvTranspose2D, + enumtype::kConvTranspose3D, + enumtype::kSigmoid, + enumtype::kTanh, + enumtype::kReLU, + enumtype::kLeakyReLU>; + +using FanModeType = std::variant; + +} // namespace nn::init + +namespace nn::init { + +/// Return the recommended gain value for the given nonlinearity function. +TORCH_API double calculate_gain( + NonlinearityType nonlinearity, + double param = 0.01); + +/// Fills the given `tensor` with the provided `value` in-place, and returns it. +/// No gradient will be recorded for this operation. +TORCH_API Tensor constant_(Tensor tensor, Scalar value); + +/// Fills the given `tensor` with the Dirac delta function in-place, and returns +/// it. No gradient will be recorded for this operation. +TORCH_API Tensor dirac_(Tensor tensor); + +/// Fills the given 2-dimensional `matrix` with an identity matrix. +/// No gradient will be recorded for this operation. +TORCH_API Tensor eye_(Tensor matrix); + +/// Fills the given 2-dimensional `matrix` with values drawn from a normal +/// distribution parameterized by `mean` and `std`. 
+/// No gradient will be recorded for this operation. +TORCH_API Tensor normal_(Tensor tensor, double mean = 0, double std = 1); + +/// Fills the given `tensor` with ones. +/// No gradient will be recorded for this operation. +TORCH_API Tensor ones_(Tensor tensor); + +/// Fills the input `Tensor` with a (semi) orthogonal matrix, as described in +/// "Exact solutions to the nonlinear dynamics of learning in deep linear neural +/// networks" - Saxe, A. et al. (2013). The input tensor must have at least 2 +/// dimensions, and for tensors with more than 2 dimensions the trailing +/// dimensions are flattened. +/// No gradient will be recorded for this operation. +TORCH_API Tensor orthogonal_(Tensor tensor, double gain = 1.0); + +/// Fills the 2D input `Tensor` as a sparse matrix, where the +/// non-zero elements will be drawn from a centered normal distribution +/// with the given standard deviation `std`, as described in "Deep learning via +/// Hessian-free optimization" - Martens, J. (2010). The `sparsity` is a real +/// value between 0 and 1 that controls the fraction of elements in each column +/// to be set to zero. +/// No gradient will be recorded for this operation. +TORCH_API Tensor sparse_(Tensor tensor, double sparsity, double std = 0.01); + +/// Fills the given 2-dimensional `matrix` with values drawn from a uniform +/// distribution parameterized by `low` and `high`. +/// No gradient will be recorded for this operation. +TORCH_API Tensor uniform_(Tensor tensor, double low = 0, double high = 1); + +/// Fills the input `Tensor` with values according to the method +/// described in "Delving deep into rectifiers: Surpassing human-level +/// performance on ImageNet classification" - He, K. et al. (2015), using a +/// normal distribution. Also known as He initialization. +/// No gradient will be recorded for this operation. 
+TORCH_API Tensor kaiming_normal_( + Tensor tensor, + double a = 0, + FanModeType mode = torch::kFanIn, + NonlinearityType nonlinearity = torch::kLeakyReLU); + +/// Fills the input `Tensor` with values according to the method +/// described in "Delving deep into rectifiers: Surpassing human-level +/// performance on ImageNet classification" - He, K. et al. (2015), using a +/// uniform distribution. Also known as He initialization. +/// No gradient will be recorded for this operation. +TORCH_API Tensor kaiming_uniform_( + Tensor tensor, + double a = 0, + FanModeType mode = torch::kFanIn, + NonlinearityType nonlinearity = torch::kLeakyReLU); + +/// Fills the input `Tensor` with values according to the method +/// described in "Understanding the difficulty of training deep feedforward +/// neural networks" - Glorot, X. & Bengio, Y. (2010). Values are scaled by the +/// `gain` parameter. No gradient will be recorded for this operation. +TORCH_API Tensor xavier_normal_(Tensor tensor, double gain = 1.0); + +/// Fills the input `Tensor` with values according to the method +/// described in "Understanding the difficulty of training deep feedforward +/// neural networks" - Glorot, X. & Bengio, Y. (2010), using a uniform +/// distribution. Values are scaled by the `gain` parameter +/// No gradient will be recorded for this operation. +TORCH_API Tensor xavier_uniform_(Tensor tensor, double gain = 1.0); + +/// Fills the given `tensor` with zeros. +/// No gradient will be recorded for this operation. +TORCH_API Tensor zeros_(Tensor tensor); + +TORCH_API std::tuple _calculate_fan_in_and_fan_out( + const Tensor& tensor); + +} // namespace nn::init + +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/module.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/module.h new file mode 100644 index 0000000000000000000000000000000000000000..1c7dd281078d54820e77e729fa6cfb2a891aad9f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/module.h @@ -0,0 +1,705 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::nn { + +/// The base class for all modules in PyTorch. +/// +/// \rst +/// .. note:: +/// The design and implementation of this class is largely based on the Python +/// API. You may want to consult the python documentation for +/// :py:class:`pytorch:torch.nn.Module` for further clarification on certain +/// methods or behavior. +/// \endrst +/// +/// A `Module` is an abstraction over the implementation of some function or +/// algorithm, possibly associated with some persistent data. A `Module` may +/// contain further `Module`s ("submodules"), each with their own +/// implementation, persistent data and further submodules. `Module`s can thus +/// be said to form a recursive tree structure. A `Module` is registered as a +/// submodule to another `Module` by calling `register_module()`, typically from +/// within a parent module's constructor. +/// +/// A distinction is made between three kinds of persistent data that may be +/// associated with a `Module`: +/// +/// 1. *Parameters*: tensors that record gradients, typically weights updated +/// during the backward step (e.g. the `weight` of a `Linear` module), +/// 2. 
*Buffers*: tensors that do not record gradients, typically updated during +/// the forward step, such as running statistics (e.g. `mean` and `variance` +/// in the `BatchNorm` module), +/// 3. Any additional state, not necessarily tensors, required for the +/// implementation or configuration of a `Module`. +/// +/// The first two kinds of state are special in that they may be registered +/// with the `Module` system to allow convenient access and batch configuration. +/// For example, registered parameters in any `Module` may be iterated over via +/// the `parameters()` accessor. Further, changing the data type of a `Module`'s +/// registered parameters can be done conveniently via `Module::to()`, e.g. +/// `module->to(torch::kCUDA)` to move all parameters to GPU memory. Lastly, +/// registered parameters and buffers are handled specially during a `clone()` +/// operation, which performs a deepcopy of a cloneable `Module` hierarchy. +/// +/// Parameters are registered with a `Module` via `register_parameter`. Buffers +/// are registered separately via `register_buffer`. These methods are part of +/// the public API of `Module` and are typically invoked from within a +/// concrete `Module`s constructor. +class TORCH_API Module : public std::enable_shared_from_this { + public: + using ModuleApplyFunction = std::function; + using ConstModuleApplyFunction = std::function; + using NamedModuleApplyFunction = + std::function; + using ConstNamedModuleApplyFunction = + std::function; + using ModulePointerApplyFunction = + std::function&)>; + using NamedModulePointerApplyFunction = + std::function&)>; + + /// Tells the base `Module` about the name of the submodule. + explicit Module(std::string name); + + /// Constructs the module without immediate knowledge of the submodule's name. + /// The name of the submodule is inferred via RTTI (if possible) the first + /// time `.name()` is invoked. 
+ Module(); + Module(const Module&) = default; + Module& operator=(const Module&) = default; + Module(Module&&) noexcept = default; + Module& operator=(Module&&) noexcept = default; + + virtual ~Module() = default; + + /// Returns the name of the `Module`. + /// + /// A `Module` has an associated `name`, which is a string representation of + /// the kind of concrete `Module` it represents, such as `"Linear"` for the + /// `Linear` module. Under most circumstances, this name is automatically + /// inferred via runtime type information (RTTI). In the unusual circumstance + /// that you have this feature disabled, you may want to manually name your + /// `Module`s by passing the string name to the `Module` base class' + /// constructor. + const std::string& name() const noexcept; + + /// Performs a recursive deep copy of the module and all its registered + /// parameters, buffers and submodules. + /// + /// Optionally, this method sets the current device + /// to the one supplied before cloning. If no device is given, each + /// parameter and buffer will be moved to the device of its source. + /// + /// \rst + /// .. attention:: + /// Attempting to call the `clone()` method inherited from the base `Module` + /// class (the one documented here) will fail. To inherit an actual + /// implementation of `clone()`, you must subclass `Cloneable`. `Cloneable` + /// is templatized on the concrete module type, and can thus properly copy a + /// `Module`. This method is provided on the base class' API solely for an + /// easier-to-use polymorphic interface. + /// \endrst + virtual std::shared_ptr clone( + const std::optional& device = std::nullopt) const; + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `Module&`. + /// + /// \rst + /// .. 
code-block:: cpp + /// MyModule module; + /// module->apply([](nn::Module& module) { + /// std::cout << module.name() << std::endl; + /// }); + /// \endrst + void apply(const ModuleApplyFunction& function); + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `const Module&`. + /// + /// \rst + /// .. code-block:: cpp + /// MyModule module; + /// module->apply([](const nn::Module& module) { + /// std::cout << module.name() << std::endl; + /// }); + /// \endrst + void apply(const ConstModuleApplyFunction& function) const; + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `const std::string&` for the key of the module, + /// and a `Module&`. The key of the module itself is the empty string. If + /// `name_prefix` is given, it is prepended to every key as + /// `.` (and just `name_prefix` for the module itself). + /// + /// \rst + /// .. code-block:: cpp + /// MyModule module; + /// module->apply([](const std::string& key, nn::Module& module) { + /// std::cout << key << ": " << module.name() << std::endl; + /// }); + /// \endrst + void apply( + const NamedModuleApplyFunction& function, + const std::string& name_prefix = std::string()); + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `const std::string&` for the key of the module, + /// and a `const Module&`. The key of the module itself is the empty string. + /// If `name_prefix` is given, it is prepended to every key as + /// `.` (and just `name_prefix` for the module itself). + /// + /// \rst + /// .. 
code-block:: cpp + /// MyModule module; + /// module->apply([](const std::string& key, const nn::Module& module) { + /// std::cout << key << ": " << module.name() << std::endl; + /// }); + /// \endrst + void apply( + const ConstNamedModuleApplyFunction& function, + const std::string& name_prefix = std::string()) const; + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `const std::shared_ptr&`. + /// + /// \rst + /// .. code-block:: cpp + /// MyModule module; + /// module->apply([](const std::shared_ptr& module) { + /// std::cout << module->name() << std::endl; + /// }); + /// \endrst + void apply(const ModulePointerApplyFunction& function) const; + + /// Applies the `function` to the `Module` and recursively to every submodule. + /// The function must accept a `const std::string&` for the key of the module, + /// and a `const std::shared_ptr&`. The key of the module itself is + /// the empty string. If `name_prefix` is given, it is prepended to every key + /// as + /// `.` (and just `name_prefix` for the module itself). + /// + /// \rst + /// .. code-block:: cpp + /// MyModule module; + /// module->apply([](const std::string& key, + /// const std::shared_ptr& module) { + /// std::cout << key << ": " << module->name() << std::endl; + /// }); + /// \endrst + void apply( + const NamedModulePointerApplyFunction& function, + const std::string& name_prefix = std::string()) const; + + /// Returns the parameters of this `Module` and if `recurse` is true, also + /// recursively of every submodule. + std::vector parameters(bool recurse = true) const; + + /// Returns an `OrderedDict` with the parameters of this `Module` along with + /// their keys, and if `recurse` is true also recursively of every submodule. + OrderedDict named_parameters(bool recurse = true) const; + + /// Returns the buffers of this `Module` and if `recurse` is true, also + /// recursively of every submodule. 
+ std::vector buffers(bool recurse = true) const; + + /// Returns an `OrderedDict` with the buffers of this `Module` along with + /// their keys, and if `recurse` is true also recursively of every submodule. + OrderedDict named_buffers(bool recurse = true) const; + + /// Returns the submodules of this `Module` (the entire submodule hierarchy) + /// and if `include_self` is true, also inserts a `shared_ptr` to this module + /// in the first position. + /// + /// \rst + /// .. warning:: + /// Only pass `include_self` as `true` if this `Module` is stored in a + /// `shared_ptr`! Otherwise an exception will be thrown. You may still call + /// this method with `include_self` set to false if your `Module` is not + /// stored in a `shared_ptr`. + /// \endrst + std::vector> modules(bool include_self = true) const; + + /// Returns an `OrderedDict` of the submodules of this `Module` (the entire + /// submodule hierarchy) and their keys, and if `include_self` is true, also + /// inserts a `shared_ptr` to this module in the first position. If + /// `name_prefix` is given, it is prepended to every key as + /// `.` (and just `name_prefix` for the module itself). + /// + /// \rst + /// .. warning:: + /// Only pass `include_self` as `true` if this `Module` is stored in a + /// `shared_ptr`! Otherwise an exception will be thrown. You may still call + /// this method with `include_self` set to false if your `Module` is not + /// stored in a `shared_ptr`. + /// \endrst + OrderedDict> named_modules( + const std::string& name_prefix = std::string(), + bool include_self = true) const; + + /// Returns the direct submodules of this `Module`. + std::vector> children() const; + + /// Returns an `OrderedDict` of the direct submodules of this `Module` and + /// their keys. + OrderedDict> named_children() const; + + /// Enables "training" mode. + virtual void train(bool on = true); + + /// Calls train(false) to enable "eval" mode. + /// Do not override this method, override `train()` instead. 
+ void eval(); + + /// True if the module is in training mode. + /// + /// Every `Module` has a boolean associated with it that determines whether + /// the `Module` is currently in *training* mode (set via `.train()`) or in + /// *evaluation* (inference) mode (set via `.eval()`). This property is + /// exposed via `is_training()`, and may be used by the implementation of a + /// concrete module to modify its runtime behavior. See the `BatchNorm` or + /// `Dropout` modules for examples of `Module`s that use different code paths + /// depending on this property. + virtual bool is_training() const noexcept; + + /// Recursively casts all parameters to the given `dtype` and `device`. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. + virtual void to( + torch::Device device, + torch::Dtype dtype, + bool non_blocking = false); + + /// Recursively casts all parameters to the given dtype. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. + virtual void to(torch::Dtype dtype, bool non_blocking = false); + + /// Recursively moves all parameters to the given device. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. + virtual void to(torch::Device device, bool non_blocking = false); + + /// Recursively zeros out the `grad` value of each registered parameter. + virtual void zero_grad(bool set_to_none = true); + + /// Attempts to cast this `Module` to the given `ModuleType`. 
+ /// + /// This method is useful when calling `apply()`. + /// \rst + /// .. code-block:: cpp + /// + /// void initialize_weights(nn::Module& module) { + /// torch::NoGradGuard no_grad; + /// if (auto* linear = module.as()) { + /// linear->weight.normal_(0.0, 0.02); + /// } + /// } + /// + /// MyModule module; + /// module->apply(initialize_weights); + /// \endrst + template + typename ModuleType::ContainedType* as() noexcept; + + /// Attempts to cast this `Module` to the given `ModuleType`. + /// + /// This method is useful when calling `apply()`. + /// \rst + /// .. code-block:: cpp + /// void initialize_weights(nn::Module& module) { + /// torch::NoGradGuard no_grad; + /// if (auto* linear = module.as()) { + /// linear->weight.normal_(0.0, 0.02); + /// } + /// } + /// + /// MyModule module; + /// module->apply(initialize_weights); + /// \endrst + template + const typename ModuleType::ContainedType* as() const noexcept; + + /// Attempts to cast this `Module` to the given `ModuleType`. + /// + /// This method is useful when calling `apply()`. + /// \rst + /// .. code-block:: cpp + /// + /// void initialize_weights(nn::Module& module) { + /// torch::NoGradGuard no_grad; + /// if (auto* linear = module.as()) { + /// linear->weight.normal_(0.0, 0.02); + /// } + /// } + /// + /// MyModule module; + /// module.apply(initialize_weights); + /// \endrst + template < + typename ModuleType, + typename = torch::detail::disable_if_module_holder_t> + ModuleType* as() noexcept; + + /// Attempts to cast this `Module` to the given `ModuleType`. + /// + /// This method is useful when calling `apply()`. + /// \rst + /// .. 
code-block:: cpp + /// + /// void initialize_weights(nn::Module& module) { + /// torch::NoGradGuard no_grad; + /// if (auto* linear = module.as()) { + /// linear->weight.normal_(0.0, 0.02); + /// } + /// } + /// + /// MyModule module; + /// module.apply(initialize_weights); + /// \endrst + template < + typename ModuleType, + typename = torch::detail::disable_if_module_holder_t> + const ModuleType* as() const noexcept; + + /// Serializes the `Module` into the given `OutputArchive`. + /// + /// If the `Module` contains unserializable submodules (e.g. + /// `nn::Functional`), those submodules are skipped when serializing. + virtual void save(serialize::OutputArchive& archive) const; + + /// Deserializes the `Module` from the given `InputArchive`. + /// + /// If the `Module` contains unserializable submodules (e.g. + /// `nn::Functional`), we don't check the existence of those submodules in the + /// `InputArchive` when deserializing. + virtual void load(serialize::InputArchive& archive); + + /// Streams a pretty representation of the `Module` into the given `stream`. + /// By default, this representation will be the name of the module (taken from + /// `name()`), followed by a recursive pretty print of all of the `Module`'s + /// submodules. + /// + /// Override this method to change the pretty print. The input + /// `stream` should be returned from the method, to allow easy chaining. + virtual void pretty_print(std::ostream& stream) const; + + /// Returns whether the `Module` is serializable. + virtual bool is_serializable() const; + + /// Registers a parameter with this `Module`. + /// + /// A parameter should be any gradient-recording tensor used in the + /// implementation of your `Module`. Registering it makes it available to + /// methods such as `parameters()`, `clone()` or `to().` + /// + /// Note that registering an undefined Tensor (e.g. 
+ /// `module.register_parameter("param", Tensor())`) is allowed, and is + /// equivalent to `module.register_parameter("param", None)` in Python API. + /// + /// \rst + /// .. code-block:: cpp + /// + /// MyModule::MyModule() { + /// weight_ = register_parameter("weight", torch::randn({A, B})); + /// } + /// \endrst + Tensor& register_parameter( + std::string name, + Tensor tensor, + bool requires_grad = true); + + /// Registers a buffer with this `Module`. + /// + /// A buffer is intended to be state in your module that does not record + /// gradients, such as running statistics. Registering it makes it available + /// to methods such as `buffers()`, `clone()` or `to(). + /// + /// \rst + /// .. code-block:: cpp + /// + /// MyModule::MyModule() { + /// mean_ = register_buffer("mean", torch::empty({num_features_})); + /// } + /// \endrst + Tensor& register_buffer(std::string name, Tensor tensor); + + /// Registers a submodule with this `Module`. + /// + /// Registering a module makes it available to methods such as `modules()`, + /// `clone()` or `to()`. + /// + /// \rst + /// .. code-block:: cpp + /// + /// MyModule::MyModule() { + /// submodule_ = register_module("linear", torch::nn::Linear(3, 4)); + /// } + /// \endrst + template + std::shared_ptr register_module( + std::string name, + std::shared_ptr module); + + /// Registers a submodule with this `Module`. + /// + /// This method deals with `ModuleHolder`s. + /// + /// Registering a module makes it available to methods such as `modules()`, + /// `clone()` or `to()`. + /// + /// \rst + /// .. code-block:: cpp + /// + /// MyModule::MyModule() { + /// submodule_ = register_module("linear", torch::nn::Linear(3, 4)); + /// } + /// \endrst + template + std::shared_ptr register_module( + std::string name, + ModuleHolder module_holder); + + /// Replaces a registered submodule with this `Module`. 
+ /// + /// This takes care of the registration, if you used submodule members, you + /// should + // assign the submodule as well, i.e. use as + /// module->submodule_ = module->replace_module("linear", + /// torch::nn::Linear(3, 4)); + /// It only works when a module of the name is already registered. + /// + /// This is useful for replacing a module after initialization, e.g. + /// for finetuning. + template + std::shared_ptr replace_module( + const std::string& name, + std::shared_ptr module); + + /// Replaces a registered submodule with this `Module`. + /// This method deals with `ModuleHolder`s. + /// + /// This takes care of the registration, if you used submodule members, you + /// should + // assign the submodule as well, i.e. use as + /// module->submodule_ = module->replace_module("linear", linear_holder); + /// It only works when a module of the name is already registered. + /// + /// This is useful for replacing a module after initialization, e.g. + /// for finetuning. + template + std::shared_ptr replace_module( + const std::string& name, + ModuleHolder module_holder); + + /// Unregisters a submodule from this `Module`. If there is no such module + /// with `name` an exception is thrown. + void unregister_module(const std::string& name); + + protected: + /// The following three functions allow a module with default arguments in its + /// forward method to be used in a Sequential module. + /// You should NEVER override these functions manually. Instead, you should + /// use the `FORWARD_HAS_DEFAULT_ARGS` macro. + virtual bool _forward_has_default_args() { + return false; + } + + virtual unsigned int _forward_num_required_args() { + TORCH_CHECK( + false, + "torch::nn::Module subclass that has default arguments in `forward` method ", + "must override `_forward_num_required_args` method. 
Please use ", + "`FORWARD_HAS_DEFAULT_ARGS` macro to do so."); + } + + virtual std::vector _forward_populate_default_args( + std::vector&& arguments) { + TORCH_CHECK( + false, + "torch::nn::Module subclass that has default arguments in `forward` method ", + "must override `_forward_populate_default_args` method. Please use ", + "`FORWARD_HAS_DEFAULT_ARGS` macro to do so."); + } + + /// The registered parameters of this `Module`. + /// Inorder to access parameters_ in ParameterDict and ParameterList + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + OrderedDict parameters_; + + private: + // Friend classes. + + template + friend class Cloneable; + + template + friend struct AnyModuleHolder; + + /// Pretty prints the given `Module` into the `ostream`. + TORCH_API friend std::ostream& operator<<( + std::ostream& stream, + const nn::Module& module); + + // data parallel using this method to configure gradient edges during the + // replicate step. + template + friend void replicate_grad_edges( + const std::shared_ptr& module, + const std::vector>& replicas, + const std::vector& devices); + + // Private methods. + + /// Used in the implementation of `Cloneable`. + virtual void clone_(Module& other, const std::optional& device); + + /// The implementation of the various `to()` methods. + template + void to_impl(Ts&&... ts); + + /// Implements pretty printing the module hierarchy. + void pretty_print_recursive( + std::ostream& stream, + const std::string& indentation) const; + + /// Applies the `function` to every submodule recursively, starting at this + /// `Module`'s children (thus not including the module itself). + void apply_to_submodules( + const NamedModulePointerApplyFunction& function, + const std::string& name_prefix = std::string()) const; + + /// Returns a shared_ptr to `this` in a safe (checked) way. + std::shared_ptr shared_from_this_checked() const; + + /// The registered buffers of this `Module`. 
+ OrderedDict buffers_; + + /// The registered (direct) submodules of this `Module`. + OrderedDict> children_; + + /// The module's name (e.g. "LSTM"). + mutable std::optional name_; + + /// Whether the module is in training mode. + bool is_training_{true}; +}; + +/// Serialize a `Module` pointer into an `OutputArchive`. +TORCH_API serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const std::shared_ptr& module); + +/// Deserializes a `Module` from an `InputArchive`. +TORCH_API serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + const std::shared_ptr& module); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ nn::Module ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +typename ModuleType::ContainedType* Module::as() noexcept { + // Use the contained type of the `ModuleHolder`, e.g. `LinearImpl` for + // `Linear`, since `LinearImpl` inherits `nn::Module`. + return as(); +} + +template +const typename ModuleType::ContainedType* Module::as() const noexcept { + // Use the contained type of the `ModuleHolder`, e.g. `LinearImpl` for + // `Linear`, since `LinearImpl` inherits `nn::Module`. 
+ return as(); +} + +template +ModuleType* Module::as() noexcept { + return dynamic_cast(this); +} + +template +const ModuleType* Module::as() const noexcept { + return dynamic_cast(this); +} + +template +std::shared_ptr Module::register_module( + std::string name, + std::shared_ptr module) { + TORCH_CHECK(!name.empty(), "Submodule name must not be empty"); + TORCH_CHECK( + name.find('.') == std::string::npos, + "Submodule name must not contain a dot (got '", + name, + "')"); + auto& base_module = children_.insert(std::move(name), std::move(module)); + return std::dynamic_pointer_cast(base_module); +} + +template +std::shared_ptr Module::register_module( + std::string name, + ModuleHolder module_holder) { + return register_module(std::move(name), module_holder.ptr()); +} + +template +std::shared_ptr Module::replace_module( + const std::string& name, + std::shared_ptr module) { + auto& base_module = (children_[name] = std::move(module)); + return std::dynamic_pointer_cast(base_module); +} + +template +std::shared_ptr Module::replace_module( + const std::string& name, + ModuleHolder module_holder) { + return replace_module(name, module_holder.ptr()); +} + +template +void Module::to_impl(Ts&&... ts) { + // First call `to()` on every child module. + for (auto& child : children_) { + child.value()->to(ts...); + } + // Then move every parameter to the new dtype/device. + for (auto& parameter : named_parameters(/*recurse=*/false)) { + parameter->set_data(parameter->to(ts...)); + } + // Then move every buffer to the new dtype/device. + for (auto& buffer : named_buffers(/*recurse=*/false)) { + buffer->set_data(buffer->to(ts...)); + } +} + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules.h new file mode 100644 index 0000000000000000000000000000000000000000..c1faaa2df38718158f0151e74c22f20e3a73dbcd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// Common +#include + +// Containers +#include +#include +#include +#include +#include +#include +#include +#include + +// Layers +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..5dad23c1309e4c89276b4ded34dac8368484a326 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/_functions.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn::functions { + +class CrossMapLRN2d : public torch::autograd::Function { + public: + static torch::autograd::Variable forward( + torch::autograd::AutogradContext* ctx, + const torch::autograd::Variable& input, + const CrossMapLRN2dOptions& options); + + static torch::autograd::variable_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::variable_list grad_output); +}; + +} // namespace torch::nn::functions + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/activation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/activation.h new file mode 100644 index 0000000000000000000000000000000000000000..6fa81658738c47b562c5bf927a8e844804e45e14 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/activation.h @@ -0,0 +1,878 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ELU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies elu over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ELU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ELUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ELU model(ELUOptions().alpha(42.42).inplace(true)); +/// ``` +class TORCH_API ELUImpl : public torch::nn::Cloneable { + public: + explicit ELUImpl(const ELUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `ELU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ELUOptions options; +}; + +/// A `ModuleHolder` subclass for `ELUImpl`. +/// See the documentation for `ELUImpl` class to learn what methods it +/// provides, and examples of how to use `ELU` with `torch::nn::ELUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. 
+TORCH_MODULE(ELU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SELU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the selu function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.SELU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SELUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// SELU model(SELUOptions().inplace(true)); +/// ``` +class TORCH_API SELUImpl : public torch::nn::Cloneable { + public: + explicit SELUImpl(const SELUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `SELU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + SELUOptions options; +}; + +/// A `ModuleHolder` subclass for `SELUImpl`. +/// See the documentation for `SELUImpl` class to learn what methods it +/// provides, and examples of how to use `SELU` with `torch::nn::SELUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(SELU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Hardshrink ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the hard shrinkage function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Hardshrink to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::HardshrinkOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Hardshrink model(HardshrinkOptions().lambda(42.42)); +/// ``` +class TORCH_API HardshrinkImpl : public torch::nn::Cloneable { + public: + explicit HardshrinkImpl(const HardshrinkOptions& options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Hardshrink` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + HardshrinkOptions options; +}; + +/// A `ModuleHolder` subclass for `HardshrinkImpl`. +/// See the documentation for `HardshrinkImpl` class to learn what methods it +/// provides, and examples of how to use `Hardshrink` with +/// `torch::nn::HardshrinkOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Hardshrink); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Hardtanh ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the HardTanh function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Hardtanh to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::HardtanhOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Hardtanh +/// model(HardtanhOptions().min_val(-42.42).max_val(0.42).inplace(true)); +/// ``` +class TORCH_API HardtanhImpl : public torch::nn::Cloneable { + public: + explicit HardtanhImpl(const HardtanhOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `Hardtanh` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + HardtanhOptions options; +}; + +/// A `ModuleHolder` subclass for `HardtanhImpl`. +/// See the documentation for `HardtanhImpl` class to learn what methods it +/// provides, and examples of how to use `Hardtanh` with +/// `torch::nn::HardtanhOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Hardtanh); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LeakyReLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LeakyReLU function element-wise. 
+/// See https://pytorch.org/docs/main/nn.html#torch.nn.LeakyReLU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LeakyReLUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LeakyReLU model(LeakyReLUOptions().negative_slope(0.42).inplace(true)); +/// ``` +class TORCH_API LeakyReLUImpl : public torch::nn::Cloneable { + public: + explicit LeakyReLUImpl(const LeakyReLUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `LeakyReLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + LeakyReLUOptions options; +}; + +/// A `ModuleHolder` subclass for `LeakyReLUImpl`. +/// See the documentation for `LeakyReLUImpl` class to learn what methods it +/// provides, and examples of how to use `LeakyReLU` with +/// `torch::nn::LeakyReLUOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LeakyReLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LogSigmoid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LogSigmoid function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LogSigmoid to learn +/// about the exact behavior of this module. +class TORCH_API LogSigmoidImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `LogSigmoid` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `LogSigmoidImpl`. +/// See the documentation for `LogSigmoidImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. 
+TORCH_MODULE(LogSigmoid); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softmax ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the Softmax function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softmax to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SoftmaxOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Softmax model(SoftmaxOptions(1)); +/// ``` +class TORCH_API SoftmaxImpl : public torch::nn::Cloneable { + public: + explicit SoftmaxImpl(int64_t dim) : SoftmaxImpl(SoftmaxOptions(dim)) {} + explicit SoftmaxImpl(const SoftmaxOptions& options_); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softmax` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + SoftmaxOptions options; +}; + +/// A `ModuleHolder` subclass for `SoftmaxImpl`. +/// See the documentation for `SoftmaxImpl` class to learn what methods it +/// provides, and examples of how to use `Softmax` with +/// `torch::nn::SoftmaxOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Softmax); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softmin ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the Softmin function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softmin to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SoftminOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// Softmin model(SoftminOptions(1)); +/// ``` +class TORCH_API SoftminImpl : public torch::nn::Cloneable { + public: + explicit SoftminImpl(int64_t dim) : SoftminImpl(SoftminOptions(dim)) {} + explicit SoftminImpl(const SoftminOptions& options_); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softmin` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + SoftminOptions options; +}; + +/// A `ModuleHolder` subclass for `SoftminImpl`. +/// See the documentation for `SoftminImpl` class to learn what methods it +/// provides, and examples of how to use `Softmin` with +/// `torch::nn::SoftminOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Softmin); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LogSoftmax ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LogSoftmax function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LogSoftmax to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LogSoftmaxOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LogSoftmax model(LogSoftmaxOptions(1)); +/// ``` +class TORCH_API LogSoftmaxImpl : public torch::nn::Cloneable { + public: + explicit LogSoftmaxImpl(int64_t dim) + : LogSoftmaxImpl(LogSoftmaxOptions(dim)) {} + explicit LogSoftmaxImpl(const LogSoftmaxOptions& options_); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `LogSoftmax` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + LogSoftmaxOptions options; +}; + +/// A `ModuleHolder` subclass for `LogSoftmaxImpl`. 
+/// See the documentation for `LogSoftmaxImpl` class to learn what methods it +/// provides, and examples of how to use `LogSoftmax` with +/// `torch::nn::LogSoftmaxOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LogSoftmax); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softmax2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the Softmax2d function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softmax2d to learn +/// about the exact behavior of this module. +class TORCH_API Softmax2dImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softmax2d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `Softmax2dImpl`. +/// See the documentation for `Softmax2dImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Softmax2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PReLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the PReLU function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.PReLU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::PReLUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// PReLU model(PReLUOptions().num_parameters(42)); +/// ``` +class TORCH_API PReLUImpl : public torch::nn::Cloneable { + public: + explicit PReLUImpl(const PReLUOptions& options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `PReLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. 
+ PReLUOptions options; + + /// The learned weight. + Tensor weight; +}; + +/// A `ModuleHolder` subclass for `PReLUImpl`. +/// See the documentation for `PReLUImpl` class to learn what methods it +/// provides, and examples of how to use `PReLU` with `torch::nn::PReLUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(PReLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the ReLU function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReLU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReLUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReLU model(ReLUOptions().inplace(true)); +/// ``` +class TORCH_API ReLUImpl : public torch::nn::Cloneable { + public: + explicit ReLUImpl(const ReLUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `ReLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ReLUOptions options; +}; + +/// A `ModuleHolder` subclass for `ReLUImpl`. +/// See the documentation for `ReLUImpl` class to learn what methods it +/// provides, and examples of how to use `ReLU` with `torch::nn::ReLUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(ReLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReLU6 ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the ReLU6 function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReLU6 to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReLU6Options` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// ReLU6 model(ReLU6Options().inplace(true)); +/// ``` +class TORCH_API ReLU6Impl : public torch::nn::Cloneable { + public: + explicit ReLU6Impl(const ReLU6Options& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `ReLU6` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ReLU6Options options; +}; + +/// A `ModuleHolder` subclass for `ReLU6Impl`. +/// See the documentation for `ReLU6Impl` class to learn what methods it +/// provides, and examples of how to use `ReLU6` with `torch::nn::ReLU6Options`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(ReLU6); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RReLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the RReLU function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.RReLU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::RReLUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// RReLU model(RReLUOptions().lower(0.24).upper(0.42).inplace(true)); +/// ``` +class TORCH_API RReLUImpl : public torch::nn::Cloneable { + public: + explicit RReLUImpl(const RReLUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `RReLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + RReLUOptions options; +}; + +/// A `ModuleHolder` subclass for `RReLUImpl`. +/// See the documentation for `RReLUImpl` class to learn what methods it +/// provides, and examples of how to use `RReLU` with `torch::nn::RReLUOptions`. 
+/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(RReLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CELU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies celu over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.CELU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::CELUOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CELU model(CELUOptions().alpha(42.42).inplace(true)); +/// ``` +class TORCH_API CELUImpl : public torch::nn::Cloneable { + public: + explicit CELUImpl(const CELUOptions& options_ = {}); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `CELU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + CELUOptions options; +}; + +/// A `ModuleHolder` subclass for `CELUImpl`. +/// See the documentation for `CELUImpl` class to learn what methods it +/// provides, and examples of how to use `CELU` with `torch::nn::CELUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(CELU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies glu over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.GLU to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::GLUOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// GLU model(GLUOptions(1)); +/// ``` +class TORCH_API GLUImpl : public torch::nn::Cloneable { + public: + explicit GLUImpl(const GLUOptions& options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `GLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + GLUOptions options; +}; + +/// A `ModuleHolder` subclass for `GLUImpl`. +/// See the documentation for `GLUImpl` class to learn what methods it +/// provides, and examples of how to use `GLU` with `torch::nn::GLUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(GLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GELU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies gelu over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.GELU to learn +/// about the exact behavior of this module. +class TORCH_API GELUImpl : public torch::nn::Cloneable { + public: + explicit GELUImpl(GELUOptions options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `GELU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + GELUOptions options; +}; + +/// A `ModuleHolder` subclass for `GELUImpl`. +/// See the documentation for `GELUImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(GELU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SiLU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies silu over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.SiLU to learn +/// about the exact behavior of this module. 
+class TORCH_API SiLUImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `SiLU` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `SiLUImpl`. +/// See the documentation for `SiLUImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(SiLU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Mish ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies mish over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Mish to learn +/// about the exact behavior of this module. +class TORCH_API MishImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Mish` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `MishImpl`. +/// See the documentation for `MishImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Mish); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Sigmoid ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies sigmoid over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Sigmoid to learn +/// about the exact behavior of this module. +class TORCH_API SigmoidImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Sigmoid` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `SigmoidImpl`. 
+/// See the documentation for `SigmoidImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Sigmoid); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softplus ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies softplus over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softplus to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SoftplusOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Softplus model(SoftplusOptions().beta(0.24).threshold(42.42)); +/// ``` +class TORCH_API SoftplusImpl : public torch::nn::Cloneable { + public: + explicit SoftplusImpl(const SoftplusOptions& options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softplus` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + SoftplusOptions options; +}; + +/// A `ModuleHolder` subclass for `SoftplusImpl`. +/// See the documentation for `SoftplusImpl` class to learn what methods it +/// provides, and examples of how to use `Softplus` with +/// `torch::nn::SoftplusOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Softplus); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softshrink ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the soft shrinkage function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softshrink to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SoftshrinkOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// Softshrink model(SoftshrinkOptions(42.42)); +/// ``` +class TORCH_API SoftshrinkImpl : public torch::nn::Cloneable { + public: + explicit SoftshrinkImpl(const SoftshrinkOptions& options_ = {}); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softshrink` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + SoftshrinkOptions options; +}; + +/// A `ModuleHolder` subclass for `SoftshrinkImpl`. +/// See the documentation for `SoftshrinkImpl` class to learn what methods it +/// provides, and examples of how to use `Softshrink` with +/// `torch::nn::SoftshrinkOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Softshrink); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Softsign ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Softsign over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Softsign to learn +/// about the exact behavior of this module. +class TORCH_API SoftsignImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Softsign` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `SoftsignImpl`. +/// See the documentation for `SoftsignImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Softsign); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tanh ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Tanh over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Tanh to learn +/// about the exact behavior of this module. 
+class TORCH_API TanhImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Tanh` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `TanhImpl`. +/// See the documentation for `TanhImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Tanh); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Tanhshrink ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Tanhshrink over a given input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Tanhshrink to learn +/// about the exact behavior of this module. +class TORCH_API TanhshrinkImpl : public torch::nn::Cloneable { + public: + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `Tanhshrink` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `TanhshrinkImpl`. +/// See the documentation for `TanhshrinkImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Tanhshrink); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Threshold ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the Threshold function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Threshold to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ThresholdOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// Threshold model(ThresholdOptions(42.42, 24.24).inplace(true)); +/// ``` +class TORCH_API ThresholdImpl : public torch::nn::Cloneable { + public: + ThresholdImpl(double threshold, double value) + : ThresholdImpl(ThresholdOptions(threshold, value)) {} + explicit ThresholdImpl(const ThresholdOptions& options_); + + Tensor forward(Tensor input); + + void reset() override; + + /// Pretty prints the `Threshold` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ThresholdOptions options; +}; + +/// A `ModuleHolder` subclass for `ThresholdImpl`. +/// See the documentation for `ThresholdImpl` class to learn what methods it +/// provides, and examples of how to use `Threshold` with +/// `torch::nn::ThresholdOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Threshold); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MultiheadAttention ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies multi-head attention to the given query, key and value inputs. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MultiheadAttention +/// to learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MultiheadAttentionOptions` class to +/// learn what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// MultiheadAttention model(MultiheadAttentionOptions(20, 10).bias(false)); +/// ``` +class TORCH_API MultiheadAttentionImpl + : public torch::nn::Cloneable { + public: + MultiheadAttentionImpl(int64_t embed_dim, int64_t num_heads) + : MultiheadAttentionImpl( + MultiheadAttentionOptions(embed_dim, num_heads)) {} + explicit MultiheadAttentionImpl(const MultiheadAttentionOptions& options_); + + std::tuple forward( + const Tensor& query, + const Tensor& key, + const Tensor& value, + const Tensor& key_padding_mask = {}, + bool need_weights = true, + const Tensor& attn_mask = {}, + bool average_attn_weights = true); + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {3, AnyValue(Tensor())}, + {4, AnyValue(true)}, + {5, AnyValue(Tensor())}, + {6, AnyValue(true)}) + + public: + void reset() override; + + void _reset_parameters(); + + /// The options with which this `Module` was constructed. + MultiheadAttentionOptions options; + + bool _qkv_same_embed_dim{}; + Tensor in_proj_weight; + Tensor in_proj_bias; + Tensor bias_k; + Tensor bias_v; + Linear out_proj = nullptr; + Tensor q_proj_weight; + Tensor k_proj_weight; + Tensor v_proj_weight; + int64_t head_dim{}; +}; + +/// A `ModuleHolder` subclass for `MultiheadAttentionImpl`. +/// See the documentation for `MultiheadAttentionImpl` class to learn what +/// methods it provides, and examples of how to use `MultiheadAttention` with +/// `torch::nn::MultiheadAttentionOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(MultiheadAttention); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/adaptive.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/adaptive.h new file mode 100644 index 0000000000000000000000000000000000000000..1c33f8b350642c2d217564ada43891b50e5e2cf4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/adaptive.h @@ -0,0 +1,114 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +/// The output of a single invocation of an AdaptiveLogSoftmaxWithLoss +/// module's `forward()` method. +struct TORCH_API ASMoutput { + ASMoutput(Tensor output_, double loss_); + + /// Tensor containing computed target log probabilities for each example + Tensor output; + + /// Scalar representing the computed negative log likelihood loss + double loss; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveLogSoftmaxWithLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Efficient softmax approximation as described in +/// `Efficient softmax approximation for GPUs`_ by Edouard Grave, Armand Joulin, +/// Moustapha Cissé, David Grangier, and Hervé Jégou. +/// See +/// https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveLogSoftmaxWithLoss +/// to learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveLogSoftmaxWithLossOptions` +/// class to learn what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// AdaptiveLogSoftmaxWithLoss model(AdaptiveLogSoftmaxWithLossOptions(8, 10, +/// {4, 8}).div_value(2.).head_bias(true)); +/// ``` +class TORCH_API AdaptiveLogSoftmaxWithLossImpl + : public Cloneable { + public: + AdaptiveLogSoftmaxWithLossImpl( + int64_t in_features, + int64_t n_classes, + std::vector cutoffs) + : AdaptiveLogSoftmaxWithLossImpl(AdaptiveLogSoftmaxWithLossOptions( + in_features, + n_classes, + std::move(cutoffs))) {} + + explicit AdaptiveLogSoftmaxWithLossImpl( + AdaptiveLogSoftmaxWithLossOptions options_); + + ASMoutput forward(const Tensor& input, const Tensor& target); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `AdaptiveLogSoftmaxWithLoss` module into the given + /// `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Given input tensor, and output of `head`, computes the log of the full + /// distribution + Tensor _get_full_log_prob(const Tensor& input, const Tensor& head_output); + + /// Computes log probabilities for all n_classes + Tensor log_prob(const Tensor& input); + + /// This is equivalent to `log_prob(input).argmax(1)` but is more efficient in + /// some cases + Tensor predict(const Tensor& input); + + /// The options with which this `Module` was constructed + AdaptiveLogSoftmaxWithLossOptions options; + + /// Cutoffs used to assign targets to their buckets. It should be a + /// sequence of integers sorted in increasing order + std::vector cutoffs; + + int64_t shortlist_size; + + /// Number of clusters + int64_t n_clusters; + + /// Output size of head classifier + int64_t head_size; + + Linear head = nullptr; + + ModuleList tail; +}; + +/// A `ModuleHolder` subclass for `AdaptiveLogSoftmaxWithLossImpl`. +/// See the documentation for `AdaptiveLogSoftmaxWithLossImpl` class to learn +/// what methods it provides, and examples of how to use +/// `AdaptiveLogSoftmaxWithLoss` with +/// `torch::nn::AdaptiveLogSoftmaxWithLossOptions`. 
See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveLogSoftmaxWithLoss); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/batchnorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/batchnorm.h new file mode 100644 index 0000000000000000000000000000000000000000..bf156150de6ba594fe0965b43e9f8ec8a3f4d170 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/batchnorm.h @@ -0,0 +1,247 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::nn { + +/// Base class for all (dimension-specialized) batchnorm and instancenorm +/// modules. 
+template +class NormImplBase : public torch::nn::Cloneable { + protected: + virtual void _check_input_dim(const Tensor& input) = 0; + + public: + NormImplBase(const DerivedOptions& options_) : options(options_) { + NormImplBase::reset(); + } + + void reset() override { + if (options.affine()) { + weight = this->register_parameter( + "weight", torch::empty({options.num_features()})); + bias = this->register_parameter( + "bias", torch::empty({options.num_features()})); + } else { + weight = + this->register_parameter("weight", Tensor(), /*requires_grad=*/false); + bias = + this->register_parameter("bias", Tensor(), /*requires_grad=*/false); + } + if (options.track_running_stats()) { + running_mean = this->register_buffer( + "running_mean", torch::zeros({options.num_features()})); + running_var = this->register_buffer( + "running_var", torch::ones({options.num_features()})); + num_batches_tracked = this->register_buffer( + "num_batches_tracked", torch::tensor(0, torch::dtype(torch::kLong))); + } else { + running_mean = this->register_buffer("running_mean", Tensor()); + running_var = this->register_buffer("running_var", Tensor()); + num_batches_tracked = + this->register_buffer("num_batches_tracked", Tensor()); + } + reset_parameters(); + } + + void reset_running_stats() { + if (options.track_running_stats()) { + running_mean.zero_(); + running_var.fill_(1); + num_batches_tracked.zero_(); + } + } + + void reset_parameters() { + reset_running_stats(); + if (options.affine()) { + torch::nn::init::ones_(weight); + torch::nn::init::zeros_(bias); + } + } + + /// The options with which this module was constructed. + DerivedOptions options; + + /// The learned weight. + /// Only defined if the `affine` option was `true` upon construction. + Tensor weight; + + /// The learned bias. + /// Only defined if the `affine` option was `true` upon construction. + Tensor bias; + + /// The running mean. 
+ /// Only defined if the `track_running_stats` option was `true` upon + /// construction. + Tensor running_mean; + + /// The running variance. + /// Only defined if the `track_running_stats` option was `true` upon + /// construction. + Tensor running_var; + + /// The number of the forward call. + /// Only defined if the `track_running_stats` option was `true` upon + /// construction. + Tensor num_batches_tracked; +}; + +/// Base class for all (dimension-specialized) batchnorm modules. +template +class BatchNormImplBase : public NormImplBase { + public: + using NormImplBase::NormImplBase; + + Tensor forward(const Tensor& input) { + this->_check_input_dim(input); + double exponential_average_factor = 0.0; + if (this->options.momentum().has_value()) { + exponential_average_factor = this->options.momentum().value(); + } + + if (this->is_training() && this->options.track_running_stats()) { + if (this->num_batches_tracked.defined()) { + this->num_batches_tracked += 1; + if (this->options.momentum() == + std::nullopt) { // use cumulative moving average + exponential_average_factor = + 1.0 / this->num_batches_tracked.template item(); + } else { // use exponential moving average + exponential_average_factor = this->options.momentum().value(); + } + } + } + + return torch::nn::functional::detail::batch_norm( + input, + this->running_mean, + this->running_var, + this->weight, + this->bias, + this->is_training() || !this->options.track_running_stats(), + /*momentum=*/exponential_average_factor, + this->options.eps()); + } + + /// Pretty prints the `BatchNorm{1,2,3}d` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override { + stream << std::boolalpha << "torch::nn::BatchNorm" << D << "d(" + << this->options.num_features() << ", " + << "eps=" << this->options.eps() << ", " + << "momentum="; + + if (this->options.momentum().has_value()) { + stream << this->options.momentum().value(); + } else { + stream << "None"; + } + + stream << ", " + << "affine=" << this->options.affine() << ", " + << "track_running_stats=" << this->options.track_running_stats() + << ')'; + } +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BatchNorm1d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the BatchNorm1d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.BatchNorm1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::BatchNorm1dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// BatchNorm1d +/// model(BatchNorm1dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API BatchNorm1dImpl : public BatchNormImplBase<1, BatchNorm1dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using BatchNormImplBase<1, BatchNorm1dImpl>::BatchNormImplBase; +}; + +/// A `ModuleHolder` subclass for `BatchNorm1dImpl`. +/// See the documentation for `BatchNorm1dImpl` class to learn what methods it +/// provides, and examples of how to use `BatchNorm1d` with +/// `torch::nn::BatchNorm1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(BatchNorm1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BatchNorm2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the BatchNorm2d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.BatchNorm2d to learn +/// about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::BatchNorm2dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// BatchNorm2d +/// model(BatchNorm2dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API BatchNorm2dImpl : public BatchNormImplBase<2, BatchNorm2dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using BatchNormImplBase<2, BatchNorm2dImpl>::BatchNormImplBase; +}; + +/// A `ModuleHolder` subclass for `BatchNorm2dImpl`. +/// See the documentation for `BatchNorm2dImpl` class to learn what methods it +/// provides, and examples of how to use `BatchNorm2d` with +/// `torch::nn::BatchNorm2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(BatchNorm2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BatchNorm3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the BatchNorm3d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.BatchNorm3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::BatchNorm3dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// BatchNorm3d +/// model(BatchNorm3dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API BatchNorm3dImpl : public BatchNormImplBase<3, BatchNorm3dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using BatchNormImplBase<3, BatchNorm3dImpl>::BatchNormImplBase; +}; + +/// A `ModuleHolder` subclass for `BatchNorm3dImpl`. +/// See the documentation for `BatchNorm3dImpl` class to learn what methods it +/// provides, and examples of how to use `BatchNorm3d` with +/// `torch::nn::BatchNorm3dOptions`. 
See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(BatchNorm3d); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/common.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/common.h new file mode 100644 index 0000000000000000000000000000000000000000..83a73e5f87faf4603b37f61c02da69dded99d616 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/common.h @@ -0,0 +1,104 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +/// This macro enables a module with default arguments in its forward method +/// to be used in a Sequential module. +/// +/// Example usage: +/// +/// Let's say we have a module declared like this: +/// ``` +/// struct MImpl : torch::nn::Module { +/// public: +/// explicit MImpl(int value_) : value(value_) {} +/// torch::Tensor forward(int a, int b = 2, double c = 3.0) { +/// return torch::tensor(a + b + c); +/// } +/// private: +/// int value; +/// }; +/// TORCH_MODULE(M); +/// ``` +/// +/// If we try to use it in a Sequential module and run forward: +/// ``` +/// torch::nn::Sequential seq(M(1)); +/// seq->forward(1); +/// ``` +/// +/// We will receive the following error message: +/// ``` +/// MImpl's forward() method expects 3 argument(s), but received 1. +/// If MImpl's forward() method has default arguments, please make sure +/// the forward() method is declared with a corresponding +/// `FORWARD_HAS_DEFAULT_ARGS` macro. 
+/// ``` +/// +/// The right way to fix this error is to use the `FORWARD_HAS_DEFAULT_ARGS` +/// macro when declaring the module: +/// ``` +/// struct MImpl : torch::nn::Module { +/// public: +/// explicit MImpl(int value_) : value(value_) {} +/// torch::Tensor forward(int a, int b = 2, double c = 3.0) { +/// return torch::tensor(a + b + c); +/// } +/// protected: +/// /* +/// NOTE: looking at the argument list of `forward`: +/// `forward(int a, int b = 2, double c = 3.0)` +/// we saw the following default arguments: +/// ---------------------------------------------------------------- +/// 0-based index of default | Default value of arg +/// arg in forward arg list | (wrapped by `torch::nn::AnyValue()`) +/// ---------------------------------------------------------------- +/// 1 | torch::nn::AnyValue(2) +/// 2 | torch::nn::AnyValue(3.0) +/// ---------------------------------------------------------------- +/// Thus we pass the following arguments to the `FORWARD_HAS_DEFAULT_ARGS` +/// macro: +/// */ +/// FORWARD_HAS_DEFAULT_ARGS({1, torch::nn::AnyValue(2)}, {2, +/// torch::nn::AnyValue(3.0)}) +/// private: +/// int value; +/// }; +/// TORCH_MODULE(M); +/// ``` +/// Now, running the following would work: +/// ``` +/// torch::nn::Sequential seq(M(1)); +/// seq->forward(1); // This correctly populates the default arguments for +/// `MImpl::forward` +/// ``` +#define FORWARD_HAS_DEFAULT_ARGS(...) 
\ + template \ + friend struct torch::nn::AnyModuleHolder; \ + bool _forward_has_default_args() override { \ + return true; \ + } \ + unsigned int _forward_num_required_args() override { \ + std::vector> args_info{ \ + __VA_ARGS__}; \ + return std::begin(args_info)->first; \ + } \ + std::vector _forward_populate_default_args( \ + std::vector&& arguments) override { \ + std::vector> args_info{ \ + __VA_ARGS__}; \ + unsigned int num_all_args = std::rbegin(args_info)->first + 1; \ + TORCH_INTERNAL_ASSERT( \ + arguments.size() >= _forward_num_required_args() && \ + arguments.size() <= num_all_args); \ + std::vector ret = std::move(arguments); \ + ret.reserve(num_all_args); \ + for (auto& arg_info : args_info) { \ + if (arg_info.first > ret.size() - 1) \ + ret.emplace_back(std::move(arg_info.second)); \ + } \ + return ret; \ + } + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any.h new file mode 100644 index 0000000000000000000000000000000000000000..be17d5a8bc3ae0bd67e66a8acaba6765733587ad --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any.h @@ -0,0 +1,368 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::nn { + +/// Stores a type erased `Module`. +/// +/// The PyTorch C++ API does not impose an interface on the signature of +/// `forward()` in `Module` subclasses. 
This gives you complete freedom to +/// design your `forward()` methods to your liking. However, this also means +/// there is no unified base type you could store in order to call `forward()` +/// polymorphically for any module. This is where the `AnyModule` comes in. +/// Instead of inheritance, it relies on type erasure for polymorphism. +/// +/// An `AnyModule` can store any `nn::Module` subclass that provides a +/// `forward()` method. This `forward()` may accept any types and return any +/// type. Once stored in an `AnyModule`, you can invoke the underlying module's +/// `forward()` by calling `AnyModule::forward()` with the arguments you would +/// supply to the stored module (though see one important limitation below). +/// Example: +/// +/// \rst +/// .. code-block:: cpp +/// +/// struct GenericTrainer { +/// torch::nn::AnyModule module; +/// +/// void train(torch::Tensor input) { +/// module.forward(input); +/// } +/// }; +/// +/// GenericTrainer trainer1{torch::nn::Linear(3, 4)}; +/// GenericTrainer trainer2{torch::nn::Conv2d(3, 4, 2)}; +/// \endrst +/// +/// As `AnyModule` erases the static type of the stored module (and its +/// `forward()` method) to achieve polymorphism, type checking of arguments is +/// moved to runtime. That is, passing an argument with an incorrect type to an +/// `AnyModule` will compile, but throw an exception at runtime: +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::AnyModule module(torch::nn::Linear(3, 4)); +/// // Linear takes a tensor as input, but we are passing an integer. +/// // This will compile, but throw a `torch::Error` exception at runtime. +/// module.forward(123); +/// \endrst +/// +/// \rst +/// .. attention:: +/// One noteworthy limitation of `AnyModule` is that its `forward()` method +/// does not support implicit conversion of argument types. 
For example, if +/// the stored module's `forward()` method accepts a `float` and you call +/// `any_module.forward(3.4)` (where `3.4` is a `double`), this will throw +/// an exception. +/// \endrst +/// +/// The return type of the `AnyModule`'s `forward()` method is controlled via +/// the first template argument to `AnyModule::forward()`. It defaults to +/// `torch::Tensor`. To change it, you can write `any_module.forward()`, +/// for example. +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::AnyModule module(torch::nn::Linear(3, 4)); +/// auto output = module.forward(torch::ones({2, 3})); +/// +/// struct IntModule { +/// int forward(int x) { return x; } +/// }; +/// torch::nn::AnyModule module(IntModule{}); +/// int output = module.forward(5); +/// \endrst +/// +/// The only other method an `AnyModule` provides access to on the stored +/// module is `clone()`. However, you may acquire a handle on the module via +/// `.ptr()`, which returns a `shared_ptr`. Further, if you know +/// the concrete type of the stored module, you can get a concrete handle to it +/// using `.get()` where `T` is the concrete module type. +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::AnyModule module(torch::nn::Linear(3, 4)); +/// std::shared_ptr ptr = module.ptr(); +/// torch::nn::Linear linear(module.get()); +/// \endrst +class AnyModule { + public: + /// A default-constructed `AnyModule` is in an empty state. + AnyModule() = default; + + /// Constructs an `AnyModule` from a `shared_ptr` to concrete module object. + template + explicit AnyModule(std::shared_ptr module); + + /// Constructs an `AnyModule` from a concrete module object. + template < + typename ModuleType, + typename = torch::detail::enable_if_module_t> + explicit AnyModule(ModuleType&& module); + + /// Constructs an `AnyModule` from a module holder. 
+ template + explicit AnyModule(const ModuleHolder& module_holder); + + /// Move construction and assignment is allowed, and follows the default + /// behavior of move for `std::unique_ptr`. + AnyModule(AnyModule&&) = default; + AnyModule& operator=(AnyModule&&) = default; + + /// Creates a shallow copy of an `AnyModule`. + AnyModule(const AnyModule& other); + AnyModule& operator=(const AnyModule& other); + + /// Creates a deep copy of an `AnyModule` if it contains a module, else an + /// empty `AnyModule` if it is empty. + AnyModule clone(std::optional device = std::nullopt) const; + + /// Assigns a module to the `AnyModule` (to circumvent the explicit + /// constructor). + template + AnyModule& operator=(std::shared_ptr module); + + /// Invokes `forward()` on the contained module with the given arguments, and + /// returns the return value as an `AnyValue`. Use this method when chaining + /// `AnyModule`s in a loop. + template + AnyValue any_forward(ArgumentTypes&&... arguments); + + /// Invokes `forward()` on the contained module with the given arguments, and + /// casts the returned `AnyValue` to the supplied `ReturnType` (which defaults + /// to `torch::Tensor`). + template + ReturnType forward(ArgumentTypes&&... arguments); + + /// Attempts to cast the underlying module to the given module type. Throws an + /// exception if the types do not match. + template > + T& get(); + + /// Attempts to cast the underlying module to the given module type. Throws an + /// exception if the types do not match. + template > + const T& get() const; + + /// Returns the contained module in a `nn::ModuleHolder` subclass if possible + /// (i.e. if `T` has a constructor for the underlying module type). + template + T get() const; + + /// Returns a `std::shared_ptr` whose dynamic type is that of the underlying + /// module. + std::shared_ptr ptr() const; + + /// Like `ptr()`, but casts the pointer to the given type. 
+ template > + std::shared_ptr ptr() const; + + /// Returns the `type_info` object of the contained value. + const std::type_info& type_info() const; + + /// Returns true if the `AnyModule` does not contain a module. + bool is_empty() const noexcept; + + private: + /// Creates a `unique_ptr` pointing to a + /// `AnyModuleHolder` of the correct type. This method is used to deduce the + /// arguments of the module's `forward()` method. + template < + typename ModuleType, + typename Class, + typename ReturnType, + typename... ArgumentTypes> + std::unique_ptr make_holder( + std::shared_ptr&& module, + ReturnType (Class::* /*unused*/)(ArgumentTypes...)); + + /// Helper method invoked by const and non-const `get()`. + template + ModuleType& get_( + ReturnType (ModuleType::* /*unused*/)(ArgumentTypes...)) const; + + /// Helper method invoked by const and non-const `get()`. + template + ModuleType& get_() const; + + /// The type erased module. + std::unique_ptr content_; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AnyModule ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +AnyModule::AnyModule(std::shared_ptr module) + : content_(make_holder( + std::move(module), + &std::remove_reference_t::forward)) { + // `AnyModule` can only store an `nn::Module` subclass object that provides + // a `forward()` method that has a non-templatized return type. + // (e.g. `AnyModule` cannot store `nn::Sequential`, because `nn::Sequential`'s + // `forward()` method has a templatized return type.) + static_assert( + torch::detail::is_module::value, + "Can only store object derived from nn::Module into AnyModule"); + static_assert( + torch::detail::has_forward::value, + "Can only store module with a forward() method that has a non-templatized" + " argument type and return type into AnyModule (e.g. we cannot store nn::Sequential" + " into AnyModule, because its forward() method's argument type and return type are templatized." 
+ " If you need to use nn::Sequentials inside each other you can subclass " + "nn::Sequential and write a non-templatized forward function for it. You can checkout " + "https://github.com/pytorch/vision/blob/2f46070f3cb1ea894d82578f3dc5677f82f34958/torchvision/csrc/models/mnasnet.cpp#L59 " + "for an example on how to do this.)."); +} + +template +AnyModule::AnyModule(ModuleType&& module) + : AnyModule( + std::make_shared(std::forward(module))) {} + +template +AnyModule::AnyModule(const ModuleHolder& module_holder) + : AnyModule(module_holder.ptr()) {} + +inline AnyModule::AnyModule(const AnyModule& other) + : content_(other.content_ ? other.content_->copy() : nullptr) {} + +inline AnyModule& AnyModule::operator=(const AnyModule& other) { + if (this != &other) { + content_ = other.content_ ? other.content_->copy() : nullptr; + } + return *this; +} + +inline AnyModule AnyModule::clone(std::optional device) const { + AnyModule clone; + clone.content_ = content_ ? content_->clone_module(device) : nullptr; + return clone; +} + +template +AnyModule& AnyModule::operator=(std::shared_ptr module) { + *this = AnyModule(std::move(module)); + return *this; +} + +template +AnyValue AnyModule::any_forward(ArgumentTypes&&... arguments) { + TORCH_CHECK(!is_empty(), "Cannot call forward() on an empty AnyModule"); + std::vector values; + values.reserve(sizeof...(ArgumentTypes)); + torch::apply( + [&values](AnyValue&& value) { values.push_back(std::move(value)); }, + AnyValue(std::forward(arguments))...); + return content_->forward(std::move(values)); +} + +template +ReturnType AnyModule::forward(ArgumentTypes&&... arguments) { + return any_forward(std::forward(arguments)...) 
+ .template get(); +} + +template +T& AnyModule::get() { + TORCH_CHECK(!is_empty(), "Cannot call get() on an empty AnyModule"); + return get_(); +} + +template +const T& AnyModule::get() const { + TORCH_CHECK(!is_empty(), "Cannot call get() on an empty AnyModule"); + return get_(); +} + +template +T AnyModule::get() const { + return T(ptr()); +} + +inline std::shared_ptr AnyModule::ptr() const { + TORCH_CHECK(!is_empty(), "Cannot call ptr() on an empty AnyModule"); + return content_->ptr(); +} + +template +std::shared_ptr AnyModule::ptr() const { + TORCH_CHECK(!is_empty(), "Cannot call ptr() on an empty AnyModule"); + // Call get() but discard the value, just to do the type checking. + get_(); + return std::dynamic_pointer_cast(ptr()); +} + +inline const std::type_info& AnyModule::type_info() const { + TORCH_CHECK(!is_empty(), "Cannot call type_info() on an empty AnyModule"); + return content_->type_info; +} + +inline bool AnyModule::is_empty() const noexcept { + return content_ == nullptr; +} + +// Private Methods + +template < + typename ModuleType, + typename Class, + typename ReturnType, + typename... ArgumentTypes> +std::unique_ptr AnyModule::make_holder( + std::shared_ptr&& module, + ReturnType (Class::* /*unused*/)(ArgumentTypes...)) { + static_assert( + torch::detail::check_not_lvalue_references(), + "Modules stored inside AnyModule must not take references. 
" + "Use pointers instead."); + static_assert( + !std::is_void_v, + "AnyModule cannot store modules that return void " + "(you can return a dummy value)."); + return std::make_unique< + AnyModuleHolder, ArgumentTypes...>>( + std::move(module)); +} + +template +ModuleType& AnyModule::get_() const { + using M = std::remove_reference_t; + static_assert( + torch::detail::has_forward::value, + "Can only call AnyModule::get with a type T that has a forward method"); + return get_(&M::forward); +} + +template +ModuleType& AnyModule::get_( + ReturnType (ModuleType::* /*unused*/)(ArgumentTypes...)) const { + if (typeid(ModuleType).hash_code() == type_info().hash_code()) { + return *static_cast&>( + *content_) + .module; + } + TORCH_CHECK( + false, + "Attempted to cast module of type ", + c10::demangle(type_info().name()), + " to type ", + c10::demangle(typeid(ModuleType).name())); +} + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_module_holder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_module_holder.h new file mode 100644 index 0000000000000000000000000000000000000000..9476ac2fd75c048bc6f5cd61e2a10b86b7234d48 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_module_holder.h @@ -0,0 +1,140 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::nn { + +class Module; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~ AnyModulePlaceholder ~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// The static type of the object we store in the `AnyModule`, which erases +/// the actual type, but allows us to call `forward()` on the underlying +/// module. +struct AnyModulePlaceholder : public AnyValue::Placeholder { + using AnyValue::Placeholder::Placeholder; + + /// The "erased" `forward()` method. + virtual AnyValue forward(std::vector&& arguments) = 0; + + /// Returns std::shared_ptr pointing to the erased module. + virtual std::shared_ptr ptr() = 0; + + /// Returns a `AnyModulePlaceholder` with a shallow copy of this `AnyModule`. + virtual std::unique_ptr copy() const = 0; + + /// Returns a `AnyModulePlaceholder` with a deep copy of this `AnyModule`. + virtual std::unique_ptr clone_module( + std::optional device) const = 0; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AnyModuleHolder ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// The dynamic type of the object stored in the `AnyModule`. It contains the +/// concrete instance to which all calls are forwarded. 
It is parameterized +/// over the concrete type of the module, and the types of the arguments the +/// module takes in its `forward()` method. +template +struct AnyModuleHolder : public AnyModulePlaceholder { + /// \internal + struct CheckedGetter { + template + std::decay_t&& operator()(size_t index) { + AT_ASSERT(index < arguments_.size()); + auto& value = arguments_[index]; + if (auto* maybe_value = value.template try_get>()) { + return std::move(*maybe_value); + } + TORCH_CHECK( + false, + "Expected argument #", + index, + " to be of type ", + c10::demangle(typeid(T).name()), + ", but received value of type ", + c10::demangle(value.type_info().name())); + } + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + std::vector& arguments_; + }; + + /// \internal + struct InvokeForward { + template + AnyValue operator()(Ts&&... ts) { + return AnyValue(module_->forward(std::forward(ts)...)); + } + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + std::shared_ptr& module_; + }; + + /// Constructs the `AnyModuleHolder` from a concrete module. + explicit AnyModuleHolder(std::shared_ptr&& module_) + : AnyModulePlaceholder(typeid(ModuleType)), module(std::move(module_)) {} + + /// Calls `forward()` on the underlying module, casting each `AnyValue` in the + /// argument vector to a concrete value. 
+ AnyValue forward(std::vector&& arguments) override { + if (module->_forward_has_default_args()) { + TORCH_CHECK( + arguments.size() >= module->_forward_num_required_args() && + arguments.size() <= sizeof...(ArgumentTypes), + c10::demangle(type_info.name()), + "'s forward() method expects at least ", + module->_forward_num_required_args(), + " argument(s) and at most ", + sizeof...(ArgumentTypes), + " argument(s), but received ", + arguments.size(), + "."); + arguments = std::move( + module->_forward_populate_default_args(std::move(arguments))); + } else { + std::string use_default_args_macro_prompt = " If " + + c10::demangle(type_info.name()) + + "'s forward() method has default arguments, " + + "please make sure the forward() method is declared with a corresponding `FORWARD_HAS_DEFAULT_ARGS` macro."; + TORCH_CHECK( + arguments.size() == sizeof...(ArgumentTypes), + c10::demangle(type_info.name()), + "'s forward() method expects ", + sizeof...(ArgumentTypes), + " argument(s), but received ", + arguments.size(), + ".", + (arguments.size() < sizeof...(ArgumentTypes)) + ? use_default_args_macro_prompt + : ""); + } + + // FYI: During invocation of a module's `forward()` method, the values live + // in the `arguments` vector inside this function. + return torch::unpack( + InvokeForward{module}, CheckedGetter{arguments}); + } + + std::shared_ptr ptr() override { + return module; + } + + std::unique_ptr copy() const override { + return std::make_unique(*this); + } + + std::unique_ptr clone_module( + std::optional device) const override { + return std::make_unique( + std::dynamic_pointer_cast(module->clone(device))); + } + + /// The actual concrete module instance. + std::shared_ptr module; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_value.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_value.h new file mode 100644 index 0000000000000000000000000000000000000000..0f1b723dd41ca7786e255ac47c9e6ad6c323c9b2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/any_value.h @@ -0,0 +1,129 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AnyValue ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// An implementation of `std::any` which stores +/// a type erased object, whose concrete value can be retrieved at runtime by +/// checking if the `typeid()` of a requested type matches the `typeid()` of +/// the object stored. +class AnyValue { + public: + /// Move construction and assignment is allowed, and follows the default + /// behavior of move for `std::unique_ptr`. + AnyValue(AnyValue&&) = default; + AnyValue& operator=(AnyValue&&) = default; + ~AnyValue() = default; + + /// Copy construction and assignment is allowed. + AnyValue(const AnyValue& other) : content_(other.content_->clone()) {} + AnyValue& operator=(const AnyValue& other) { + content_ = other.content_->clone(); + return *this; + } + + /// Constructs the `AnyValue` from value type. + template < + typename T, + typename = std::enable_if_t>> + explicit AnyValue(T&& value) + : content_( + std::make_unique>>(std::forward(value))) { + } + + /// Returns a pointer to the value contained in the `AnyValue` if the type + /// passed as template parameter matches the type of the value stored, and + /// returns a null pointer otherwise. 
+ template + T* try_get() { + static_assert( + !std::is_reference_v, + "AnyValue stores decayed types, you cannot cast it to a reference type"); + static_assert( + !std::is_array_v, + "AnyValue stores decayed types, you must cast it to T* instead of T[]"); + if (typeid(T).hash_code() == type_info().hash_code()) { + return &static_cast&>(*content_).value; + } + return nullptr; + } + + /// Returns the value contained in the `AnyValue` if the type passed as + /// template parameter matches the type of the value stored, and throws an + /// exception otherwise. + template + T get() { + if (auto* maybe_value = try_get()) { + return *maybe_value; + } + TORCH_CHECK( + false, + "Attempted to cast AnyValue to ", + c10::demangle(typeid(T).name()), + ", but its actual type is ", + c10::demangle(type_info().name())); + } + + /// Returns the `type_info` object of the contained value. + const std::type_info& type_info() const noexcept { + return content_->type_info; + } + + private: + friend struct AnyModulePlaceholder; + friend struct TestAnyValue; + + /// \internal + /// The static type of the object we store in the `AnyValue`, which erases the + /// actual object's type, allowing us only to check the `type_info` of the + /// type stored in the dynamic type. + struct Placeholder { + explicit Placeholder(const std::type_info& type_info_) noexcept + : type_info(type_info_) {} + Placeholder(const Placeholder&) = default; + Placeholder(Placeholder&&) = default; + Placeholder& operator=(const Placeholder&) = delete; + Placeholder& operator=(Placeholder&&) = delete; + virtual ~Placeholder() = default; + virtual std::unique_ptr clone() const { + TORCH_CHECK(false, "clone() should only be called on `AnyValue::Holder`"); + } + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::type_info& type_info; + }; + + /// \internal + /// The dynamic type of the object we store in the `AnyValue`, which hides the + /// actual object we have erased in this `AnyValue`. 
+ template + struct Holder : public Placeholder { + /// A template because T&& would not be universal reference here. + template < + typename U, + typename = std::enable_if_t>> + explicit Holder(U&& value_) noexcept + : Placeholder(typeid(T)), value(std::forward(value_)) {} + std::unique_ptr clone() const override { + return std::make_unique>(value); + } + T value; + }; + + /// The type erased object. + std::unique_ptr content_; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/functional.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/functional.h new file mode 100644 index 0000000000000000000000000000000000000000..5913dabf2c39ce36b64d866e02a5c39914666d1a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/functional.h @@ -0,0 +1,106 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::nn { + +/// Wraps a function in a `Module`. +/// +/// The `Functional` module allows wrapping an arbitrary function or function +/// object in an `nn::Module`. This is primarily handy for usage in +/// `Sequential`. +/// +/// \rst +/// .. code-block:: cpp +/// +/// Sequential sequential( +/// Linear(3, 4), +/// Functional(torch::relu), +/// BatchNorm1d(3), +/// Functional(torch::elu, /*alpha=*/1)); +/// \endrst +/// +/// While a `Functional` module only accepts a single `Tensor` as input, it is +/// possible for the wrapped function to accept further arguments. 
However, +/// these have to be bound *at construction time*. For example, if +/// you want to wrap `torch::leaky_relu`, which accepts a `slope` scalar as its +/// second argument, with a particular value for its `slope` in a `Functional` +/// module, you could write +/// +/// \rst +/// .. code-block:: cpp +/// +/// Functional(torch::leaky_relu, /*slope=*/0.5) +/// \endrst +/// +/// The value of `0.5` is then stored within the `Functional` object and +/// supplied to the function call at invocation time. Note that such bound +/// values are evaluated eagerly and stored a single time. See the documentation +/// of [std::bind](https://en.cppreference.com/w/cpp/utility/functional/bind) +/// for more information on the semantics of argument binding. +/// +/// \rst +/// .. attention:: +/// After passing any bound arguments, the function must accept a single +/// tensor and return a single tensor. +/// \endrst +/// +/// Note that `Functional` overloads the call operator (`operator()`) such that +/// you can invoke it with `my_func(...)`. +class TORCH_API FunctionalImpl : public torch::nn::Cloneable { + public: + using Function = std::function; + + /// Constructs a `Functional` from a function object. + explicit FunctionalImpl(Function function); + + template < + typename SomeFunction, + typename... Args, + typename = std::enable_if_t<(sizeof...(Args) > 0)>> + explicit FunctionalImpl(SomeFunction original_function, Args&&... args) + // NOLINTNEXTLINE(modernize-avoid-bind) + : function_(std::bind( + original_function, + /*input=*/std::placeholders::_1, + std::forward(args)...)) { + // std::bind is normally evil, but (1) gcc is broken w.r.t. handling + // parameter pack expansion in lambdas and (2) moving parameter packs into + // a lambda only works with C++14, so std::bind is the more move-aware + // solution here. + } + + void reset() override; + + /// Pretty prints the `Functional` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override; + + /// Forwards the `input` tensor to the underlying (bound) function object. + Tensor forward(Tensor input); + + /// Calls forward(input). + Tensor operator()(Tensor input); + + bool is_serializable() const override; + + private: + Function function_; +}; + +/// A `ModuleHolder` subclass for `FunctionalImpl`. +/// See the documentation for `FunctionalImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Functional); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/moduledict.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/moduledict.h new file mode 100644 index 0000000000000000000000000000000000000000..3075f175df1068df4583ac8cfbcbb0d4b412e971 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/moduledict.h @@ -0,0 +1,265 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// An OrderedDict of `Module`s that registers its elements by their `key`s. +/// +/// \rst +/// .. 
code-block:: cpp +/// +/// torch::OrderedDict> ordereddict = { +/// {"linear", Linear(10, 3).ptr()}, +/// {"conv", Conv2d(1, 2, 3).ptr()}, +/// {"dropout", Dropout(0.5).ptr()}, +/// }; +/// torch::nn::ModuleDict dict1(ordereddict); +/// +/// for (const auto &module : *dict1) { +/// module->pretty_print(std::cout); +/// } +/// +/// std::vector>> list = { +/// {"linear", Linear(10, 3).ptr()}, +/// {"conv", Conv2d(1, 2, 3).ptr()}, +/// {"dropout", Dropout(0.5).ptr()}, +/// }; +/// torch::nn::ModuleDict dict2(list); +/// +/// for (const auto &module : *dict2) { +/// module->pretty_print(std::cout); +/// } +/// +/// \endrst +/// +/// Why should you use `ModuleDict` instead of a simple `map` or `OrderedDict`? +/// The value a `ModuleDict` provides over manually calling an ordered map of +/// modules is that it allows treating the whole container *as a single module*, +/// such that performing a transformation on the `ModuleDict` applies to each of +/// the modules it stores (which are each a registered submodule of the +/// `ModuleDict`). For example, calling `.to(torch::kCUDA)` on a `ModuleDict` +/// will move each module in the map to CUDA memory. For example: +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::OrderedDict> ordereddict = { +/// {"linear", Linear(10, 3).ptr()}, +/// {"conv", Conv2d(1, 2, 3).ptr()}, +/// {"dropout", Dropout(0.5).ptr()}, +/// }; +/// torch::nn::ModuleDict dict(ordereddict); +/// +/// // Convert all modules to CUDA. +/// dict->to(torch::kCUDA); +/// +/// \endrst +/// +/// Finally, `ModuleDict` provides a lightweight container API, such as allowing +/// iteration over submodules, positional access, adding new modules from a +/// vector of key-module pairs or an `OrderedDict` or another `ModuleDict` after +/// construction via `update`. 
+class ModuleDictImpl : public Cloneable { + public: + using Iterator = + torch::OrderedDict>::Iterator; + using ConstIterator = + torch::OrderedDict>::ConstIterator; + + ModuleDictImpl() = default; + + /// Constructs the `ModuleDict` from a list of string-Module pairs. + explicit ModuleDictImpl( + const std::vector>>& + modules) { + update(modules); + } + + /// Constructs the `ModuleDict` from an `OrderedDict`. + explicit ModuleDictImpl( + const torch::OrderedDict>& modules) { + update(modules); + } + + /// Return the items in the `ModuleDict`. + std::vector>> items() const { + return modules_.pairs(); + } + + /// Return the keys in the `ModuleDict`. + std::vector keys() const { + return modules_.keys(); + } + + /// Return the values in the `ModuleDict`. + std::vector> values() const { + return modules_.values(); + } + + /// Return an iterator to the start of `ModuleDict`. + Iterator begin() { + return modules_.begin(); + } + + /// Return a const iterator to the start of `ModuleDict`. + ConstIterator begin() const { + return modules_.begin(); + } + + /// Return an iterator to the end of `ModuleDict`. + Iterator end() { + return modules_.end(); + } + + /// Return a const iterator to the end of `ModuleDict`. + ConstIterator end() const { + return modules_.end(); + } + + /// Return the number of items currently stored in the `ModuleDict`. + size_t size() const noexcept { + return modules_.size(); + } + + /// Return true if the `ModuleDict` is empty, otherwise return false. + bool empty() const noexcept { + return modules_.is_empty(); + } + + /// Check if the certain parameter with the key in the `ModuleDict`. + bool contains(const std::string& key) const noexcept { + return modules_.contains(key); + } + + /// Remove all items from the `ModuleDict`. + void clear() { + // Not remove the registration of modules to make it consistent with python + // version. 
+ modules_.clear(); + } + + /// Special cloning function for `ModuleDict` because it does not use + /// `reset()`. + std::shared_ptr clone( + const std::optional& device = std::nullopt) const override { + auto clone = std::make_shared(); + for (const auto& module : modules_) { + clone->insert(module.key(), module.value()->clone(device)); + } + return clone; + } + + /// `reset()` is empty for `ModuleDict`, since it does not have parameters of + /// its own. + void reset() override {} + + /// Pretty prints the `ModuleDict` into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ModuleDict"; + } + + /// Attempts to returns the `Module` associated with the given `key`. Throws + /// an exception if no such `key` is stored in the `ModuleDict`. Check + /// contains(key) before for a non-throwing way of access. + std::shared_ptr operator[](const std::string& key) const { + return modules_[key]; + } + + /// Attempts to return the module at the given key as the requested type. + /// Throws an exception if no such `key` is stored in the `ModuleDict`. + /// Check contains(key) before for a non-throwing way of access. + template + T& at(const std::string& key) { + static_assert( + torch::detail::is_module::value, + "Can only call ModuleList::at with an nn::Module type"); + auto module = modules_[key]->as(); + TORCH_CHECK( + module, + "Unable to cast module[", + key, + "] to ", + c10::demangle(typeid(T).name())); + return *module; + } + + /// Attempts to return the module at the given key as the requested type. + /// Throws an exception if no such `key` is stored in the `ModuleDict`. + /// Check contains(key) before for a non-throwing way of access. 
+ template + const T& at(const std::string& key) const { + static_assert( + torch::detail::is_module::value, + "Can only call ModuleList::at with an nn::Module type"); + const auto module = modules_[key]->as(); + TORCH_CHECK( + module, + "Unable to cast module[", + key, + "] to ", + c10::demangle(typeid(T).name())); + return *module; + } + + /// Removes and returns the `Module` associated with the given `key`. + /// Throws an exception if no such `key` is stored in the `ModuleDict`. + /// Check contains(key) before for a non-throwing way of access. + std::shared_ptr pop(const std::string& key) { + auto module = modules_[key]; + modules_.erase(key); + // Not remove the registration of the module to make it consistent with + // python version. + return module; + } + + /// Updated the `ModuleDict` with a vector of key-module pairs. + void update( + const std::vector>>& + modules) { + for (auto& item : modules) { + insert(item.first, item.second); + } + } + + /// Updated the `ModuleDict` with key-value pairs from `OrderedDict` or + /// `ModuleDict`. + template + void update(const Container& container) { + for (auto& item : container) { + insert(item.key(), item.value()); + } + } + + private: + /// Private `OrderedDict` holding the key-Module pairs. + torch::OrderedDict> modules_; + + /// Insert a key-module pair by overwriting existing keys, + /// and register or replace the `Module`. + void insert(const std::string& key, std::shared_ptr module) { + if (contains(key)) { + modules_[key] = std::move(module); + replace_module(key, modules_[key]); + } else { + modules_.insert(key, std::move(module)); + register_module(key, modules_.back().value()); + } + } +}; + +/// A `ModuleHolder` subclass for `ModuleDictImpl`. +/// See the documentation for `ModuleDictImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. 
+TORCH_MODULE(ModuleDict); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/modulelist.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/modulelist.h new file mode 100644 index 0000000000000000000000000000000000000000..fddefa244488a0cc92d691ba7dea603c3a9464c1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/modulelist.h @@ -0,0 +1,277 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::nn { + +/// A list of `Module`s that registers its elements. +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::ModuleList mlist( +/// torch::nn::Linear(3, 4), +/// torch::nn::BatchNorm1d(4), +/// torch::nn::Dropout(0.5) +/// ); +/// +/// for (const auto &module : *mlist) { +/// module->pretty_print(std::cout); +/// } +/// +/// \endrst +/// +/// Why should you use `ModuleList` instead of a simple `std::vector`? The value +/// a `ModuleList` provides over manually calling a sequence of modules is that +/// it allows treating the whole container *as a single module*, such that +/// performing a transformation on the `ModuleList` applies to each of the +/// modules it stores (which are each a registered submodule of the +/// `ModuleList`). For example, calling +/// `.to(torch::kCUDA)` on a `ModuleList` will move each module in the list to +/// CUDA memory. For example: +/// +/// \rst +/// .. 
code-block:: cpp +/// +/// torch::nn::ModuleList mlist( +/// torch::nn::Linear(3, 4), +/// torch::nn::BatchNorm1d(4), +/// torch::nn::Dropout(0.5) +/// ); +/// +/// // Convert all modules to CUDA. +/// mlist->to(torch::kCUDA); +/// +/// \endrst +/// +/// Finally, `ModuleList` provides a lightweight container API, such as allowing +/// iteration over submodules, positional access, adding a new module after +/// construction via `push_back`, as well as joining two `ModuleList`s via +/// `extend`. +class ModuleListImpl : public Cloneable { + public: + using Iterator = std::vector>::iterator; + using ConstIterator = std::vector>::const_iterator; + + ModuleListImpl() = default; + + /// Constructs the `ModuleList` from a variadic list of modules. + template + explicit ModuleListImpl(Modules&&... modules) { + modules_.reserve(sizeof...(Modules)); + push_back_var(std::forward(modules)...); + } + + /// Special cloning function for `ModuleList` because it does not use + /// `reset()`. + std::shared_ptr clone( + const std::optional& device = std::nullopt) const override { + auto clone = std::make_shared(); + for (const auto& module : modules_) { + clone->push_back(module->clone(device)); + } + return clone; + } + + /// `reset()` is empty for `ModuleList`, since it does not have parameters of + /// its own. + void reset() override {} + + /// Pretty prints the `ModuleList` module into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ModuleList"; + } + + void push_back(std::shared_ptr module) { + modules_.push_back(std::move(module)); + const auto index = modules_.size() - 1; + register_module(std::to_string(index), modules_[index]); + } + + /// Adds a new `Module` to the `ModuleList` container, moving or copying + /// it into a `shared_ptr` internally. This method allows passing value types, + /// and letting the container deal with the boxing. 
+ template > + void push_back(M&& module) { + using Type = std::remove_reference_t; + push_back(std::make_shared(std::forward(module))); + } + + /// Unwraps the contained module of a `ModuleHolder` and adds it to the + /// `ModuleList`. + template + void push_back(const ModuleHolder& module_holder) { + push_back(module_holder.ptr()); + } + + /// Iterates over the container and calls `push_back()` on each value. + template + void extend(const Container& container) { + for (const auto& module : container) { + push_back(module); + } + } + + /// Returns an iterator to the start of the `ModuleList`. + Iterator begin() { + return modules_.begin(); + } + + /// Returns a const iterator to the start of the `ModuleList`. + ConstIterator begin() const { + return modules_.begin(); + } + + /// Returns an iterator to the end of the `ModuleList`. + Iterator end() { + return modules_.end(); + } + + /// Returns a const iterator to the end of the `ModuleList`. + ConstIterator end() const { + return modules_.end(); + } + + /// Attempts to return the module at the given index as the requested type. + /// Throws an exception if the index is out of bounds or the types do not + /// match. + template + T& at(size_t index) { + static_assert( + torch::detail::is_module::value, + "Can only call ModuleList::at with an nn::Module type"); + TORCH_CHECK(index < size(), "Index out of range"); + auto module = modules_[index]->as(); + TORCH_CHECK( + module, + "Unable to cast module[", + index, + "] to ", + c10::demangle(typeid(T).name())); + return *module; + } + + /// Attempts to return the module at the given index as the requested type. + /// Throws an exception if the index is out of bounds or the types do not + /// match. 
+ template + const T& at(size_t index) const { + static_assert( + torch::detail::is_module::value, + "Can only call ModuleList::at with an nn::Module type"); + TORCH_CHECK(index < size(), "Index out of range"); + const auto module = modules_[index]->as(); + TORCH_CHECK( + module, + "Unable to cast module[", + index, + "] to ", + c10::demangle(typeid(T).name())); + return *module; + } + + /// Attempts to return a `std::shared_ptr` whose dynamic type is that of the + /// underlying module at the given index. Throws an exception if the index is + /// out of bounds. + std::shared_ptr ptr(size_t index) const { + TORCH_CHECK(index < size(), "Index out of range"); + return modules_[index]; + } + + /// Attempts to return a `std::shared_ptr` whose type is the one provided. + /// Throws an exception if the index is out of bounds or the types do not + /// match. + template + std::shared_ptr ptr(size_t index) const { + static_assert( + torch::detail::is_module::value, + "Can only call ModuleList::ptr with an nn::Module type"); + TORCH_CHECK(index < size(), "Index out of range"); + return std::dynamic_pointer_cast(modules_[index]); + } + + /// Like `ptr(index)`. + std::shared_ptr operator[](size_t index) const { + // This is the only method we can call without a type. + return ptr(index); + } + + /// The current size of the `ModuleList` container. + size_t size() const noexcept { + return modules_.size(); + } + + /// True if there are no modules in the `ModuleList`. 
+ bool is_empty() const noexcept { + return size() == 0; + } + + void insert(size_t index, std::shared_ptr module) { + TORCH_CHECK(index <= size(), "Index out of range"); + + if (index == size()) + push_back(std::move(module)); + else { + modules_.insert( + modules_.begin() + Iterator::difference_type(index), + std::move(module)); + + for (const auto i : c10::irange(index, size() - 1)) { + (void)i; // Suppress unused variable warning + replace_module(std::to_string(index), modules_[index]); + } + register_module(std::to_string(size() - 1), modules_.back()); + } + } + + /// Unwraps the contained module of a `ModuleHolder` and inserts it in the + /// `ModuleList`. + template + void insert(size_t index, const ModuleHolder& module_holder) { + insert(index, module_holder.ptr()); + } + + /// inserts a new `Module` to the `ModuleList` container, moving or copying + /// it into a `shared_ptr` internally. This method allows passing value types, + /// and letting the container deal with the boxing. + template > + void insert(size_t index, M&& module) { + using Type = std::remove_reference_t; + insert(index, std::make_shared(std::forward(module))); + } + + private: + template + void push_back_var(Head&& head, Tail&&... tail) { + push_back(std::forward(head)); + // Recursively calls this method, until the parameter pack only thas this + // entry left. Then calls `push_back()` a final time (above). + push_back_var(std::forward(tail)...); + } + + /// The base case, when the list of modules is empty. + void push_back_var() {} + + // Box the AnyModules to give ModuleList reference semantics, like the rest of + // the API. Note that this is not required otherwise, this could just be a + // `vector`. + std::vector> modules_; +}; + +/// A `ModuleHolder` subclass for `ModuleListImpl`. +/// See the documentation for `ModuleListImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. 
+TORCH_MODULE(ModuleList); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/named_any.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/named_any.h new file mode 100644 index 0000000000000000000000000000000000000000..acfce51d479633f6ae6abc3760bcd15294bb83d9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/named_any.h @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::nn { + +/// Stores a type erased `Module` with name. +/// +/// The `NamedAnyModule` class enables the following API for constructing +/// `nn::Sequential` with named submodules: +/// \rst +/// .. code-block:: cpp +/// +/// struct M : torch::nn::Module { +/// explicit M(int value_) : value(value_) {} +/// int value; +/// int forward() { +/// return value; +/// } +/// }; +/// +/// Sequential sequential({ +/// {"m1", std::make_shared(1)}, // shared pointer to `Module` is +/// supported {std::string("m2"), M(2)}, // `Module` is supported +/// {"linear1", Linear(10, 3)} // `ModuleHolder` is supported +/// }); +/// \endrst +class NamedAnyModule { + public: + /// Creates a `NamedAnyModule` from a (boxed) `Module`. + template + NamedAnyModule(std::string name, std::shared_ptr module_ptr) + : NamedAnyModule(std::move(name), AnyModule(std::move(module_ptr))) {} + + /// Creates a `NamedAnyModule` from a `Module`, moving or copying it + /// into a `shared_ptr` internally. 
+ // NOTE: We need to use `std::remove_reference_t` to get rid of + // any reference components for make_unique. + template > + NamedAnyModule(std::string name, M&& module) + : NamedAnyModule( + std::move(name), + std::make_shared>( + std::forward(module))) {} + + /// Creates a `NamedAnyModule` from a `Module` that is unwrapped from + /// a `ModuleHolder`. + template + NamedAnyModule(std::string name, const ModuleHolder& module_holder) + : NamedAnyModule(std::move(name), module_holder.ptr()) {} + + /// Creates a `NamedAnyModule` from a type-erased `AnyModule`. + NamedAnyModule(std::string name, AnyModule any_module) + : name_(std::move(name)), module_(std::move(any_module)) {} + + /// Returns a reference to the name. + const std::string& name() const noexcept { + return name_; + } + + /// Returns a reference to the module. + AnyModule& module() noexcept { + return module_; + } + + /// Returns a const reference to the module. + const AnyModule& module() const noexcept { + return module_; + } + + private: + std::string name_; + AnyModule module_; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h new file mode 100644 index 0000000000000000000000000000000000000000..b2464e930cf6d35442ce2ae4f7fbb3fa92772f58 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterdict.h @@ -0,0 +1,151 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::nn { + +class ParameterDictImpl : public Cloneable { + public: + using Iterator = OrderedDict::Iterator; + using ConstIterator = OrderedDict::ConstIterator; + + ParameterDictImpl() = default; + + explicit ParameterDictImpl( + const torch::OrderedDict& params) { + parameters_ = params; + } + + /// `reset()` is empty for `ParameterDict`, since it does not have + /// parameters of its own. + void reset() override {} + + /// Pretty prints the `ParameterDict` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ParameterDict(" << '\n'; + for (const auto& pair : parameters_) { + stream << '(' << pair.key() << ')' << ": Parameter containing: [" + << pair.value().scalar_type() << " of size " + << pair.value().sizes() << ']'; + ; + stream << '\n'; + } + stream << ')'; + } + + /// Insert the parameter along with the key into ParameterDict + /// The parameter is set to be require grad by default + Tensor& insert(const std::string& key, const Tensor& param) { + bool requires_grad = param.requires_grad(); + return register_parameter(key, param, requires_grad); + } + + /// Remove key from the ParameterDict and return its value, throw exception + /// if the key is not contained. Please check contains(key) before for a + /// non-throwing access. + Tensor pop(const std::string& key) { + torch::Tensor v = parameters_[key]; + parameters_.erase(key); + return v; + } + + /// Return the keys in the dict + ::std::vector keys() const { + return parameters_.keys(); + } + + /// Return the Values in the dict + ::std::vector values() const { + return parameters_.values(); + } + + /// Return an iterator to the start of ParameterDict + Iterator begin() { + return parameters_.begin(); + } + + /// Return a const iterator to the start of ParameterDict + ConstIterator begin() const { + return parameters_.begin(); + } + + /// Return an iterator to the end of ParameterDict + Iterator end() { + return parameters_.end(); + } + + /// Return a const iterator to the end of ParameterDict + ConstIterator end() const { + return parameters_.end(); + } + + /// Return the number of items currently stored in the ParameterDict + size_t size() const noexcept { + return parameters_.size(); + } + + /// Return true if the ParameterDict is empty, otherwise return false + bool empty() const noexcept { + return parameters_.is_empty(); + } + + /// Update the ParameterDict with the key-value pairs from + /// another ParameterDict, overwriting 
existing key + template + void update(const Container& container) { + for (auto& item : container) { + parameters_[item.key()] = item.value(); + } + } + + /// Remove all parameters in the ParameterDict + void clear() { + parameters_.clear(); + } + + /// Check if the certain parameter with the key in the ParameterDict + bool contains(const std::string& key) const noexcept { + return parameters_.contains(key); + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + const Tensor& get(const std::string& key) const { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + Tensor& get(const std::string& key) { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + Tensor& operator[](const std::string& key) { + return parameters_[key]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterDict`. Check contains(key) before + /// for a non-throwing way of access + const Tensor& operator[](const std::string& key) const { + return parameters_[key]; + } +}; + +TORCH_MODULE(ParameterDict); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterlist.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterlist.h new file mode 100644 index 0000000000000000000000000000000000000000..1c087f46b6b275a7fac804a36034ef146c9f71c2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/parameterlist.h @@ -0,0 +1,172 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::nn { +class ParameterListImpl : public Cloneable { + public: + using Iterator = + std::vector::Item>::iterator; + using ConstIterator = std::vector< + OrderedDict::Item>::const_iterator; + + ParameterListImpl() = default; + + /// Constructs the `ParameterList` from a variadic list of ParameterList. + template + explicit ParameterListImpl(Tensors&&... params) { + parameters_.reserve(sizeof...(Tensors)); + push_back_var(std::forward(params)...); + } + + template + explicit ParameterListImpl(const Tensors&... params) { + parameters_.reserve(sizeof...(Tensors)); + push_back_var(std::forward(params)...); + } + + /// `reset()` is empty for `ParameterList`, since it does not have parameters + /// of its own. + void reset() override {} + + /// Pretty prints the `ParameterList` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ParameterList(" << '\n'; + for (const auto& pair : parameters_) { + stream << '(' << pair.key() << ')' << ": Parameter containing: [" + << pair.value().scalar_type() << " of size " + << pair.value().sizes() << ']'; + ; + stream << '\n'; + } + stream << ')'; + } + + /// push the a given parameter at the end of the list + void append(torch::Tensor&& param) { + bool requires_grad = param.requires_grad(); + register_parameter( + std::to_string(parameters_.size()), std::move(param), requires_grad); + } + + /// push the a given parameter at the end of the list + void append(const torch::Tensor& param) { + bool requires_grad = param.requires_grad(); + register_parameter( + std::to_string(parameters_.size()), param, requires_grad); + } + + /// push the a given parameter at the end of the list + /// And the key of the pair will be discarded, only the value + /// will be added into the `ParameterList` + void append(const OrderedDict::Item& pair) { + register_parameter( + std::to_string(parameters_.size()), + pair.value(), + pair.value().requires_grad()); + } + + /// extend parameters from a container to the end of the list + template + void extend(const Container& container) { + for (const auto& param : container) { + append(param); + } + } + + /// Returns an iterator to the start of the ParameterList + /// the iterator returned will be type of `OrderedDict::Item` + Iterator begin() { + return parameters_.begin(); + } + + /// Returns a const iterator to the start of the ParameterList + /// the iterator returned will be type of `OrderedDict::Item` + ConstIterator begin() const { + return parameters_.begin(); + } + + /// Returns an iterator to the end of the ParameterList + /// the iterator returned will be type of `OrderedDict::Item` + Iterator end() { + return parameters_.end(); + } + + /// Returns a const iterator to the end of the ParameterList + /// the iterator returned will be type of 
`OrderedDict::Item` + ConstIterator end() const { + return parameters_.end(); + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterList`. Check contains(key) before + /// for a non-throwing way of access + at::Tensor& at(size_t idx) { + TORCH_CHECK(idx < size(), "Index out of range"); + return parameters_[std::to_string(idx)]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterList`. Check contains(key) before + /// for a non-throwing way of access + const at::Tensor& at(size_t idx) const { + TORCH_CHECK(idx < size(), "Index out of range"); + return parameters_[std::to_string(idx)]; + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterList`. Check contains(key) before + /// for a non-throwing way of access + at::Tensor& operator[](size_t idx) { + return at(idx); + } + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `ParameterList`. Check contains(key) before + /// for a non-throwing way of access + const at::Tensor& operator[](size_t idx) const { + return at(idx); + } + + /// Return the size of the ParameterList + size_t size() const noexcept { + return parameters_.size(); + } + /// True if the ParameterList is empty + bool is_empty() const noexcept { + return parameters_.is_empty(); + } + + /// Overload the +=, so that two ParameterList could be incrementally added + template + Container& operator+=(const Container& other) { + extend(other); + return *this; + } + + private: + template + void push_back_var(Head&& head, Tail&&... tail) { + append(std::forward(head)); + // Recursively calls this method, until the parameter pack only thas this + // entry left. Then calls `push_back()` a final time (above). 
+ push_back_var(std::forward(tail)...); + } + + /// The base case, when the list of modules is empty. + void push_back_var() {} +}; +TORCH_MODULE(ParameterList); +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/sequential.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/sequential.h new file mode 100644 index 0000000000000000000000000000000000000000..2b79721dd8a9798fb28f89c06d5ceac9876532eb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/container/sequential.h @@ -0,0 +1,392 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::nn { + +/// A list of `Module`s that acts as a `Module` itself. +/// +/// A `Sequential` is fundamentally a list of `Module`s, each with a `forward()` +/// method. `Sequential` provides a `forward()` method of its own, which accepts +/// any input and forwards it to the first module it stores. It then "chains" +/// outputs to inputs sequentially for each subsequent module, finally returning +/// the output of the last module. For example: +/// +/// \rst +/// .. 
code-block:: cpp +/// +/// torch::nn::Sequential seq( +/// torch::nn::Linear(3, 4), +/// torch::nn::BatchNorm1d(4), +/// torch::nn::Dropout(0.5) +/// ); +/// +/// auto output = seq->forward(torch::ones(3)); +/// +/// \endrst +/// +/// This can conceptually be thought of as the following loop (using Python as +/// pseudocode): +/// +/// \rst +/// .. code-block:: python +/// +/// def forward(sequential, input): +/// for module in sequential: +/// input = module(input) +/// return input +/// +/// \endrst +/// +/// Why should you use `Sequential` instead of a simple `std::vector`? The value +/// a `Sequential` provides over manually calling a sequence of modules is that +/// it allows treating the whole container *as a single module*, such that +/// performing a transformation on the `Sequential` applies to each of the +/// modules it stores (which are each a registered submodule of the +/// `Sequential`). For example, calling +/// `.to(torch::kCUDA)` on a `Sequential` will move each module in the list to +/// CUDA memory. For example: +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::Sequential seq( +/// torch::nn::Linear(3, 4), +/// torch::nn::BatchNorm1d(4), +/// torch::nn::Dropout(0.5) +/// ); +/// +/// // Convert all modules to CUDA. +/// seq->to(torch::kCUDA); +/// +/// \endrst +/// +/// Finally, `Sequential` provides a lightweight container API, such as allowing +/// iteration over submodules, positional access, adding a new module after +/// construction via `push_back`, as well as joining two `Sequential`s via +/// `extend`. +/// +/// \rst +/// .. attention:: +/// One current limitation of `Sequential` is that all except the first module +/// must accept a single argument. If your modules need to take multiple +/// arguments, you should define them to take and return tuples. 
+/// \endrst +class SequentialImpl : public Cloneable { + public: + using Iterator = std::vector::iterator; + using ConstIterator = std::vector::const_iterator; + + SequentialImpl() = default; + + /// Constructs the `Sequential` from a variadic list of modules. + template + explicit SequentialImpl(Modules&&... modules) { + modules_.reserve(sizeof...(Modules)); + push_back(std::forward(modules)...); + } + + /// Constructs the `Sequential` from an `OrderedDict` of named `AnyModule`s. + explicit SequentialImpl( + torch::OrderedDict&& ordered_dict) { + modules_.reserve(ordered_dict.size()); + for (auto& item : ordered_dict) { + push_back(item.key(), std::move(item.value())); + } + } + + /// Constructs the `Sequential` from a braced-init-list of named `AnyModule`s. + /// It enables the following use case: + /// `Sequential sequential({{"m1", M(1)}, {"m2", M(2)}})` + explicit SequentialImpl(std::initializer_list named_modules) { + modules_.reserve(named_modules.size()); + for (const auto& named_module : named_modules) { + push_back(named_module.name(), named_module.module()); + } + } + + /// Special cloning function for `Sequential` because it does not use + /// `reset()`. + std::shared_ptr clone( + const std::optional& device = std::nullopt) const override { + auto clone = std::make_shared(); + for (const auto& module : modules_) { + clone->push_back(module.clone(device)); + } + return clone; + } + + /// `reset()` is empty for `Sequential`, since it does not have parameters of + /// its own. + void reset() override {} + + /// Pretty prints the `Sequential` module into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::Sequential"; + } + + /// Feeds `inputs` to the first module and then chains outputs to inputs, + /// returning the last output. + /// + /// Conceptually the following loop in Python: + /// + /// \rst + /// .. 
code-block:: python + /// + /// def forward(sequential, input): + /// for module in sequential: + /// input = module(input) + /// return input + /// + /// \endrst + /// + /// The return type is taken as the first template parameter. It defaults to + /// `Tensor`. If the last module in the `Sequential` returns another type `T`, + /// you should call `forward(inputs)` instead of just `forward(inputs)`: + /// + /// \rst + /// .. code-block:: cpp + /// + /// torch::Tensor tensor = sequential1->forward(inputs); + /// int integer = sequential2->forward(inputs); + /// float value = sequential3->forward(inputs); + /// + /// \endrst + template + ReturnType forward(InputTypes&&... inputs) { + TORCH_CHECK(!is_empty(), "Cannot call forward() on an empty Sequential"); + + auto iterator = modules_.begin(); + auto input = iterator->any_forward(std::forward(inputs)...); + + for (++iterator; iterator != modules_.end(); ++iterator) { + input = iterator->any_forward(std::move(input)); + } + + // Check the return value and give a nice error message if the requested + // return type was incorrect. + if (auto* return_value = input.template try_get()) { + return std::move(*return_value); + } + TORCH_CHECK( + false, + "The type of the return value is ", + c10::demangle(input.type_info().name()), + ", but you asked for type ", + c10::demangle(typeid(ReturnType).name())); + } + + /// Adds a new (boxed) `Module` to the `Sequential` container. + template + void push_back(std::shared_ptr module_ptr) { + push_back(std::to_string(modules_.size()), std::move(module_ptr)); + } + + /// Adds a new named (boxed) `Module` to the `Sequential` container. + template + void push_back(std::string name, std::shared_ptr module_ptr) { + push_back(std::move(name), AnyModule(std::move(module_ptr))); + } + + /// Adds a new `Module` to the `Sequential` container, moving or copying it + /// into a `shared_ptr` internally. This method allows passing value types, + /// and letting the container deal with the boxing. 
This means you can write + /// `Sequential(Module(3, 4))` instead of + /// `Sequential(std::make_shared(3, 4))`. + template > + void push_back(M&& module) { + push_back(std::to_string(modules_.size()), std::forward(module)); + } + + /// Adds a new named `Module` to the `Sequential` container, moving or copying + /// it into a `shared_ptr` internally. This method allows passing value types, + /// and letting the container deal with the boxing. + template > + void push_back(std::string name, M&& module) { + using Type = typename std::remove_reference_t; + push_back(std::move(name), std::make_shared(std::forward(module))); + } + + /// Unwraps the contained module of a `ModuleHolder` and adds it to the + /// `Sequential`. + template + void push_back(const ModuleHolder& module_holder) { + push_back(std::to_string(modules_.size()), module_holder); + } + + /// Unwraps the contained named module of a `ModuleHolder` and adds it to the + /// `Sequential`. + template + void push_back(std::string name, const ModuleHolder& module_holder) { + push_back(std::move(name), module_holder.ptr()); + } + + /// Iterates over the container and calls `push_back()` on each value. + template + void extend(const Container& container) { + for (const auto& module : container) { + push_back(module); + } + } + + /// Adds a type-erased `AnyModule` to the `Sequential`. + void push_back(AnyModule any_module) { + push_back(std::to_string(modules_.size()), std::move(any_module)); + } + + void push_back(std::string name, AnyModule any_module) { + modules_.push_back(std::move(any_module)); + const auto index = modules_.size() - 1; + register_module(std::move(name), modules_[index].ptr()); + } + + /// Returns an iterator to the start of the `Sequential`. + Iterator begin() { + return modules_.begin(); + } + + /// Returns a const iterator to the start of the `Sequential`. + ConstIterator begin() const { + return modules_.begin(); + } + + /// Returns an iterator to the end of the `Sequential`. 
+  Iterator end() {
+    return modules_.end();
+  }
+
+  /// Const overload: one-past-the-last position of the stored modules.
+  ConstIterator end() const {
+    return modules_.end();
+  }
+
+  /// Typed access to the module stored at `index`. The index is range-checked
+  /// first; the stored module is then asked for a `T`, which fails if the
+  /// types do not match.
+  template <typename T>
+  T& at(size_t index) {
+    static_assert(
+        torch::detail::is_module<T>::value,
+        "Can only call Sequential::at with an nn::Module type");
+    TORCH_CHECK(index < size(), "Index out of range");
+    auto& slot = modules_[index];
+    return slot.get<T>();
+  }
+
+  /// Const overload of the typed, range-checked access above.
+  template <typename T>
+  const T& at(size_t index) const {
+    static_assert(
+        torch::detail::is_module<T>::value,
+        "Can only call Sequential::at with an nn::Module type");
+    TORCH_CHECK(index < size(), "Index out of range");
+    const auto& slot = modules_[index];
+    return slot.get<T>();
+  }
+
+  /// Shared pointer to the module at `index`, typed as the `Module` base.
+  /// Throws an exception if the index is out of bounds.
+  std::shared_ptr<Module> ptr(size_t index) const {
+    TORCH_CHECK(index < size(), "Index out of range");
+    const auto& slot = modules_[index];
+    return slot.ptr();
+  }
+
+  /// Shared pointer to the module at `index`, typed as `T`. Throws an
+  /// exception if the index is out of bounds or the types do not match.
+  template <typename T>
+  std::shared_ptr<T> ptr(size_t index) const {
+    static_assert(
+        torch::detail::is_module<T>::value,
+        "Can only call Sequential::ptr with an nn::Module type");
+    TORCH_CHECK(index < size(), "Index out of range");
+    const auto& slot = modules_[index];
+    return slot.ptr<T>();
+  }
+
+  /// Subscript form of the untyped `ptr(index)`.
+  std::shared_ptr<Module> operator[](size_t index) const {
+    // An operator cannot take an explicit type argument, so only the
+    // base-typed lookup is reachable from here.
+    return ptr(index);
+  }
+
+  /// The current size of the `Sequential` container.
+ size_t size() const noexcept { + return modules_.size(); + } + + /// True if there are no modules in the `Sequential`. + bool is_empty() const noexcept { + return size() == 0; + } + + private: + /// Takes a First *and* Second parameter, to avoid ambiguity when a parameter + /// pack has only one type, in which case the template would be preferred, + /// even if the other `push_back` functions are better fits (e.g. `unique_ptr` + /// -> `shared_ptr` overload). + /// NOTE: We explicitly avoid matching this template with + /// `push_back(std::string("name"), module)` or `push_back("name", module)`, + /// since they should be handled by their respective `push_back` functions. + template < + typename First, + typename Second, + typename... Rest, + typename = std::enable_if_t< + !std::is_same_v && + // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays) + !std::is_same_v, std::decay_t>>> + void push_back(First&& first, Second&& second, Rest&&... rest) { + push_back(std::forward(first)); + // Recursively calls this method, until the parameter pack only thas this + // entry left. Then calls `push_back()` a final time (above). + push_back(std::forward(second), std::forward(rest)...); + } + + /// The base case, when the list of modules is empty. + void push_back() {} + + // Box the AnyModules to give Sequential reference semantics, like the rest of + // the API. Note that this is not required otherwise, this could just be a + // `vector`. + std::vector modules_; +}; + +/// A `ModuleHolder` subclass for `SequentialImpl`. +/// See the documentation for `SequentialImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +class Sequential : public torch::nn::ModuleHolder { + public: + using torch::nn::ModuleHolder::ModuleHolder; + + Sequential() = default; + + /// Constructs the `Sequential` from a braced-init-list of named `AnyModule`s. 
+ /// It enables the following use case: + /// `Sequential sequential({{"m1", M(1)}, {"m2", M(2)}})` + Sequential(std::initializer_list named_modules) + : ModuleHolder(std::make_shared(named_modules)) {} +}; +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/conv.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/conv.h new file mode 100644 index 0000000000000000000000000000000000000000..98bc531333b1f595d1ba88ae0f6f8a0903ecf32a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/conv.h @@ -0,0 +1,453 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace torch::nn { + +/// Base class for all (dimension-specialized) convolution modules. 
+template <size_t D, typename Derived>
+class ConvNdImpl : public torch::nn::Cloneable<Derived> {
+ public:
+  /// Stores `options_` and immediately runs `reset()` to validate them and
+  /// create the parameters.
+  explicit ConvNdImpl(detail::ConvNdOptions<D> options_)
+      : options(std::move(options_)) {
+    ConvNdImpl::reset();
+  }
+
+  /// Validates the channel/group options, derives
+  /// `_reversed_padding_repeated_twice` from the padding option, registers the
+  /// `weight` and `bias` parameters and initializes them through
+  /// `reset_parameters()`.
+  void reset() override {
+    TORCH_CHECK(
+        options.in_channels() > 0 && options.groups() > 0 &&
+            options.out_channels() > 0,
+        "in_channels, groups and out_channels must be a positive integer.");
+    TORCH_CHECK(
+        options.in_channels() % options.groups() == 0,
+        "in_channels must be divisible by groups");
+    TORCH_CHECK(
+        options.out_channels() % options.groups() == 0,
+        "out_channels must be divisible by groups");
+
+    std::visit(
+        c10::overloaded(
+            [&](enumtype::kValid) {
+              _reversed_padding_repeated_twice.resize(2 * D);
+              std::fill_n(_reversed_padding_repeated_twice.begin(), 2 * D, 0);
+            },
+            [&](enumtype::kSame) {
+              for (const auto i : c10::irange(D)) {
+                const auto stride = (*options.stride())[i];
+                TORCH_CHECK(
+                    stride == 1,
+                    "padding='same' is not supported for strided convolutions");
+              }
+
+              _reversed_padding_repeated_twice.resize(2 * D);
+              for (const auto i : c10::irange(D)) {
+                const auto dilation = (*options.dilation())[i];
+                const auto kernel_size = (*options.kernel_size())[i];
+                const auto total_padding = dilation * (kernel_size - 1);
+                auto left_pad = total_padding / 2;
+                auto right_pad = total_padding - left_pad;
+                // The vector is stored last-dimension-first (the explicit
+                // padding branch below builds it with
+                // `_reverse_repeat_vector`), so spatial dimension `i` belongs
+                // in the mirrored slot rather than in slot `i`.
+                const auto slot = D - 1 - i;
+                _reversed_padding_repeated_twice[2 * slot] = left_pad;
+                _reversed_padding_repeated_twice[2 * slot + 1] = right_pad;
+              }
+            },
+            [&](const ExpandingArray<D>& pad) {
+              _reversed_padding_repeated_twice =
+                  torch::nn::modules::utils::_reverse_repeat_vector(pad, 2);
+            }),
+        options.padding());
+
+    // The two leading weight dimensions swap roles for transposed
+    // convolutions; the kernel extents are appended after them either way.
+    if (options.transposed()) {
+      std::vector<int64_t> weight_sizes = {
+          options.in_channels(), options.out_channels() / options.groups()};
+      weight_sizes.insert(
+          weight_sizes.end(),
+          (*options.kernel_size()).begin(),
+          (*options.kernel_size()).end());
+      weight = this->register_parameter("weight", torch::empty(weight_sizes));
+    } else {
+      std::vector<int64_t> weight_sizes = {
+          options.out_channels(), options.in_channels() / options.groups()};
+      weight_sizes.insert(
+          weight_sizes.end(),
+          (*options.kernel_size()).begin(),
+          (*options.kernel_size()).end());
+      weight = this->register_parameter("weight", torch::empty(weight_sizes));
+    }
+
+    if (options.bias()) {
+      bias = this->register_parameter(
+          "bias", torch::empty({options.out_channels()}));
+    } else {
+      // Register an undefined tensor so the name "bias" exists even when the
+      // module has no bias.
+      this->register_parameter("bias", Tensor(), /*requires_grad=*/false);
+    }
+
+    reset_parameters();
+  }
+
+  /// Re-initializes `weight` with `kaiming_uniform_` and, when a bias is
+  /// defined, draws it uniformly from `[-1/sqrt(fan_in), 1/sqrt(fan_in)]`.
+  void reset_parameters() {
+    init::kaiming_uniform_(
+        weight,
+        /*a=*/std::sqrt(5)); // NOLINT(cppcoreguidelines-avoid-magic-numbers)
+
+    if (bias.defined()) {
+      auto [fan_in, fan_out] = init::_calculate_fan_in_and_fan_out(weight);
+      auto bound = 1 / std::sqrt(fan_in);
+      init::uniform_(bias, -bound, bound);
+    }
+  }
+
+  /// Pretty prints the `Conv{1,2,3}d` module into the given `stream`.
+  /// Options that still hold the value they are compared against below
+  /// (zero padding, unit dilation, zero output padding, one group, bias
+  /// enabled, `kZeros` padding mode) are omitted from the output.
+  void pretty_print(std::ostream& stream) const override {
+    stream << "torch::nn::Conv" << D << 'd' << '(' << options.in_channels()
+           << ", " << options.out_channels()
+           << ", kernel_size=" << options.kernel_size()
+           << ", stride=" << options.stride();
+    std::visit(
+        c10::overloaded(
+            [&](enumtype::kValid) { stream << ", padding='valid'"; },
+            [&](enumtype::kSame) { stream << ", padding='same'"; },
+            [&](const ExpandingArray<D>& pad) {
+              if (*pad != *ExpandingArray<D>(0)) {
+                stream << ", padding=" << pad;
+              }
+            }),
+        options.padding());
+    if (*options.dilation() != *ExpandingArray<D>(1)) {
+      stream << ", dilation=" << options.dilation();
+    }
+    if (*options.output_padding() != *ExpandingArray<D>(0)) {
+      stream << ", output_padding=" << options.output_padding();
+    }
+    if (options.groups() != 1) {
+      stream << ", groups=" << options.groups();
+    }
+    if (!options.bias()) {
+      stream << ", bias=" << std::boolalpha << false;
+    }
+    if (!std::get_if<enumtype::kZeros>(&options.padding_mode())) {
+      stream << ", padding_mode="
+             << enumtype::get_enum_name(options.padding_mode());
+    }
+    stream << ')';
+  }
+
+  /// The options with which this `Module` was constructed.
+  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
+  detail::ConvNdOptions<D> options;
+
+  /// The learned kernel (or "weight").
+  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
+  Tensor weight;
+
+  /// The learned bias. Only defined if the `bias` option was true.
+  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
+  Tensor bias;
+
+ protected:
+  /// Per-dimension (left, right) padding pairs, last spatial dimension first.
+  /// Filled in by `reset()`.
+  // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes)
+  std::vector<int64_t> _reversed_padding_repeated_twice;
+};
+
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conv1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+/// Applies convolution over a 1-D input.
+/// See https://pytorch.org/docs/main/nn.html#torch.nn.Conv1d to learn about
+/// the exact behavior of this module.
+///
+/// See the documentation for `torch::nn::Conv1dOptions` class to learn what
+/// constructor arguments are supported for this module.
+///
+/// Example:
+/// ```
+/// Conv1d model(Conv1dOptions(3, 2, 3).stride(1).bias(false));
+/// ```
+class TORCH_API Conv1dImpl : public ConvNdImpl<1, Conv1dImpl> {
+ public:
+  Conv1dImpl(
+      int64_t input_channels,
+      int64_t output_channels,
+      ExpandingArray<1> kernel_size)
+      : Conv1dImpl(
+            Conv1dOptions(input_channels, output_channels, kernel_size)) {}
+  explicit Conv1dImpl(Conv1dOptions options_);
+  Tensor forward(const Tensor& input);
+};
+
+/// A `ModuleHolder` subclass for `Conv1dImpl`.
+/// See the documentation for `Conv1dImpl` class to learn what methods it
+/// provides, and examples of how to use `Conv1d` with
+/// `torch::nn::Conv1dOptions`. See the documentation for `ModuleHolder` to
+/// learn about PyTorch's module storage semantics.
+TORCH_MODULE(Conv1d);
+
+// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conv2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+/// Applies convolution over a 2-D input.
+/// See https://pytorch.org/docs/main/nn.html#torch.nn.Conv2d to learn about
+/// the exact behavior of this module.
+/// +/// See the documentation for `torch::nn::Conv2dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Conv2d model(Conv2dOptions(3, 2, 3).stride(1).bias(false)); +/// ``` +class TORCH_API Conv2dImpl : public ConvNdImpl<2, Conv2dImpl> { + public: + Conv2dImpl( + int64_t input_channels, + int64_t output_channels, + ExpandingArray<2> kernel_size) + : Conv2dImpl( + Conv2dOptions(input_channels, output_channels, kernel_size)) {} + explicit Conv2dImpl(Conv2dOptions options_); + Tensor forward(const Tensor& input); + + protected: + Tensor _conv_forward(const Tensor& input, const Tensor& weight); +}; + +/// A `ModuleHolder` subclass for `Conv2dImpl`. +/// See the documentation for `Conv2dImpl` class to learn what methods it +/// provides, and examples of how to use `Conv2d` with +/// `torch::nn::Conv2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Conv2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Conv3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies convolution over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Conv3d to learn about +/// the exact behavior of this module. +/// +/// See the documentation for `torch::nn::Conv3dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Conv3d model(Conv3dOptions(3, 2, 3).stride(1).bias(false)); +/// ``` +class TORCH_API Conv3dImpl : public ConvNdImpl<3, Conv3dImpl> { + public: + Conv3dImpl( + int64_t input_channels, + int64_t output_channels, + ExpandingArray<3> kernel_size) + : Conv3dImpl( + Conv3dOptions(input_channels, output_channels, kernel_size)) {} + explicit Conv3dImpl(Conv3dOptions options_); + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `Conv3dImpl`. 
+/// See the documentation for `Conv3dImpl` class to learn what methods it +/// provides, and examples of how to use `Conv3d` with +/// `torch::nn::Conv3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Conv3d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~ ConvTranspose ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Base class for all (dimension-specialized) convolution transpose modules. +template +class ConvTransposeNdImpl : public ConvNdImpl { + public: + using torch::nn::ConvNdImpl::ConvNdImpl; + explicit ConvTransposeNdImpl(detail::ConvNdOptions options_) + : ConvNdImpl(options_) { + TORCH_INTERNAL_ASSERT( + std::holds_alternative>(this->options.padding()), + "ConvTranspose padding cannot be a string"); + } + + /// Pretty prints the `ConvTranspose{1,2,3}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::ConvTranspose" << D << 'd' << '(' + << this->options.in_channels() << ", " + << this->options.out_channels() + << ", kernel_size=" << this->options.kernel_size() + << ", stride=" << this->options.stride(); + const auto& pad = padding(); + if (*pad != *ExpandingArray(0)) { + stream << ", padding=" << pad; + } + if (*this->options.dilation() != *ExpandingArray(1)) { + stream << ", dilation=" << this->options.dilation(); + } + if (*this->options.output_padding() != *ExpandingArray(0)) { + stream << ", output_padding=" << this->options.output_padding(); + } + if (this->options.groups() != 1) { + stream << ", groups=" << this->options.groups(); + } + if (!this->options.bias()) { + stream << ", bias=" << std::boolalpha << false; + } + if (!std::get_if(&this->options.padding_mode())) { + stream << ", padding_mode=" + << enumtype::get_enum_name(this->options.padding_mode()); + } + stream << ')'; + } + + protected: + const ExpandingArray& padding() const { + return std::get>(this->options.padding()); + } + + std::vector _output_padding( + const Tensor& 
input, + const std::optional& output_size, + const ExpandingArray& stride, + const ExpandingArray& padding, + const ExpandingArray& kernel_size); +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConvTranspose1d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the ConvTranspose1d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConvTranspose1d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ConvTranspose1dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ConvTranspose1d model(ConvTranspose1dOptions(3, 2, +/// 3).stride(1).bias(false)); +/// ``` +class TORCH_API ConvTranspose1dImpl + : public ConvTransposeNdImpl<1, ConvTranspose1dImpl> { + public: + ConvTranspose1dImpl( + int64_t input_channels, + int64_t output_channels, + ExpandingArray<1> kernel_size) + : ConvTranspose1dImpl(ConvTranspose1dOptions( + input_channels, + output_channels, + kernel_size)) {} + explicit ConvTranspose1dImpl(ConvTranspose1dOptions options_); + Tensor forward( + const Tensor& input, + const std::optional& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(std::optional())}) +}; + +/// A `ModuleHolder` subclass for `ConvTranspose1dImpl`. +/// See the documentation for `ConvTranspose1dImpl` class to learn what methods +/// it provides, and examples of how to use `ConvTranspose1d` with +/// `torch::nn::ConvTranspose1dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConvTranspose1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConvTranspose2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the ConvTranspose2d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConvTranspose2d to +/// learn about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::ConvTranspose2dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ConvTranspose2d model(ConvTranspose2dOptions(3, 2, +/// 3).stride(1).bias(false)); +/// ``` +class TORCH_API ConvTranspose2dImpl + : public ConvTransposeNdImpl<2, ConvTranspose2dImpl> { + public: + ConvTranspose2dImpl( + int64_t input_channels, + int64_t output_channels, + ExpandingArray<2> kernel_size) + : ConvTranspose2dImpl(ConvTranspose2dOptions( + input_channels, + output_channels, + kernel_size)) {} + explicit ConvTranspose2dImpl(ConvTranspose2dOptions options_); + Tensor forward( + const Tensor& input, + const std::optional& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(std::optional())}) +}; + +/// A `ModuleHolder` subclass for `ConvTranspose2dImpl`. +/// See the documentation for `ConvTranspose2dImpl` class to learn what methods +/// it provides, and examples of how to use `ConvTranspose2d` with +/// `torch::nn::ConvTranspose2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConvTranspose2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConvTranspose3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the ConvTranspose3d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConvTranspose3d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ConvTranspose3dOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// ConvTranspose3d model(ConvTranspose3dOptions(2, 2, +/// 2).stride(1).bias(false)); +/// ``` +class TORCH_API ConvTranspose3dImpl + : public ConvTransposeNdImpl<3, ConvTranspose3dImpl> { + public: + ConvTranspose3dImpl( + int64_t input_channels, + int64_t output_channels, + ExpandingArray<3> kernel_size) + : ConvTranspose3dImpl(ConvTranspose3dOptions( + input_channels, + output_channels, + kernel_size)) {} + explicit ConvTranspose3dImpl(ConvTranspose3dOptions options_); + Tensor forward( + const Tensor& input, + const std::optional& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(std::optional())}) +}; + +/// A `ModuleHolder` subclass for `ConvTranspose3dImpl`. +/// See the documentation for `ConvTranspose3dImpl` class to learn what methods +/// it provides, and examples of how to use `ConvTranspose3d` with +/// `torch::nn::ConvTranspose3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConvTranspose3d); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/distance.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/distance.h new file mode 100644 index 0000000000000000000000000000000000000000..50e6cad849a983372d72b0728c85df2146511cac --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/distance.h @@ -0,0 +1,89 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +/// Returns the cosine similarity between :math:`x_1` and :math:`x_2`, computed +/// along `dim`. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.CosineSimilarity to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::CosineSimilarityOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CosineSimilarity model(CosineSimilarityOptions().dim(0).eps(0.5)); +/// ``` +class TORCH_API CosineSimilarityImpl : public Cloneable { + public: + explicit CosineSimilarityImpl(const CosineSimilarityOptions& options_ = {}); + + void reset() override; + + /// Pretty prints the `CosineSimilarity` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input1, const Tensor& input2); + + /// The options with which this `Module` was constructed. + CosineSimilarityOptions options; +}; + +/// A `ModuleHolder` subclass for `CosineSimilarityImpl`. +/// See the documentation for `CosineSimilarityImpl` class to learn what methods +/// it provides, and examples of how to use `CosineSimilarity` with +/// `torch::nn::CosineSimilarityOptions`. 
See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(CosineSimilarity); + +// ============================================================================ + +/// Returns the batchwise pairwise distance between vectors :math:`v_1`, +/// :math:`v_2` using the p-norm. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.PairwiseDistance to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::PairwiseDistanceOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// PairwiseDistance +/// model(PairwiseDistanceOptions().p(3).eps(0.5).keepdim(true)); +/// ``` +class TORCH_API PairwiseDistanceImpl : public Cloneable { + public: + explicit PairwiseDistanceImpl(const PairwiseDistanceOptions& options_ = {}); + + void reset() override; + + /// Pretty prints the `PairwiseDistance` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input1, const Tensor& input2); + + /// The options with which this `Module` was constructed. + PairwiseDistanceOptions options; +}; + +/// A `ModuleHolder` subclass for `PairwiseDistanceImpl`. +/// See the documentation for `PairwiseDistanceImpl` class to learn what methods +/// it provides, and examples of how to use `PairwiseDistance` with +/// `torch::nn::PairwiseDistanceOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(PairwiseDistance); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/dropout.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..98585fdd7be341727b67ccb4c6c936edda8ad159 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/dropout.h @@ -0,0 +1,189 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::nn { + +namespace detail { + +template +class _DropoutNd : public torch::nn::Cloneable { + public: + _DropoutNd(double p) : _DropoutNd(DropoutOptions().p(p)) {} + + explicit _DropoutNd(const DropoutOptions& options_ = {}) : options(options_) { + _DropoutNd::reset(); + } + + void reset() override { + TORCH_CHECK( + options.p() >= 0. && options.p() <= 1., + "dropout probability has to be between 0 and 1, but got ", + options.p()); + } + + /// The options with which this `Module` was constructed. + DropoutOptions options; +}; + +} // namespace detail + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dropout ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies dropout over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Dropout to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::DropoutOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// Dropout model(DropoutOptions().p(0.42).inplace(true)); +/// ``` +class TORCH_API DropoutImpl : public detail::_DropoutNd { + public: + using detail::_DropoutNd::_DropoutNd; + + Tensor forward(Tensor input); + + /// Pretty prints the `Dropout` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `DropoutImpl`. +/// See the documentation for `DropoutImpl` class to learn what methods it +/// provides, and examples of how to use `Dropout` with +/// `torch::nn::DropoutOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Dropout); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dropout2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies dropout over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Dropout2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::Dropout2dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Dropout2d model(Dropout2dOptions().p(0.42).inplace(true)); +/// ``` +class TORCH_API Dropout2dImpl : public detail::_DropoutNd { + public: + using detail::_DropoutNd::_DropoutNd; + + Tensor forward(Tensor input); + + /// Pretty prints the `Dropout2d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `Dropout2dImpl`. +/// See the documentation for `Dropout2dImpl` class to learn what methods it +/// provides, and examples of how to use `Dropout2d` with +/// `torch::nn::Dropout2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Dropout2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Dropout3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies dropout over a 3-D input. 
+/// See https://pytorch.org/docs/main/nn.html#torch.nn.Dropout3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::Dropout3dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Dropout3d model(Dropout3dOptions().p(0.42).inplace(true)); +/// ``` +class TORCH_API Dropout3dImpl : public detail::_DropoutNd { + public: + using detail::_DropoutNd::_DropoutNd; + + Tensor forward(Tensor input); + + /// Pretty prints the `Dropout3d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `Dropout3dImpl`. +/// See the documentation for `Dropout3dImpl` class to learn what methods it +/// provides, and examples of how to use `Dropout3d` with +/// `torch::nn::Dropout3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Dropout3d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AlphaDropout ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Alpha Dropout over the input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AlphaDropout to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AlphaDropoutOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AlphaDropout model(AlphaDropoutOptions(0.2).inplace(true)); +/// ``` +class TORCH_API AlphaDropoutImpl : public detail::_DropoutNd { + public: + using detail::_DropoutNd::_DropoutNd; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `AlphaDropout` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `AlphaDropoutImpl`. 
+/// See the documentation for `AlphaDropoutImpl` class to learn what methods it +/// provides, and examples of how to use `AlphaDropout` with +/// `torch::nn::AlphaDropoutOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(AlphaDropout); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FeatureAlphaDropout +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// See the documentation for `torch::nn::FeatureAlphaDropoutOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// FeatureAlphaDropout model(FeatureAlphaDropoutOptions(0.2).inplace(true)); +/// ``` +class TORCH_API FeatureAlphaDropoutImpl + : public detail::_DropoutNd { + public: + using detail::_DropoutNd::_DropoutNd; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `FeatureAlphaDropout` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; +}; + +/// A `ModuleHolder` subclass for `FeatureAlphaDropoutImpl`. +/// See the documentation for `FeatureAlphaDropoutImpl` class to learn what +/// methods it provides, and examples of how to use `FeatureAlphaDropout` with +/// `torch::nn::FeatureAlphaDropoutOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(FeatureAlphaDropout); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/embedding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/embedding.h new file mode 100644 index 0000000000000000000000000000000000000000..18997923e58279fbd4c35a529a22a0178971c4e2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/embedding.h @@ -0,0 +1,170 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Embedding +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Performs a lookup in a fixed size embedding table. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Embedding to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::EmbeddingOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Embedding model(EmbeddingOptions(10, +/// 2).padding_idx(3).max_norm(2).norm_type(2.5).scale_grad_by_freq(true).sparse(true)); +/// ``` +class TORCH_API EmbeddingImpl : public torch::nn::Cloneable { + public: + EmbeddingImpl(int64_t num_embeddings, int64_t embedding_dim) + : EmbeddingImpl(EmbeddingOptions(num_embeddings, embedding_dim)) {} + explicit EmbeddingImpl(EmbeddingOptions options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `Embedding` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Performs a lookup on the embedding table stored in `weight` using the + /// `indices` supplied and returns the result. 
+ Tensor forward(const Tensor& indices); + + /// The `Options` used to configure this `Embedding` module. + /// Changes to `EmbeddingOptions` *after construction* have no effect. + EmbeddingOptions options; + + /// The embedding table. + Tensor weight; +}; + +/// A `ModuleHolder` subclass for `EmbeddingImpl`. +/// See the documentation for `EmbeddingImpl` class to learn what methods it +/// provides, and examples of how to use `Embedding` with +/// `torch::nn::EmbeddingOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +class Embedding : public torch::nn::ModuleHolder { + public: + using torch::nn::ModuleHolder::ModuleHolder; + + /// See the documentation for `torch::nn::EmbeddingFromPretrainedOptions` + /// class to learn what optional arguments are supported for this function. + static Embedding from_pretrained( + const torch::Tensor& embeddings, + const EmbeddingFromPretrainedOptions& options = {}) { + TORCH_CHECK( + embeddings.dim() == 2, + "Embeddings parameter is expected to be 2-dimensional"); + + auto rows = embeddings.size(0); + auto cols = embeddings.size(1); + + Embedding embedding(EmbeddingOptions(rows, cols) + ._weight(embeddings) + .padding_idx(options.padding_idx()) + .max_norm(options.max_norm()) + .norm_type(options.norm_type()) + .scale_grad_by_freq(options.scale_grad_by_freq()) + .sparse(options.sparse())); + embedding->weight.set_requires_grad(!options.freeze()); + return embedding; + } +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ EmbeddingBag +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Computes sums or means of 'bags' of embeddings, without instantiating the +/// intermediate embeddings. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.EmbeddingBag to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::EmbeddingBagOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// EmbeddingBag model(EmbeddingBagOptions(10, +/// 2).max_norm(2).norm_type(2.5).scale_grad_by_freq(true).sparse(true).mode(torch::kSum).padding_idx(1)); +/// ``` +class TORCH_API EmbeddingBagImpl + : public torch::nn::Cloneable { + public: + EmbeddingBagImpl(int64_t num_embeddings, int64_t embedding_dim) + : EmbeddingBagImpl(EmbeddingBagOptions(num_embeddings, embedding_dim)) {} + explicit EmbeddingBagImpl(EmbeddingBagOptions options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `EmbeddingBag` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The `Options` used to configure this `EmbeddingBag` module. + EmbeddingBagOptions options; + /// The embedding table. + Tensor weight; + + Tensor forward( + const Tensor& input, + const Tensor& offsets = {}, + const Tensor& per_sample_weights = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}, {2, AnyValue(Tensor())}) +}; + +/// A `ModuleHolder` subclass for `EmbeddingBagImpl`. +/// See the documentation for `EmbeddingBagImpl` class to learn what methods it +/// provides, and examples of how to use `EmbeddingBag` with +/// `torch::nn::EmbeddingBagOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +class EmbeddingBag : public torch::nn::ModuleHolder { + public: + using torch::nn::ModuleHolder::ModuleHolder; + + /// See the documentation for `torch::nn::EmbeddingBagFromPretrainedOptions` + /// class to learn what optional arguments are supported for this function. 
+ static EmbeddingBag from_pretrained( + const torch::Tensor& embeddings, + const EmbeddingBagFromPretrainedOptions& options = {}) { + TORCH_CHECK( + embeddings.dim() == 2, + "Embeddings parameter is expected to be 2-dimensional"); + + auto rows = embeddings.size(0); + auto cols = embeddings.size(1); + + EmbeddingBag embeddingbag( + EmbeddingBagOptions(rows, cols) + ._weight(embeddings) + .max_norm(options.max_norm()) + .norm_type(options.norm_type()) + .scale_grad_by_freq(options.scale_grad_by_freq()) + .mode(options.mode()) + .sparse(options.sparse()) + .padding_idx(options.padding_idx())); + embeddingbag->weight.set_requires_grad(!options.freeze()); + return embeddingbag; + } +}; +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/fold.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/fold.h new file mode 100644 index 0000000000000000000000000000000000000000..8870c07dd7f8afc5d0d81233fba47d8e7dd6230c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/fold.h @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::nn { + +/// Applies fold over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Fold to learn about +/// the exact behavior of this module. +/// +/// See the documentation for `torch::nn::FoldOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// Fold model(FoldOptions({8, 8}, {3, 3}).dilation(2).padding({2, +/// 1}).stride(2)); +/// ``` +class TORCH_API FoldImpl : public torch::nn::Cloneable { + public: + FoldImpl(ExpandingArray<2> output_size, ExpandingArray<2> kernel_size) + : FoldImpl(FoldOptions(output_size, kernel_size)) {} + explicit FoldImpl(const FoldOptions& options_); + + void reset() override; + + /// Pretty prints the `Fold` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// The options with which this `Module` was constructed. + FoldOptions options; +}; + +/// A `ModuleHolder` subclass for `FoldImpl`. +/// See the documentation for `FoldImpl` class to learn what methods it +/// provides, and examples of how to use `Fold` with `torch::nn::FoldOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Fold); + +// ============================================================================ + +/// Applies unfold over a 4-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Unfold to learn about +/// the exact behavior of this module. +/// +/// See the documentation for `torch::nn::UnfoldOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Unfold model(UnfoldOptions({2, 4}).dilation(2).padding({2, 1}).stride(2)); +/// ``` +class TORCH_API UnfoldImpl : public Cloneable { + public: + UnfoldImpl(ExpandingArray<2> kernel_size) + : UnfoldImpl(UnfoldOptions(kernel_size)) {} + explicit UnfoldImpl(const UnfoldOptions& options_); + + void reset() override; + + /// Pretty prints the `Unfold` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// The options with which this `Module` was constructed. 
+ UnfoldOptions options; +}; + +/// A `ModuleHolder` subclass for `UnfoldImpl`. +/// See the documentation for `UnfoldImpl` class to learn what methods it +/// provides, and examples of how to use `Unfold` with +/// `torch::nn::UnfoldOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Unfold); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/instancenorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/instancenorm.h new file mode 100644 index 0000000000000000000000000000000000000000..d82880870eff97093818ca550abd94aa56782b38 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/instancenorm.h @@ -0,0 +1,158 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Base class for all (dimension-specialized) instance norm modules +template +// NOLINTNEXTLINE(bugprone-crtp-constructor-accessibility) +class InstanceNormImpl + : public torch::nn::NormImplBase { + private: + inline Tensor apply_instance_norm(const Tensor& input) { + return torch::nn::functional::detail::instance_norm( + input, + this->running_mean, + this->running_var, + this->weight, + this->bias, + this->is_training() || !this->options.track_running_stats(), + this->options.momentum(), + this->options.eps()); + } + + inline Tensor handle_no_batch_input(const Tensor& input) { + return this->apply_instance_norm(input.unsqueeze(0)).squeeze(0); + } + + public: + using torch::nn::NormImplBase::NormImplBase; + + 
Tensor forward(const Tensor& input) { + this->_check_input_dim(input); + + // For InstanceNorm1D, 2D is unbatched and 3D is batched + // For InstanceNorm2D, 3D is unbatched and 4D is batched + // For InstanceNorm3D, 4D is unbatched and 5D is batched + // check if input does not have a batch-dim + if (input.dim() == D + 1) { + return this->handle_no_batch_input(input); + } + + return this->apply_instance_norm(input); + } + + /// Pretty prints the `InstanceNorm{1,2,3}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override { + stream << std::boolalpha << "torch::nn::InstanceNorm" << D << "d(" + << this->options.num_features() << ", " + << "eps=" << this->options.eps() << ", " + << "momentum=" << this->options.momentum() << ", " + << "affine=" << this->options.affine() << ", " + << "track_running_stats=" << this->options.track_running_stats() + << ')'; + } +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ InstanceNorm1d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the InstanceNorm1d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.InstanceNorm1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::InstanceNorm1dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// InstanceNorm1d +/// model(InstanceNorm1dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API InstanceNorm1dImpl + : public InstanceNormImpl<1, InstanceNorm1dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using InstanceNormImpl<1, InstanceNorm1dImpl>::InstanceNormImpl; +}; + +/// A `ModuleHolder` subclass for `InstanceNorm1dImpl`. +/// See the documentation for `InstanceNorm1dImpl` class to learn what methods +/// it provides, and examples of how to use `InstanceNorm1d` with +/// `torch::nn::InstanceNorm1dOptions`. 
See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(InstanceNorm1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ InstanceNorm2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the InstanceNorm2d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.InstanceNorm2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::InstanceNorm2dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// InstanceNorm2d +/// model(InstanceNorm2dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API InstanceNorm2dImpl + : public InstanceNormImpl<2, InstanceNorm2dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using InstanceNormImpl<2, InstanceNorm2dImpl>::InstanceNormImpl; +}; + +/// A `ModuleHolder` subclass for `InstanceNorm2dImpl`. +/// See the documentation for `InstanceNorm2dImpl` class to learn what methods +/// it provides, and examples of how to use `InstanceNorm2d` with +/// `torch::nn::InstanceNorm2dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(InstanceNorm2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ InstanceNorm3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the InstanceNorm3d function. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.InstanceNorm3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::InstanceNorm3dOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// InstanceNorm3d +/// model(InstanceNorm3dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +class TORCH_API InstanceNorm3dImpl + : public InstanceNormImpl<3, InstanceNorm3dImpl> { + protected: + void _check_input_dim(const Tensor& input) override; + + public: + using InstanceNormImpl<3, InstanceNorm3dImpl>::InstanceNormImpl; +}; + +/// A `ModuleHolder` subclass for `InstanceNorm3dImpl`. +/// See the documentation for `InstanceNorm3dImpl` class to learn what methods +/// it provides, and examples of how to use `InstanceNorm3d` with +/// `torch::nn::InstanceNorm3dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(InstanceNorm3d); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/linear.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/linear.h new file mode 100644 index 0000000000000000000000000000000000000000..5b9e552ade1bb8e176fcdfb469272fe4b72f76b0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/linear.h @@ -0,0 +1,219 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Identity ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A placeholder identity operator that is argument-insensitive. 
+/// See https://pytorch.org/docs/main/generated/torch.nn.Identity.html to +/// learn about the exact behavior of this module. +class TORCH_API IdentityImpl : public Cloneable { + public: + void reset() override; + + /// Pretty prints the `Identity` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `IdentityImpl`. +/// See the documentation for `IdentityImpl` class to learn what methods it +/// provides, or the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Identity); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Linear ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies a linear transformation with optional bias. +/// See https://pytorch.org/docs/main/generated/torch.nn.Linear.html to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LinearOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Linear model(LinearOptions(5, 2).bias(false)); +/// ``` +class TORCH_API LinearImpl : public Cloneable { + public: + LinearImpl(int64_t in_features, int64_t out_features) + : LinearImpl(LinearOptions(in_features, out_features)) {} + explicit LinearImpl(const LinearOptions& options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `Linear` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Transforms the `input` tensor by multiplying with the `weight` and + /// optionally adding the `bias`, if `bias` is true in the options. + Tensor forward(const Tensor& input); + + /// The options used to configure this module. + LinearOptions options; + + /// The learned weight. + Tensor weight; + + /// The learned bias. If `bias` is false in the `options`, this tensor is + /// undefined.
+ Tensor bias; +}; + +/// A `ModuleHolder` subclass for `LinearImpl`. +/// See the documentation for `LinearImpl` class to learn what methods it +/// provides, and examples of how to use `Linear` with +/// `torch::nn::LinearOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Linear); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Flatten ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A placeholder for Flatten operator +/// See https://pytorch.org/docs/main/generated/torch.nn.Flatten.html to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::FlattenOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Flatten model(FlattenOptions().start_dim(2).end_dim(4)); +/// ``` +class TORCH_API FlattenImpl : public Cloneable { + public: + explicit FlattenImpl(const FlattenOptions& options_ = {}); + + void reset() override; + + /// Pretty prints the `Flatten` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Applies a flatten transform on the `input`. + Tensor forward(const Tensor& input); + + /// The options used to configure this module. + FlattenOptions options; +}; + +/// A `ModuleHolder` subclass for `FlattenImpl`. +/// See the documentation for `FlattenImpl` class to learn what methods it +/// provides, and examples of how to use `Flatten` with +/// `torch::nn::FlattenOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Flatten); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Unflatten +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A placeholder for unflatten operator +/// See https://pytorch.org/docs/main/generated/torch.nn.Unflatten.html to +/// learn about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::UnflattenOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Unflatten model(UnflattenOptions(0, {2, 2})); +/// Unflatten model(UnflattenOptions("B", {{"B1", 2}, {"B2", 2}})); +/// ``` +class TORCH_API UnflattenImpl : public Cloneable { + public: + UnflattenImpl(int64_t dim, std::vector sizes) + : UnflattenImpl(UnflattenOptions(dim, std::move(sizes))) {} + UnflattenImpl(std::string dimname, UnflattenOptions::namedshape_t namedshape) + : UnflattenImpl( + UnflattenOptions(std::move(dimname), std::move(namedshape))) {} + explicit UnflattenImpl(UnflattenOptions options_); + + void reset() override; + + /// Pretty prints the `Unflatten` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Applies an unflatten transform on the `input`. + Tensor forward(const Tensor& input); + + /// The options used to configure this module. + UnflattenOptions options; +}; + +/// A `ModuleHolder` subclass for `UnflattenImpl`. +/// See the documentation for `UnflattenImpl` class to learn what methods it +/// provides, and examples of how to use `Unflatten` with +/// `torch::nn::UnflattenOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Unflatten); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Bilinear ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies a bilinear transformation with optional bias. +/// See https://pytorch.org/docs/main/generated/torch.nn.Bilinear.html to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::BilinearOptions` class to learn what +/// constructor arguments are supported for this module.
+/// +/// Example: +/// ``` +/// Bilinear model(BilinearOptions(3, 2, 4).bias(false)); +/// ``` +class TORCH_API BilinearImpl : public Cloneable { + public: + BilinearImpl(int64_t in1_features, int64_t in2_features, int64_t out_features) + : BilinearImpl( + BilinearOptions(in1_features, in2_features, out_features)) {} + explicit BilinearImpl(const BilinearOptions& options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `Bilinear` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Applies a bilinear transform on the `input1` and `input2` tensors by + /// multiplying with the `weight` and optionally adding the `bias`, if + /// `bias` is true in the options. + Tensor forward(const Tensor& input1, const Tensor& input2); + + /// The options used to configure this module. + BilinearOptions options; + + /// The learned weight. + Tensor weight; + + /// The learned bias. If `bias` is false in the `options`, this tensor is + /// undefined. + Tensor bias; +}; + +/// A `ModuleHolder` subclass for `BilinearImpl`. +/// See the documentation for `BilinearImpl` class to learn what methods it +/// provides, and examples of how to use `Bilinear` with +/// `torch::nn::BilinearOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Bilinear); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/loss.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/loss.h new file mode 100644 index 0000000000000000000000000000000000000000..8771c7d3bd1b17ab02b73393fe8f703a82c056bb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/loss.h @@ -0,0 +1,808 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ L1Loss ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the mean absolute error (MAE) between each +/// element in the input :math:`x` and target :math:`y`. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.L1Loss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::L1LossOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// L1Loss model(L1LossOptions(torch::kNone)); +/// ``` +struct TORCH_API L1LossImpl : Cloneable { + explicit L1LossImpl(L1LossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `L1Loss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + L1LossOptions options; +}; + +/// A `ModuleHolder` subclass for `L1LossImpl`. +/// See the documentation for `L1LossImpl` class to learn what methods it +/// provides, and examples of how to use `L1Loss` with +/// `torch::nn::L1LossOptions`.
See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(L1Loss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ KLDivLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// The Kullback-Leibler divergence loss measure +/// See https://pytorch.org/docs/main/nn.html#torch.nn.KLDivLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::KLDivLossOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// KLDivLoss model(KLDivLossOptions().reduction(torch::kNone)); +/// ``` +struct TORCH_API KLDivLossImpl : Cloneable { + explicit KLDivLossImpl(KLDivLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `KLDivLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + KLDivLossOptions options; +}; + +/// A `ModuleHolder` subclass for `KLDivLossImpl`. +/// See the documentation for `KLDivLossImpl` class to learn what methods it +/// provides, and examples of how to use `KLDivLoss` with +/// `torch::nn::KLDivLossOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(KLDivLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MSELoss ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the mean squared error (squared L2 norm) +/// between each element in the input :math:`x` and target :math:`y`. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MSELoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MSELossOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// MSELoss model(MSELossOptions(torch::kNone)); +/// ``` +struct TORCH_API MSELossImpl : Cloneable { + explicit MSELossImpl(MSELossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `MSELoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + MSELossOptions options; +}; + +/// A `ModuleHolder` subclass for `MSELossImpl`. +/// See the documentation for `MSELossImpl` class to learn what methods it +/// provides, and examples of how to use `MSELoss` with +/// `torch::nn::MSELossOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(MSELoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BCELoss ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the Binary Cross Entropy +/// between the target and the output. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.BCELoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::BCELossOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// BCELoss model(BCELossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API BCELossImpl : Cloneable { + explicit BCELossImpl(BCELossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `BCELoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + BCELossOptions options; +}; + +/// A `ModuleHolder` subclass for `BCELossImpl`. 
+/// See the documentation for `BCELossImpl` class to learn what methods it +/// provides, and examples of how to use `BCELoss` with +/// `torch::nn::BCELossOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(BCELoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HingeEmbeddingLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the loss given an input tensor :math:`x` +/// and a labels tensor :math:`y` (containing 1 or -1). +/// See https://pytorch.org/docs/main/nn.html#torch.nn.HingeEmbeddingLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::HingeEmbeddingLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// HingeEmbeddingLoss +/// model(HingeEmbeddingLossOptions().margin(4).reduction(torch::kNone)); +/// ``` +struct TORCH_API HingeEmbeddingLossImpl : Cloneable { + explicit HingeEmbeddingLossImpl(HingeEmbeddingLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `HingeEmbeddingLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + HingeEmbeddingLossOptions options; +}; + +/// A `ModuleHolder` subclass for `HingeEmbeddingLossImpl`. +/// See the documentation for `HingeEmbeddingLossImpl` class to learn what +/// methods it provides, and examples of how to use `HingeEmbeddingLoss` with +/// `torch::nn::HingeEmbeddingLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. 
+TORCH_MODULE(HingeEmbeddingLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MultiMarginLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that optimizes a multi-class classification hinge +/// loss (margin-based loss) between input :math:`x` (a 2D mini-batch `Tensor`) +/// and output :math:`y` (which is a 1D tensor of target class indices, :math:`0 +/// \leq y \leq \text{x.size}(1)-1`). See +/// https://pytorch.org/docs/main/nn.html#torch.nn.MultiMarginLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MultiMarginLossOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MultiMarginLoss model(MultiMarginLossOptions().margin(2).weight(weight)); +/// ``` +struct TORCH_API MultiMarginLossImpl : public Cloneable { + explicit MultiMarginLossImpl(MultiMarginLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `MultiMarginLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + MultiMarginLossOptions options; +}; + +/// A `ModuleHolder` subclass for `MultiMarginLossImpl`. +/// See the documentation for `MultiMarginLossImpl` class to learn what methods +/// it provides, and examples of how to use `MultiMarginLoss` with +/// `torch::nn::MultiMarginLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(MultiMarginLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CosineEmbeddingLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the loss given input tensors +/// `input1`, `input2`, and a `Tensor` label `target` with values 1 or +/// -1. 
This is used for measuring whether two inputs are similar or +/// dissimilar, using the cosine distance, and is typically used for learning +/// nonlinear embeddings or semi-supervised learning. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.CosineEmbeddingLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::CosineEmbeddingLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CosineEmbeddingLoss model(CosineEmbeddingLossOptions().margin(0.5)); +/// ``` +struct TORCH_API CosineEmbeddingLossImpl + : public Cloneable { + explicit CosineEmbeddingLossImpl(CosineEmbeddingLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `CosineEmbeddingLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward( + const Tensor& input1, + const Tensor& input2, + const Tensor& target); + + /// The options with which this `Module` was constructed. + CosineEmbeddingLossOptions options; +}; + +/// A `ModuleHolder` subclass for `CosineEmbeddingLossImpl`. +/// See the documentation for `CosineEmbeddingLossImpl` class to learn what +/// methods it provides, and examples of how to use `CosineEmbeddingLoss` with +/// `torch::nn::CosineEmbeddingLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(CosineEmbeddingLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SmoothL1Loss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that uses a squared term if the absolute +/// element-wise error falls below beta and an L1 term otherwise. +/// It is less sensitive to outliers than the `MSELoss` and in some cases +/// prevents exploding gradients (e.g. see the paper `Fast R-CNN` by Ross +/// Girshick). 
See https://pytorch.org/docs/main/nn.html#torch.nn.SmoothL1Loss +/// to learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SmoothL1LossOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// SmoothL1Loss model(SmoothL1LossOptions().reduction(torch::kNone).beta(0.5)); +/// ``` +struct TORCH_API SmoothL1LossImpl : public Cloneable { + explicit SmoothL1LossImpl(SmoothL1LossOptions options = {}); + + void reset() override; + + /// Pretty prints the `SmoothL1Loss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + SmoothL1LossOptions options; +}; + +/// A `ModuleHolder` subclass for `SmoothL1LossImpl`. +/// See the documentation for `SmoothL1LossImpl` class to learn what methods it +/// provides, and examples of how to use `SmoothL1Loss` with +/// `torch::nn::SmoothL1LossOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(SmoothL1Loss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ HuberLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that uses a squared term if the absolute +/// element-wise error falls below delta and a delta-scaled L1 term otherwise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.HuberLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::HuberLossOptions` class to learn what +/// constructor arguments are supported for this module.
+/// +/// Example: +/// ``` +/// HuberLoss model(HuberLossOptions().reduction(torch::kNone).delta(0.5)); +/// ``` +struct TORCH_API HuberLossImpl : public Cloneable { + explicit HuberLossImpl(HuberLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `HuberLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + HuberLossOptions options; +}; + +/// A `ModuleHolder` subclass for `HuberLossImpl`. +/// See the documentation for `HuberLossImpl` class to learn what methods it +/// provides, and examples of how to use `HuberLoss` with +/// `torch::nn::HuberLossOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(HuberLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MultiLabelMarginLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that optimizes a multi-class multi-classification +/// hinge loss (margin-based loss) between input :math:`x` (a 2D mini-batch +/// `Tensor`) and output :math:`y` (which is a 2D `Tensor` of target class +/// indices). See +/// https://pytorch.org/docs/main/nn.html#torch.nn.MultiLabelMarginLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MultiLabelMarginLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MultiLabelMarginLoss model(MultiLabelMarginLossOptions(torch::kNone)); +/// ``` +struct TORCH_API MultiLabelMarginLossImpl + : public Cloneable { + explicit MultiLabelMarginLossImpl(MultiLabelMarginLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `MultiLabelMarginLoss` module into the given `stream`.
+ void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + MultiLabelMarginLossOptions options; +}; + +/// A `ModuleHolder` subclass for `MultiLabelMarginLossImpl`. +/// See the documentation for `MultiLabelMarginLossImpl` class to learn what +/// methods it provides, and examples of how to use `MultiLabelMarginLoss` with +/// `torch::nn::MultiLabelMarginLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(MultiLabelMarginLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ SoftMarginLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that optimizes a two-class classification +/// logistic loss between input tensor :math:`x` and target tensor :math:`y` +/// (containing 1 or -1). +/// See https://pytorch.org/docs/main/nn.html#torch.nn.SoftMarginLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::SoftMarginLossOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// SoftMarginLoss model(SoftMarginLossOptions(torch::kNone)); +/// ``` +struct TORCH_API SoftMarginLossImpl : public Cloneable { + explicit SoftMarginLossImpl(SoftMarginLossOptions options_ = {}); + + /// Pretty prints the `SoftMarginLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + void reset() override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + SoftMarginLossOptions options; +}; + +/// A `ModuleHolder` subclass for `SoftMarginLossImpl`. +/// See the documentation for `SoftMarginLossImpl` class to learn what methods +/// it provides, and examples of how to use `SoftMarginLoss` with +/// `torch::nn::SoftMarginLossOptions`. 
See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(SoftMarginLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MultiLabelSoftMarginLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that optimizes a multi-label one-versus-all +/// loss based on max-entropy, between input :math:`x` and target :math:`y` of +/// size :math:`(N, C)`. See +/// https://pytorch.org/docs/main/nn.html#torch.nn.MultiLabelSoftMarginLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MultiLabelSoftMarginLossOptions` class +/// to learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MultiLabelSoftMarginLoss +/// model(MultiLabelSoftMarginLossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API MultiLabelSoftMarginLossImpl + : public Cloneable { + explicit MultiLabelSoftMarginLossImpl( + MultiLabelSoftMarginLossOptions options_ = {}); + + /// Pretty prints the `MultiLabelSoftMarginLoss` module into the given + /// `stream`. + void pretty_print(std::ostream& stream) const override; + + void reset() override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + MultiLabelSoftMarginLossOptions options; +}; + +/// A `ModuleHolder` subclass for `MultiLabelSoftMarginLossImpl`. +/// See the documentation for `MultiLabelSoftMarginLossImpl` class to learn what +/// methods it provides, and examples of how to use `MultiLabelSoftMarginLoss` +/// with `torch::nn::MultiLabelSoftMarginLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. 
+TORCH_MODULE(MultiLabelSoftMarginLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TripletMarginLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the triplet loss given input +/// tensors :math:`x1`, :math:`x2`, :math:`x3` and a margin with a value greater +/// than :math:`0`. This is used for measuring a relative similarity between +/// samples. A triplet is composed of `a`, `p` and `n` (i.e., `anchor`, +/// `positive examples` and `negative examples` respectively). The +/// shapes of all input tensors should be :math:`(N, D)`. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.TripletMarginLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::TripletMarginLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// TripletMarginLoss +/// model(TripletMarginLossOptions().margin(3).p(2).eps(1e-06).swap(false)); +/// ``` +struct TORCH_API TripletMarginLossImpl + : public Cloneable { + explicit TripletMarginLossImpl(TripletMarginLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `TripletMarginLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative); + + /// The options with which this `Module` was constructed. + TripletMarginLossOptions options; +}; + +/// A `ModuleHolder` subclass for `TripletMarginLossImpl`. +/// See the documentation for `TripletMarginLossImpl` class to learn what +/// methods it provides, and examples of how to use `TripletMarginLoss` with +/// `torch::nn::TripletMarginLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics.
+TORCH_MODULE(TripletMarginLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TripletMarginWithDistanceLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the triplet loss given input +/// tensors :math:`a`, :math:`p`, and :math:`n` (representing anchor, +/// positive, and negative examples, respectively); and a nonnegative, +/// real-valued function +/// ("distance function") used to compute the relationships between the anchor +/// and positive example ("positive distance") and the anchor and negative +/// example ("negative distance"). +/// See +/// https://pytorch.org/docs/main/nn.html#torch.nn.TripletMarginWithDistanceLoss +/// to learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::TripletMarginWithDistanceLossOptions` +/// class to learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// TripletMarginWithDistanceLoss +/// model(TripletMarginWithDistanceLossOptions().margin(3).swap(false)); +/// ``` +struct TORCH_API TripletMarginWithDistanceLossImpl + : public Cloneable { + explicit TripletMarginWithDistanceLossImpl( + TripletMarginWithDistanceLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `TripletMarginWithDistanceLoss` module into the given + /// `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward( + const Tensor& anchor, + const Tensor& positive, + const Tensor& negative); + + /// The options with which this `Module` was constructed. + TripletMarginWithDistanceLossOptions options; +}; + +/// A `ModuleHolder` subclass for `TripletMarginWithDistanceLossImpl`. +/// See the documentation for `TripletMarginWithDistanceLossImpl` class to learn +/// what methods it provides, and examples of how to use +/// `TripletMarginWithDistanceLoss` with +/// `torch::nn::TripletMarginWithDistanceLossOptions`. 
+/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(TripletMarginWithDistanceLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CTCLoss ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// The Connectionist Temporal Classification loss. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.CTCLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::CTCLossOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CTCLoss +/// model(CTCLossOptions().blank(42).zero_infinity(false).reduction(torch::kSum)); +/// ``` +struct TORCH_API CTCLossImpl : public Cloneable { + explicit CTCLossImpl(CTCLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `CTCLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward( + const Tensor& log_probs, + const Tensor& targets, + const Tensor& input_lengths, + const Tensor& target_lengths); + + /// The options with which this `Module` was constructed. + CTCLossOptions options; +}; + +/// A `ModuleHolder` subclass for `CTCLossImpl`. +/// See the documentation for `CTCLossImpl` class to learn what methods it +/// provides, and examples of how to use `CTCLoss` with +/// `torch::nn::CTCLossOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(CTCLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PoissonNLLLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Negative log likelihood loss with Poisson distribution of target. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.PoissonNLLLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::PoissonNLLLossOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// PoissonNLLLoss +/// model(PoissonNLLLossOptions().log_input(false).full(true).eps(0.42).reduction(torch::kSum)); +/// ``` +struct TORCH_API PoissonNLLLossImpl : public Cloneable { + explicit PoissonNLLLossImpl(PoissonNLLLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `PoissonNLLLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& log_input, const Tensor& targets); + + /// The options with which this `Module` was constructed. + PoissonNLLLossOptions options; +}; + +/// A `ModuleHolder` subclass for `PoissonNLLLossImpl`. +/// See the documentation for `PoissonNLLLossImpl` class to learn what methods +/// it provides, and examples of how to use `PoissonNLLLoss` with +/// `torch::nn::PoissonNLLLossOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(PoissonNLLLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MarginRankingLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that measures the loss given +/// inputs :math:`x1`, :math:`x2`, two 1D mini-batch `Tensors`, +/// and a label 1D mini-batch tensor :math:`y` (containing 1 or -1). +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MarginRankingLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MarginRankingLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MarginRankingLoss +/// model(MarginRankingLossOptions().margin(0.5).reduction(torch::kSum)); +/// ``` +struct TORCH_API MarginRankingLossImpl + : public Cloneable { + explicit MarginRankingLossImpl(MarginRankingLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `MarginRankingLoss` module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override; + + Tensor forward( + const Tensor& input1, + const Tensor& input2, + const Tensor& targets); + + /// The options with which this `Module` was constructed. + MarginRankingLossOptions options; +}; + +/// A `ModuleHolder` subclass for `MarginRankingLossImpl`. +/// See the documentation for `MarginRankingLossImpl` class to learn what +/// methods it provides, and examples of how to use `MarginRankingLoss` with +/// `torch::nn::MarginRankingLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(MarginRankingLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ NLLLoss ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// The negative log likelihood loss. It is useful to train a classification +/// problem with `C` classes. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.NLLLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::NLLLossOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// NLLLoss model(NLLLossOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +struct TORCH_API NLLLossImpl : public Cloneable { + explicit NLLLossImpl(NLLLossOptions options_ = {}); + + /// Pretty prints the `NLLLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + void reset() override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + NLLLossOptions options; + + /// A manual rescaling weight given to each class. + Tensor weight; +}; + +/// A `ModuleHolder` subclass for `NLLLossImpl`. +/// See the documentation for `NLLLossImpl` class to learn what methods it +/// provides, and examples of how to use `NLLLoss` with +/// `torch::nn::NLLLossOptions`. 
See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(NLLLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CrossEntropyLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Creates a criterion that computes cross entropy loss between input and +/// target. See +/// https://pytorch.org/docs/main/nn.html#torch.nn.CrossEntropyLoss to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::CrossEntropyLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CrossEntropyLoss +/// model(CrossEntropyLossOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +struct TORCH_API CrossEntropyLossImpl : public Cloneable { + explicit CrossEntropyLossImpl(CrossEntropyLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `CrossEntropyLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + CrossEntropyLossOptions options; + + /// A manual rescaling weight given to each class. + Tensor weight; +}; + +/// A `ModuleHolder` subclass for `CrossEntropyLossImpl`. +/// See the documentation for `CrossEntropyLossImpl` class to learn what methods +/// it provides, and examples of how to use `CrossEntropyLoss` with +/// `torch::nn::CrossEntropyLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(CrossEntropyLoss); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ BCEWithLogitsLoss +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// This loss combines a `Sigmoid` layer and the `BCELoss` in one single +/// class. 
This version is more numerically stable than using a plain `Sigmoid` +/// followed by a `BCELoss` as, by combining the operations into one layer, +/// we take advantage of the log-sum-exp trick for numerical stability. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.BCEWithLogitsLoss to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::BCEWithLogitsLossOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// BCEWithLogitsLoss +/// model(BCEWithLogitsLossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API BCEWithLogitsLossImpl + : public Cloneable { + explicit BCEWithLogitsLossImpl(BCEWithLogitsLossOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `BCEWithLogitsLoss` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input, const Tensor& target); + + /// The options with which this `Module` was constructed. + BCEWithLogitsLossOptions options; + + /// A manual rescaling weight given to the loss of each batch element. + Tensor weight; + + /// A weight of positive examples. + Tensor pos_weight; +}; + +/// A `ModuleHolder` subclass for `BCEWithLogitsLossImpl`. +/// See the documentation for `BCEWithLogitsLossImpl` class to learn what +/// methods it provides, and examples of how to use `BCEWithLogitsLoss` with +/// `torch::nn::BCEWithLogitsLossOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(BCEWithLogitsLoss); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/normalization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/normalization.h new file mode 100644 index 0000000000000000000000000000000000000000..da424e74390e4f5ae442393a25719a9a93b1d387 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/normalization.h @@ -0,0 +1,202 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LayerNorm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Layer Normalization over a mini-batch of inputs as described in +/// the paper `Layer Normalization`_ . +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LayerNorm to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LayerNormOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LayerNorm model(LayerNormOptions({2, +/// 2}).elementwise_affine(false).eps(2e-5)); +/// ``` +class TORCH_API LayerNormImpl : public torch::nn::Cloneable { + public: + LayerNormImpl(std::vector normalized_shape) + : LayerNormImpl(LayerNormOptions(std::move(normalized_shape))) {} + explicit LayerNormImpl(LayerNormOptions options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `LayerNorm` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Applies layer normalization over a mini-batch of inputs as described in + /// the paper `Layer Normalization`_ . 
+ /// + /// The mean and standard-deviation are calculated separately over the last + /// certain number dimensions which have to be of the shape specified by + /// input `normalized_shape`. + /// + /// `Layer Normalization`: https://arxiv.org/abs/1607.06450 + Tensor forward(const Tensor& input); + + /// The options with which this module was constructed. + LayerNormOptions options; + + /// The learned weight. + /// Initialized to ones if the `elementwise_affine` option is set to `true` + /// upon construction. + Tensor weight; + + /// The learned bias. + /// Initialized to zeros `elementwise_affine` option is set to `true` upon + /// construction. + Tensor bias; +}; + +/// A `ModuleHolder` subclass for `LayerNormImpl`. +/// See the documentation for `LayerNormImpl` class to learn what methods it +/// provides, and examples of how to use `LayerNorm` with +/// `torch::nn::LayerNormOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LayerNorm); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LocalResponseNorm +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies local response normalization over an input signal composed +/// of several input planes, where channels occupy the second dimension. +/// Applies normalization across channels. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LocalResponseNorm to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LocalResponseNormOptions` class to +/// learn what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// LocalResponseNorm +/// model(LocalResponseNormOptions(2).alpha(0.0002).beta(0.85).k(2.)); +/// ``` +class TORCH_API LocalResponseNormImpl + : public Cloneable { + public: + LocalResponseNormImpl(int64_t size) + : LocalResponseNormImpl(LocalResponseNormOptions(size)) {} + explicit LocalResponseNormImpl(const LocalResponseNormOptions& options_); + + Tensor forward(const Tensor& input); + + void reset() override; + + /// Pretty prints the `LocalResponseNormImpl` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + LocalResponseNormOptions options; +}; + +/// A `ModuleHolder` subclass for `LocalResponseNormImpl`. +/// See the documentation for `LocalResponseNormImpl` class to learn what +/// methods it provides, and examples of how to use `LocalResponseNorm` with +/// `torch::nn::LocalResponseNormOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(LocalResponseNorm); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ CrossMapLRN2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// See the documentation for `torch::nn::CrossMapLRN2dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// CrossMapLRN2d model(CrossMapLRN2dOptions(3).alpha(1e-5).beta(0.1).k(10)); +/// ``` +class TORCH_API CrossMapLRN2dImpl + : public torch::nn::Cloneable { + public: + CrossMapLRN2dImpl(int64_t size) + : CrossMapLRN2dImpl(CrossMapLRN2dOptions(size)) {} + explicit CrossMapLRN2dImpl(const CrossMapLRN2dOptions& options_) + : options(options_) {} + + void reset() override; + + /// Pretty prints the `CrossMapLRN2d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + torch::Tensor forward(const torch::Tensor& input); + + CrossMapLRN2dOptions options; +}; + +/// A `ModuleHolder` subclass for `CrossMapLRN2dImpl`. 
+/// See the documentation for `CrossMapLRN2dImpl` class to learn what methods it +/// provides, and examples of how to use `CrossMapLRN2d` with +/// `torch::nn::CrossMapLRN2dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(CrossMapLRN2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GroupNorm ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies Group Normalization over a mini-batch of inputs as described in +/// the paper `Group Normalization`_ . +/// See https://pytorch.org/docs/main/nn.html#torch.nn.GroupNorm to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::GroupNormOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// GroupNorm model(GroupNormOptions(2, 2).eps(2e-5).affine(false)); +/// ``` +class TORCH_API GroupNormImpl : public torch::nn::Cloneable { + public: + GroupNormImpl(int64_t num_groups, int64_t num_channels) + : GroupNormImpl(GroupNormOptions(num_groups, num_channels)) {} + explicit GroupNormImpl(const GroupNormOptions& options_); + + void reset() override; + + void reset_parameters(); + + /// Pretty prints the `GroupNorm` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// The options with which this module was constructed. + GroupNormOptions options; + + /// The learned weight. + Tensor weight; + + /// The learned bias. + Tensor bias; +}; + +/// A `ModuleHolder` subclass for `GroupNormImpl`. +/// See the documentation for `GroupNormImpl` class to learn what methods it +/// provides, and examples of how to use `GroupNorm` with +/// `torch::nn::GroupNormOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. 
+TORCH_MODULE(GroupNorm); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/padding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/padding.h new file mode 100644 index 0000000000000000000000000000000000000000..73fbcc20ab642ae61ad14a40ecb502ab278409b5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/padding.h @@ -0,0 +1,381 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::nn { + +/// Base class for all (dimension-specialized) ReflectionPad modules. +template +class TORCH_API ReflectionPadImpl : public torch::nn::Cloneable { + public: + ReflectionPadImpl(ExpandingArray padding) + : ReflectionPadImpl(ReflectionPadOptions(padding)) {} + explicit ReflectionPadImpl(const ReflectionPadOptions& options_); + + void reset() override; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `ReflectionPad{1,2}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ReflectionPadOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReflectionPad1d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReflectionPad over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReflectionPad1d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReflectionPad1dOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// ReflectionPad1d model(ReflectionPad1dOptions({3, 1})); +/// ``` +class TORCH_API ReflectionPad1dImpl + : public ReflectionPadImpl<1, ReflectionPad1dImpl> { + public: + using ReflectionPadImpl<1, ReflectionPad1dImpl>::ReflectionPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReflectionPad1dImpl`. +/// See the documentation for `ReflectionPad1dImpl` class to learn what methods +/// it provides, and examples of how to use `ReflectionPad1d` with +/// `torch::nn::ReflectionPad1dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ReflectionPad1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReflectionPad2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReflectionPad over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReflectionPad2d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReflectionPad2dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReflectionPad2d model(ReflectionPad2dOptions({1, 1, 2, 0})); +/// ``` +class TORCH_API ReflectionPad2dImpl + : public ReflectionPadImpl<2, ReflectionPad2dImpl> { + public: + using ReflectionPadImpl<2, ReflectionPad2dImpl>::ReflectionPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReflectionPad2dImpl`. +/// See the documentation for `ReflectionPad2dImpl` class to learn what methods +/// it provides, and examples of how to use `ReflectionPad2d` with +/// `torch::nn::ReflectionPad2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ReflectionPad2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReflectionPad3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReflectionPad over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReflectionPad3d to +/// learn about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::ReflectionPad3dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReflectionPad3d model(ReflectionPad3dOptions(1)); +/// ReflectionPad3d model(ReflectionPad3dOptions({1, 1, 2, 0, 1, 2})); +/// ``` +class TORCH_API ReflectionPad3dImpl + : public ReflectionPadImpl<3, ReflectionPad3dImpl> { + public: + using ReflectionPadImpl<3, ReflectionPad3dImpl>::ReflectionPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReflectionPad3dImpl`. +/// See the documentation for `ReflectionPad3dImpl` class to learn what methods +/// it provides, and examples of how to use `ReflectionPad3d` with +/// `torch::nn::ReflectionPad3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ReflectionPad3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) ReplicationPad modules. +template +class TORCH_API ReplicationPadImpl : public torch::nn::Cloneable { + public: + ReplicationPadImpl(ExpandingArray padding) + : ReplicationPadImpl(ReplicationPadOptions(padding)) {} + explicit ReplicationPadImpl(const ReplicationPadOptions& options_); + + void reset() override; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `ReplicationPad{1,2}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ReplicationPadOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReplicationPad1d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReplicationPad over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReplicationPad1d to +/// learn about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::ReplicationPad1dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReplicationPad1d model(ReplicationPad1dOptions({3, 1})); +/// ``` +class TORCH_API ReplicationPad1dImpl + : public ReplicationPadImpl<1, ReplicationPad1dImpl> { + public: + using ReplicationPadImpl<1, ReplicationPad1dImpl>::ReplicationPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReplicationPad1dImpl`. +/// See the documentation for `ReplicationPad1dImpl` class to learn what methods +/// it provides, and examples of how to use `ReplicationPad1d` with +/// `torch::nn::ReplicationPad1dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ReplicationPad1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReplicationPad2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReplicationPad over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReplicationPad2d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReplicationPad2dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReplicationPad2d model(ReplicationPad2dOptions({1, 1, 2, 0})); +/// ``` +class TORCH_API ReplicationPad2dImpl + : public ReplicationPadImpl<2, ReplicationPad2dImpl> { + public: + using ReplicationPadImpl<2, ReplicationPad2dImpl>::ReplicationPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReplicationPad2dImpl`. +/// See the documentation for `ReplicationPad2dImpl` class to learn what methods +/// it provides, and examples of how to use `ReplicationPad2d` with +/// `torch::nn::ReplicationPad2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. 
+TORCH_MODULE(ReplicationPad2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ReplicationPad3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ReplicationPad over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ReplicationPad3d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ReplicationPad3dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ReplicationPad3d model(ReplicationPad3dOptions({1, 2, 1, 2, 1, 2})); +/// ``` +class TORCH_API ReplicationPad3dImpl + : public ReplicationPadImpl<3, ReplicationPad3dImpl> { + public: + using ReplicationPadImpl<3, ReplicationPad3dImpl>::ReplicationPadImpl; +}; + +/// A `ModuleHolder` subclass for `ReplicationPad3dImpl`. +/// See the documentation for `ReplicationPad3dImpl` class to learn what methods +/// it provides, and examples of how to use `ReplicationPad3d` with +/// `torch::nn::ReplicationPad3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(ReplicationPad3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) ZeroPad modules. +template +class TORCH_API ZeroPadImpl : public torch::nn::Cloneable { + public: + ZeroPadImpl(ExpandingArray padding) + : ZeroPadImpl(ZeroPadOptions(padding)) {} + explicit ZeroPadImpl(const ZeroPadOptions& options_); + + void reset() override; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `ZeroPad{1,2}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ZeroPadOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ZeroPad1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Applies ZeroPad over a 1-D input. 
+class TORCH_API ZeroPad1dImpl : public ZeroPadImpl<1, ZeroPad1dImpl> { + public: + using ZeroPadImpl<1, ZeroPad1dImpl>::ZeroPadImpl; +}; + +/// A `ModuleHolder` subclass for `ZeroPad1dImpl`. +/// See the documentation for `ZeroPad1dImpl` class to learn what methods it +/// provides, and examples of how to use `ZeroPad1d` with +/// `torch::nn::ZeroPad1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(ZeroPad1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ZeroPad2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Applies ZeroPad over a 2-D input. +class TORCH_API ZeroPad2dImpl : public ZeroPadImpl<2, ZeroPad2dImpl> { + public: + using ZeroPadImpl<2, ZeroPad2dImpl>::ZeroPadImpl; +}; + +/// A `ModuleHolder` subclass for `ZeroPad2dImpl`. +/// See the documentation for `ZeroPad2dImpl` class to learn what methods it +/// provides, and examples of how to use `ZeroPad2d` with +/// `torch::nn::ZeroPad2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(ZeroPad2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ZeroPad3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +// Applies ZeroPad over a 3-D input. +class TORCH_API ZeroPad3dImpl : public ZeroPadImpl<3, ZeroPad3dImpl> { + public: + using ZeroPadImpl<3, ZeroPad3dImpl>::ZeroPadImpl; +}; + +/// A `ModuleHolder` subclass for `ZeroPad3dImpl`. +/// See the documentation for `ZeroPad3dImpl` class to learn what methods it +/// provides, and examples of how to use `ZeroPad3d` with +/// `torch::nn::ZeroPad3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(ZeroPad3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) ConstantPad modules. 
+template +class TORCH_API ConstantPadImpl : public torch::nn::Cloneable { + public: + ConstantPadImpl(ExpandingArray padding, double value) + : ConstantPadImpl(ConstantPadOptions(padding, value)) {} + explicit ConstantPadImpl(const ConstantPadOptions& options_); + + void reset() override; + + Tensor forward(const Tensor& input); + + /// Pretty prints the `ConstantPad{1,2}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + ConstantPadOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConstantPad1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ConstantPad over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConstantPad1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ConstantPad1dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ConstantPad1d model(ConstantPad1dOptions({3, 1}, 3.5)); +/// ``` +class TORCH_API ConstantPad1dImpl + : public ConstantPadImpl<1, ConstantPad1dImpl> { + public: + using ConstantPadImpl<1, ConstantPad1dImpl>::ConstantPadImpl; +}; + +/// A `ModuleHolder` subclass for `ConstantPad1dImpl`. +/// See the documentation for `ConstantPad1dImpl` class to learn what methods it +/// provides, and examples of how to use `ConstantPad1d` with +/// `torch::nn::ConstantPad1dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConstantPad1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConstantPad2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ConstantPad over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConstantPad2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ConstantPad2dOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// ConstantPad2d model(ConstantPad2dOptions({3, 0, 2, 1}, 3.5)); +/// ``` +class TORCH_API ConstantPad2dImpl + : public ConstantPadImpl<2, ConstantPad2dImpl> { + public: + using ConstantPadImpl<2, ConstantPad2dImpl>::ConstantPadImpl; +}; + +/// A `ModuleHolder` subclass for `ConstantPad2dImpl`. +/// See the documentation for `ConstantPad2dImpl` class to learn what methods it +/// provides, and examples of how to use `ConstantPad2d` with +/// `torch::nn::ConstantPad2dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConstantPad2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ ConstantPad3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies ConstantPad over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.ConstantPad3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::ConstantPad3dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// ConstantPad3d model(ConstantPad3dOptions({1, 2, 1, 2, 1, 2}, 3.5)); +/// ``` +class TORCH_API ConstantPad3dImpl + : public ConstantPadImpl<3, ConstantPad3dImpl> { + public: + using ConstantPadImpl<3, ConstantPad3dImpl>::ConstantPadImpl; +}; + +/// A `ModuleHolder` subclass for `ConstantPad3dImpl`. +/// See the documentation for `ConstantPad3dImpl` class to learn what methods it +/// provides, and examples of how to use `ConstantPad3d` with +/// `torch::nn::ConstantPad3dOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(ConstantPad3d); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pixelshuffle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pixelshuffle.h new file mode 100644 index 0000000000000000000000000000000000000000..bb7eb86c45bb7b8ce1de3f6daee3e7f5297e7f6b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pixelshuffle.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PixelShuffle +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Rearranges elements in a tensor of shape :math:`(*, C \times r^2, H, W)` +/// to a tensor of shape :math:`(*, C, H \times r, W \times r)`, where r is an +/// upscale factor. See +/// https://pytorch.org/docs/main/nn.html#torch.nn.PixelShuffle to learn about +/// the exact behavior of this module. +/// +/// See the documentation for `torch::nn::PixelShuffleOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// PixelShuffle model(PixelShuffleOptions(5)); +/// ``` +struct TORCH_API PixelShuffleImpl + : public torch::nn::Cloneable { + explicit PixelShuffleImpl(const PixelShuffleOptions& options_); + + /// Pretty prints the `PixelShuffle` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + void reset() override; + + /// The options with which this `Module` was constructed. + PixelShuffleOptions options; +}; + +/// A `ModuleHolder` subclass for `PixelShuffleImpl`. 
+/// See the documentation for `PixelShuffleImpl` class to learn what methods it +/// provides, and examples of how to use `PixelShuffle` with +/// `torch::nn::PixelShuffleOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(PixelShuffle); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ PixelUnshuffle ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Reverses the PixelShuffle operation by rearranging elements in a tensor of +/// shape :math:`(*, C, H \times r, W \times r)` to a tensor of shape :math:`(*, +/// C \times r^2, H, W)`, where r is a downscale factor. See +/// https://pytorch.org/docs/main/nn.html#torch.nn.PixelUnshuffle to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::PixelUnshuffleOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// PixelUnshuffle model(PixelUnshuffleOptions(5)); +/// ``` +struct TORCH_API PixelUnshuffleImpl + : public torch::nn::Cloneable { + explicit PixelUnshuffleImpl(const PixelUnshuffleOptions& options_); + + /// Pretty prints the `PixelUnshuffle` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + void reset() override; + + /// The options with which this `Module` was constructed. + PixelUnshuffleOptions options; +}; + +/// A `ModuleHolder` subclass for `PixelUnshuffleImpl`. +/// See the documentation for `PixelUnshuffleImpl` class to learn what methods +/// it provides, and examples of how to use `PixelUnshuffle` with +/// `torch::nn::PixelUnshuffleOptions`. See the documentation for `ModuleHolder` +/// to learn about PyTorch's module storage semantics. +TORCH_MODULE(PixelUnshuffle); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pooling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..2e0535909870d80cce205c15c4fb72e19d528f44 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/pooling.h @@ -0,0 +1,782 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +/// Base class for all (dimension-specialized) avgpool modules. +template +class TORCH_API AvgPoolImpl : public torch::nn::Cloneable { + public: + AvgPoolImpl(ExpandingArray kernel_size) + : AvgPoolImpl(AvgPoolOptions(kernel_size)) {} + explicit AvgPoolImpl(const AvgPoolOptions& options_); + + void reset() override; + + /// Pretty prints the `AvgPool{1,2,3}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + AvgPoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AvgPool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies avgpool over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AvgPool1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AvgPool1dOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// AvgPool1d model(AvgPool1dOptions(3).stride(2)); +/// ``` +class TORCH_API AvgPool1dImpl : public AvgPoolImpl<1, AvgPool1dImpl> { + public: + using AvgPoolImpl<1, AvgPool1dImpl>::AvgPoolImpl; + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AvgPool1dImpl`. +/// See the documentation for `AvgPool1dImpl` class to learn what methods it +/// provides, and examples of how to use `AvgPool1d` with +/// `torch::nn::AvgPool1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(AvgPool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AvgPool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies avgpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AvgPool2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AvgPool2dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AvgPool2d model(AvgPool2dOptions({3, 2}).stride({2, 2})); +/// ``` +class TORCH_API AvgPool2dImpl : public AvgPoolImpl<2, AvgPool2dImpl> { + public: + using AvgPoolImpl<2, AvgPool2dImpl>::AvgPoolImpl; + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AvgPool2dImpl`. +/// See the documentation for `AvgPool2dImpl` class to learn what methods it +/// provides, and examples of how to use `AvgPool2d` with +/// `torch::nn::AvgPool2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(AvgPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AvgPool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies avgpool over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AvgPool3d to learn +/// about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::AvgPool3dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AvgPool3d model(AvgPool3dOptions(5).stride(2)); +/// ``` +class TORCH_API AvgPool3dImpl : public AvgPoolImpl<3, AvgPool3dImpl> { + public: + using AvgPoolImpl<3, AvgPool3dImpl>::AvgPoolImpl; + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AvgPool3dImpl`. +/// See the documentation for `AvgPool3dImpl` class to learn what methods it +/// provides, and examples of how to use `AvgPool3d` with +/// `torch::nn::AvgPool3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(AvgPool3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) maxpool modules. +template +class TORCH_API MaxPoolImpl : public torch::nn::Cloneable { + public: + MaxPoolImpl(ExpandingArray kernel_size) + : MaxPoolImpl(MaxPoolOptions(kernel_size)) {} + explicit MaxPoolImpl(const MaxPoolOptions& options_); + + void reset() override; + + /// Pretty prints the `MaxPool{1,2,3}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. + MaxPoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxPool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxpool over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxPool1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxPool1dOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// MaxPool1d model(MaxPool1dOptions(3).stride(2)); +/// ``` +class TORCH_API MaxPool1dImpl : public MaxPoolImpl<1, MaxPool1dImpl> { + public: + using MaxPoolImpl<1, MaxPool1dImpl>::MaxPoolImpl; + Tensor forward(const Tensor& input); + + /// Returns the outputs and the indices of the max values. + /// Useful for `torch::nn::MaxUnpool1d` later. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `MaxPool1dImpl`. +/// See the documentation for `MaxPool1dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxPool1d` with +/// `torch::nn::MaxPool1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(MaxPool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxPool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxPool2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxPool2dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MaxPool2d model(MaxPool2dOptions({3, 2}).stride({2, 2})); +/// ``` +class TORCH_API MaxPool2dImpl : public MaxPoolImpl<2, MaxPool2dImpl> { + public: + using MaxPoolImpl<2, MaxPool2dImpl>::MaxPoolImpl; + Tensor forward(const Tensor& input); + + /// Returns the outputs and the indices of the max values. + /// Useful for `torch::nn::MaxUnpool2d` later. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `MaxPool2dImpl`. +/// See the documentation for `MaxPool2dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxPool2d` with +/// `torch::nn::MaxPool2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. 
+TORCH_MODULE(MaxPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxPool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxpool over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxPool3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxPool3dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MaxPool3d model(MaxPool3dOptions(3).stride(2)); +/// ``` +class TORCH_API MaxPool3dImpl : public MaxPoolImpl<3, MaxPool3dImpl> { + public: + using MaxPoolImpl<3, MaxPool3dImpl>::MaxPoolImpl; + Tensor forward(const Tensor& input); + + /// Returns the outputs and the indices of the max values. + /// Useful for `torch::nn::MaxUnpool3d` later. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `MaxPool3dImpl`. +/// See the documentation for `MaxPool3dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxPool3d` with +/// `torch::nn::MaxPool3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(MaxPool3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) adaptive maxpool modules. +template +class TORCH_API AdaptiveMaxPoolImpl : public torch::nn::Cloneable { + public: + AdaptiveMaxPoolImpl(output_size_t output_size) + : AdaptiveMaxPoolImpl( + AdaptiveMaxPoolOptions(output_size)) {} + explicit AdaptiveMaxPoolImpl( + const AdaptiveMaxPoolOptions& options_) + : options(options_) {} + + void reset() override {} + + /// Pretty prints the `AdaptiveMaxPool{1,2,3}d` module into the given + /// `stream`. 
+ void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::AdaptiveMaxPool" << D << 'd' + << "(output_size=" << options.output_size() << ')'; + } + + /// The options with which this `Module` was constructed. + AdaptiveMaxPoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveMaxPool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive maxpool over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveMaxPool1d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveMaxPool1dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AdaptiveMaxPool1d model(AdaptiveMaxPool1dOptions(3)); +/// ``` +class TORCH_API AdaptiveMaxPool1dImpl + : public AdaptiveMaxPoolImpl<1, ExpandingArray<1>, AdaptiveMaxPool1dImpl> { + public: + using AdaptiveMaxPoolImpl<1, ExpandingArray<1>, AdaptiveMaxPool1dImpl>:: + AdaptiveMaxPoolImpl; + + Tensor forward(const Tensor& input); + + /// Returns the indices along with the outputs. + /// Useful to pass to nn.MaxUnpool1d. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveMaxPool1dImpl`. +/// See the documentation for `AdaptiveMaxPool1dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveMaxPool1d` with +/// `torch::nn::AdaptiveMaxPool1dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveMaxPool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveMaxPool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive maxpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveMaxPool2d to +/// learn about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::AdaptiveMaxPool2dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AdaptiveMaxPool2d model(AdaptiveMaxPool2dOptions({3, 2})); +/// ``` +class TORCH_API AdaptiveMaxPool2dImpl : public AdaptiveMaxPoolImpl< + 2, + ExpandingArrayWithOptionalElem<2>, + AdaptiveMaxPool2dImpl> { + public: + using AdaptiveMaxPoolImpl< + 2, + ExpandingArrayWithOptionalElem<2>, + AdaptiveMaxPool2dImpl>::AdaptiveMaxPoolImpl; + + Tensor forward(const Tensor& input); + + /// Returns the indices along with the outputs. + /// Useful to pass to nn.MaxUnpool2d. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveMaxPool2dImpl`. +/// See the documentation for `AdaptiveMaxPool2dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveMaxPool2d` with +/// `torch::nn::AdaptiveMaxPool2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveMaxPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveMaxPool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive maxpool over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveMaxPool3d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveMaxPool3dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AdaptiveMaxPool3d model(AdaptiveMaxPool3dOptions(3)); +/// ``` +class TORCH_API AdaptiveMaxPool3dImpl : public AdaptiveMaxPoolImpl< + 3, + ExpandingArrayWithOptionalElem<3>, + AdaptiveMaxPool3dImpl> { + public: + using AdaptiveMaxPoolImpl< + 3, + ExpandingArrayWithOptionalElem<3>, + AdaptiveMaxPool3dImpl>::AdaptiveMaxPoolImpl; + + Tensor forward(const Tensor& input); + + /// Returns the indices along with the outputs. 
+ /// Useful to pass to nn.MaxUnpool3d. + std::tuple forward_with_indices(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveMaxPool3dImpl`. +/// See the documentation for `AdaptiveMaxPool3dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveMaxPool3d` with +/// `torch::nn::AdaptiveMaxPool3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveMaxPool3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) adaptive avgpool modules. +template +class TORCH_API AdaptiveAvgPoolImpl : public torch::nn::Cloneable { + public: + AdaptiveAvgPoolImpl(output_size_t output_size) + : AdaptiveAvgPoolImpl( + AdaptiveAvgPoolOptions(output_size)) {} + explicit AdaptiveAvgPoolImpl( + const AdaptiveAvgPoolOptions& options_) + : options(options_) {} + + void reset() override {} + + /// Pretty prints the `AdaptiveAvgPool{1,2,3}d` module into the given + /// `stream`. + void pretty_print(std::ostream& stream) const override { + stream << "torch::nn::AdaptiveAvgPool" << D << 'd' + << "(output_size=" << options.output_size() << ')'; + } + + /// The options with which this `Module` was constructed. + AdaptiveAvgPoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveAvgPool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive avgpool over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveAvgPool1d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveAvgPool1dOptions` class to +/// learn what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// AdaptiveAvgPool1d model(AdaptiveAvgPool1dOptions(5)); +/// ``` +class TORCH_API AdaptiveAvgPool1dImpl + : public AdaptiveAvgPoolImpl<1, ExpandingArray<1>, AdaptiveAvgPool1dImpl> { + public: + using AdaptiveAvgPoolImpl<1, ExpandingArray<1>, AdaptiveAvgPool1dImpl>:: + AdaptiveAvgPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveAvgPool1dImpl`. +/// See the documentation for `AdaptiveAvgPool1dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveAvgPool1d` with +/// `torch::nn::AdaptiveAvgPool1dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveAvgPool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveAvgPool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive avgpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveAvgPool2d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveAvgPool2dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AdaptiveAvgPool2d model(AdaptiveAvgPool2dOptions({3, 2})); +/// ``` +class TORCH_API AdaptiveAvgPool2dImpl : public AdaptiveAvgPoolImpl< + 2, + ExpandingArrayWithOptionalElem<2>, + AdaptiveAvgPool2dImpl> { + public: + using AdaptiveAvgPoolImpl< + 2, + ExpandingArrayWithOptionalElem<2>, + AdaptiveAvgPool2dImpl>::AdaptiveAvgPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveAvgPool2dImpl`. +/// See the documentation for `AdaptiveAvgPool2dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveAvgPool2d` with +/// `torch::nn::AdaptiveAvgPool2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. 
+TORCH_MODULE(AdaptiveAvgPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~ AdaptiveAvgPool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies adaptive avgpool over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.AdaptiveAvgPool3d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::AdaptiveAvgPool3dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// AdaptiveAvgPool3d model(AdaptiveAvgPool3dOptions(3)); +/// ``` +class TORCH_API AdaptiveAvgPool3dImpl : public AdaptiveAvgPoolImpl< + 3, + ExpandingArrayWithOptionalElem<3>, + AdaptiveAvgPool3dImpl> { + public: + using AdaptiveAvgPoolImpl< + 3, + ExpandingArrayWithOptionalElem<3>, + AdaptiveAvgPool3dImpl>::AdaptiveAvgPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `AdaptiveAvgPool3dImpl`. +/// See the documentation for `AdaptiveAvgPool3dImpl` class to learn what +/// methods it provides, and examples of how to use `AdaptiveAvgPool3d` with +/// `torch::nn::AdaptiveAvgPool3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(AdaptiveAvgPool3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) maxunpool modules. +template +class TORCH_API MaxUnpoolImpl : public torch::nn::Cloneable { + public: + MaxUnpoolImpl(ExpandingArray kernel_size) + : MaxUnpoolImpl(MaxUnpoolOptions(kernel_size)) {} + explicit MaxUnpoolImpl(const MaxUnpoolOptions& options_); + + void reset() override; + + /// Pretty prints the `MaxUnpool{1,2,3}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// The options with which this `Module` was constructed. 
+ MaxUnpoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxUnpool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxunpool over a 1-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxUnpool1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxUnpool1dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MaxUnpool1d model(MaxUnpool1dOptions(3).stride(2).padding(1)); +/// ``` +class TORCH_API MaxUnpool1dImpl : public MaxUnpoolImpl<1, MaxUnpool1dImpl> { + public: + using MaxUnpoolImpl<1, MaxUnpool1dImpl>::MaxUnpoolImpl; + Tensor forward( + const Tensor& input, + const Tensor& indices, + const std::optional>& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({2, AnyValue(std::optional>())}) +}; + +/// A `ModuleHolder` subclass for `MaxUnpool1dImpl`. +/// See the documentation for `MaxUnpool1dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxUnpool1d` with +/// `torch::nn::MaxUnpool1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(MaxUnpool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxUnpool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxunpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxUnpool2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxUnpool2dOptions` class to learn +/// what constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// MaxUnpool2d model(MaxUnpool2dOptions(3).stride(2).padding(1)); +/// ``` +class TORCH_API MaxUnpool2dImpl : public MaxUnpoolImpl<2, MaxUnpool2dImpl> { + public: + using MaxUnpoolImpl<2, MaxUnpool2dImpl>::MaxUnpoolImpl; + Tensor forward( + const Tensor& input, + const Tensor& indices, + const std::optional>& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({2, AnyValue(std::optional>())}) +}; + +/// A `ModuleHolder` subclass for `MaxUnpool2dImpl`. +/// See the documentation for `MaxUnpool2dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxUnpool2d` with +/// `torch::nn::MaxUnpool2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(MaxUnpool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ MaxUnpool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies maxunpool over a 3-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.MaxUnpool3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::MaxUnpool3dOptions` class to learn +/// what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// MaxUnpool3d model(MaxUnpool3dOptions(3).stride(2).padding(1)); +/// ``` +class TORCH_API MaxUnpool3dImpl : public MaxUnpoolImpl<3, MaxUnpool3dImpl> { + public: + using MaxUnpoolImpl<3, MaxUnpool3dImpl>::MaxUnpoolImpl; + Tensor forward( + const Tensor& input, + const Tensor& indices, + const std::optional>& output_size = std::nullopt); + + protected: + FORWARD_HAS_DEFAULT_ARGS({2, AnyValue(std::optional>())}) +}; + +/// A `ModuleHolder` subclass for `MaxUnpool3dImpl`. +/// See the documentation for `MaxUnpool3dImpl` class to learn what methods it +/// provides, and examples of how to use `MaxUnpool3d` with +/// `torch::nn::MaxUnpool3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. 
+TORCH_MODULE(MaxUnpool3d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FractionalMaxPool2d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies fractional maxpool over a 2-D input. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.FractionalMaxPool2d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::FractionalMaxPool2dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// FractionalMaxPool2d model(FractionalMaxPool2dOptions(5).output_size(1)); +/// ``` +class TORCH_API FractionalMaxPool2dImpl + : public torch::nn::Cloneable { + public: + FractionalMaxPool2dImpl(ExpandingArray<2> kernel_size) + : FractionalMaxPool2dImpl(FractionalMaxPool2dOptions(kernel_size)) {} + explicit FractionalMaxPool2dImpl(FractionalMaxPool2dOptions options_); + + void reset() override; + + /// Pretty prints the `FractionalMaxPool2d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// Returns the outputs and the indices of the max values. + /// Useful for `torch::nn::MaxUnpool2d` later. + std::tuple forward_with_indices(const Tensor& input); + + /// The options with which this `Module` was constructed. + FractionalMaxPool2dOptions options; + + Tensor _random_samples; +}; + +/// A `ModuleHolder` subclass for `FractionalMaxPool2dImpl`. +/// See the documentation for `FractionalMaxPool2dImpl` class to learn what +/// methods it provides, and examples of how to use `FractionalMaxPool2d` with +/// `torch::nn::FractionalMaxPool2dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(FractionalMaxPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ FractionalMaxPool3d +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies fractional maxpool over a 3-D input. 
+/// See https://pytorch.org/docs/main/nn.html#torch.nn.FractionalMaxPool3d to +/// learn about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::FractionalMaxPool3dOptions` class to +/// learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// FractionalMaxPool3d model(FractionalMaxPool3dOptions(5).output_size(1)); +/// ``` +class TORCH_API FractionalMaxPool3dImpl + : public torch::nn::Cloneable { + public: + FractionalMaxPool3dImpl(ExpandingArray<3> kernel_size) + : FractionalMaxPool3dImpl(FractionalMaxPool3dOptions(kernel_size)) {} + explicit FractionalMaxPool3dImpl(FractionalMaxPool3dOptions options_); + + void reset() override; + + /// Pretty prints the `FractionalMaxPool3d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// Returns the outputs and the indices of the max values. + /// Useful for `torch::nn::MaxUnpool3d` later. + std::tuple forward_with_indices(const Tensor& input); + + /// The options with which this `Module` was constructed. + FractionalMaxPool3dOptions options; + + Tensor _random_samples; +}; + +/// A `ModuleHolder` subclass for `FractionalMaxPool3dImpl`. +/// See the documentation for `FractionalMaxPool3dImpl` class to learn what +/// methods it provides, and examples of how to use `FractionalMaxPool3d` with +/// `torch::nn::FractionalMaxPool3dOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(FractionalMaxPool3d); + +// ============================================================================ + +/// Base class for all (dimension-specialized) lppool modules. 
+template +class TORCH_API LPPoolImpl : public torch::nn::Cloneable { + public: + LPPoolImpl(double norm_type, ExpandingArray kernel_size) + : LPPoolImpl(LPPoolOptions(norm_type, kernel_size)) {} + explicit LPPoolImpl(const LPPoolOptions& options_); + + void reset() override; + + /// Pretty prints the `LPPool{1,2}d` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + LPPoolOptions options; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LPPool1d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LPPool1d function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LPPool1d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LPPool1dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LPPool1d model(LPPool1dOptions(1, 2).stride(5).ceil_mode(true)); +/// ``` +class TORCH_API LPPool1dImpl : public LPPoolImpl<1, LPPool1dImpl> { + public: + using LPPoolImpl<1, LPPool1dImpl>::LPPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `LPPool1dImpl`. +/// See the documentation for `LPPool1dImpl` class to learn what methods it +/// provides, and examples of how to use `LPPool1d` with +/// `torch::nn::LPPool1dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LPPool1d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LPPool2d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LPPool2d function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LPPool2d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LPPool2dOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// LPPool2d model(LPPool2dOptions(1, std::vector({3, 4})).stride({5, +/// 6}).ceil_mode(true)); +/// ``` +class TORCH_API LPPool2dImpl : public LPPoolImpl<2, LPPool2dImpl> { + public: + using LPPoolImpl<2, LPPool2dImpl>::LPPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `LPPool2dImpl`. +/// See the documentation for `LPPool2dImpl` class to learn what methods it +/// provides, and examples of how to use `LPPool2d` with +/// `torch::nn::LPPool2dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LPPool2d); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LPPool3d ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Applies the LPPool3d function element-wise. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LPPool3d to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LPPool3dOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LPPool3d model(LPPool3dOptions(1, std::vector({3, 4, 5})).stride( +/// {5, 6, 7}).ceil_mode(true)); +/// ``` +class TORCH_API LPPool3dImpl : public LPPoolImpl<3, LPPool3dImpl> { + public: + using LPPoolImpl<3, LPPool3dImpl>::LPPoolImpl; + + Tensor forward(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `LPPool3dImpl`. +/// See the documentation for `LPPool3dImpl` class to learn what methods it +/// provides, and examples of how to use `LPPool3d` with +/// `torch::nn::LPPool3dOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LPPool3d); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/rnn.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/rnn.h new file mode 100644 index 0000000000000000000000000000000000000000..469d455c9754e5c44f1ea5144f8872e9d3d2b20b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/rnn.h @@ -0,0 +1,404 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include + +namespace torch::nn { + +namespace detail { +/// Base class for all RNN implementations (intended for code sharing). +template +class TORCH_API RNNImplBase : public torch::nn::Cloneable { + public: + explicit RNNImplBase(const RNNOptionsBase& options_); + + /// Initializes the parameters of the RNN module. + void reset() override; + + void reset_parameters(); + + /// Overrides `nn::Module::to()` to call `flatten_parameters()` after the + /// original operation. + void to(torch::Device device, torch::Dtype dtype, bool non_blocking = false) + override; + void to(torch::Dtype dtype, bool non_blocking = false) override; + void to(torch::Device device, bool non_blocking = false) override; + + /// Pretty prints the RNN module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + /// Modifies the internal storage of weights for optimization purposes. + /// + /// On CPU, this method should be called if any of the weight or bias vectors + /// are changed (i.e. weights are added or removed). On GPU, it should be + /// called __any time the storage of any parameter is modified__, e.g. any + /// time a parameter is assigned a new value. 
This allows using the fast path + /// in cuDNN implementations of respective RNN `forward()` methods. It is + /// called once upon construction, inside `reset()`. + void flatten_parameters(); + + std::vector all_weights() const; + + /// The RNN's options. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + RNNOptionsBase options_base; + + protected: + // Resets flat_weights_ + // Note: be v. careful before removing this, as 3rd party device types + // likely rely on this behavior to properly .to() modules like LSTM. + void reset_flat_weights(); + + void check_input(const Tensor& input, const Tensor& batch_sizes) const; + + std::tuple get_expected_hidden_size( + const Tensor& input, + const Tensor& batch_sizes) const; + + void check_hidden_size( + const Tensor& hx, + std::tuple expected_hidden_size, + std::string msg = "Expected hidden size {1}, got {2}") const; + + void check_forward_args(Tensor input, Tensor hidden, Tensor batch_sizes) + const; + + Tensor permute_hidden(Tensor hx, const Tensor& permutation) const; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector flat_weights_names_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector> all_weights_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector flat_weights_; +}; +} // namespace detail + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNN ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A multi-layer Elman RNN module with Tanh or ReLU activation. +/// See https://pytorch.org/docs/main/generated/torch.nn.RNN.html to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::RNNOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// RNN model(RNNOptions(128, +/// 64).num_layers(3).dropout(0.2).nonlinearity(torch::kTanh)); +/// ``` +class TORCH_API RNNImpl : public detail::RNNImplBase { + public: + RNNImpl(int64_t input_size, int64_t hidden_size) + : RNNImpl(RNNOptions(input_size, hidden_size)) {} + explicit RNNImpl(const RNNOptions& options_); + + std::tuple forward(const Tensor& input, Tensor hx = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}) + + public: + std::tuple + forward_with_packed_input( + const torch::nn::utils::rnn::PackedSequence& packed_input, + Tensor hx = {}); + + RNNOptions options; + + protected: + std::tuple forward_helper( + const Tensor& input, + const Tensor& batch_sizes, + const Tensor& sorted_indices, + int64_t max_batch_size, + Tensor hx); +}; + +/// A `ModuleHolder` subclass for `RNNImpl`. +/// See the documentation for `RNNImpl` class to learn what methods it +/// provides, and examples of how to use `RNN` with `torch::nn::RNNOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(RNN); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LSTM ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A multi-layer long-short-term-memory (LSTM) module. +/// See https://pytorch.org/docs/main/generated/torch.nn.LSTM.html to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::LSTMOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// LSTM model(LSTMOptions(2, +/// 4).num_layers(3).batch_first(false).bidirectional(true)); +/// ``` +class TORCH_API LSTMImpl : public detail::RNNImplBase { + public: + LSTMImpl(int64_t input_size, int64_t hidden_size) + : LSTMImpl(LSTMOptions(input_size, hidden_size)) {} + explicit LSTMImpl(const LSTMOptions& options_); + + std::tuple> forward( + const Tensor& input, + std::optional> hx_opt = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {1, AnyValue(std::optional>())}) + + public: + std::tuple> + forward_with_packed_input( + const torch::nn::utils::rnn::PackedSequence& packed_input, + std::optional> hx_opt = {}); + + LSTMOptions options; + + protected: + void check_forward_args( + const Tensor& input, + std::tuple hidden, + const Tensor& batch_sizes) const; + + std::tuple get_expected_cell_size( + const Tensor& input, + const Tensor& batch_sizes) const; + + std::tuple permute_hidden( + std::tuple hx, + const Tensor& permutation) const; + + std::tuple> forward_helper( + const Tensor& input, + const Tensor& batch_sizes, + const Tensor& sorted_indices, + int64_t max_batch_size, + std::optional> hx_opt); +}; + +/// A `ModuleHolder` subclass for `LSTMImpl`. +/// See the documentation for `LSTMImpl` class to learn what methods it +/// provides, and examples of how to use `LSTM` with `torch::nn::LSTMOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(LSTM); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GRU ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A multi-layer gated recurrent unit (GRU) module. +/// See https://pytorch.org/docs/main/generated/torch.nn.GRU.html to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::GRUOptions` class to learn what +/// constructor arguments are supported for this module. 
+/// +/// Example: +/// ``` +/// GRU model(GRUOptions(2, +/// 4).num_layers(3).batch_first(false).bidirectional(true)); +/// ``` +class TORCH_API GRUImpl : public detail::RNNImplBase { + public: + GRUImpl(int64_t input_size, int64_t hidden_size) + : GRUImpl(GRUOptions(input_size, hidden_size)) {} + explicit GRUImpl(const GRUOptions& options_); + + std::tuple forward(const Tensor& input, Tensor hx = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(torch::Tensor())}) + + public: + std::tuple + forward_with_packed_input( + const torch::nn::utils::rnn::PackedSequence& packed_input, + Tensor hx = {}); + + GRUOptions options; + + protected: + std::tuple forward_helper( + const Tensor& input, + const Tensor& batch_sizes, + const Tensor& sorted_indices, + int64_t max_batch_size, + Tensor hx); +}; + +/// A `ModuleHolder` subclass for `GRUImpl`. +/// See the documentation for `GRUImpl` class to learn what methods it +/// provides, and examples of how to use `GRU` with `torch::nn::GRUOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(GRU); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNNCellImplBase +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +namespace detail { +/// Base class for all RNNCell implementations (intended for code sharing). +template +class TORCH_API RNNCellImplBase : public torch::nn::Cloneable { + public: + explicit RNNCellImplBase(const RNNCellOptionsBase& options_); + + /// Initializes the parameters of the RNNCell module. + void reset() override; + + void reset_parameters(); + + /// Pretty prints the RNN module into the given `stream`. 
+ void pretty_print(std::ostream& stream) const override; + + RNNCellOptionsBase options_base; + + Tensor weight_ih; + Tensor weight_hh; + Tensor bias_ih; + Tensor bias_hh; + + protected: + void check_forward_input(const Tensor& input, const std::string& name) const; + virtual std::string get_nonlinearity_str() const; +}; +} // namespace detail + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RNNCell +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// An Elman RNN cell with tanh or ReLU non-linearity. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.RNNCell to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::RNNCellOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// RNNCell model(RNNCellOptions(20, +/// 10).bias(false).nonlinearity(torch::kReLU)); +/// ``` +class TORCH_API RNNCellImpl : public detail::RNNCellImplBase { + public: + RNNCellImpl(int64_t input_size, int64_t hidden_size) + : RNNCellImpl(RNNCellOptions(input_size, hidden_size)) {} + explicit RNNCellImpl(const RNNCellOptions& options_); + + Tensor forward(const Tensor& input, const Tensor& hx = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}) + + public: + RNNCellOptions options; + + protected: + std::string get_nonlinearity_str() const override; +}; + +/// A `ModuleHolder` subclass for `RNNCellImpl`. +/// See the documentation for `RNNCellImpl` class to learn what methods it +/// provides, and examples of how to use `RNNCell` with +/// `torch::nn::RNNCellOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(RNNCell); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ LSTMCell +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A long short-term memory (LSTM) cell. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.LSTMCell to learn +/// about the exact behavior of this module. 
+/// +/// See the documentation for `torch::nn::LSTMCellOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// LSTMCell model(LSTMCellOptions(20, 10).bias(false)); +/// ``` +class TORCH_API LSTMCellImpl : public detail::RNNCellImplBase { + public: + LSTMCellImpl(int64_t input_size, int64_t hidden_size) + : LSTMCellImpl(LSTMCellOptions(input_size, hidden_size)) {} + explicit LSTMCellImpl(const LSTMCellOptions& options_); + + std::tuple forward( + const Tensor& input, + std::optional> hx_opt = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {1, AnyValue(std::optional>())}) + + public: + LSTMCellOptions options; +}; + +/// A `ModuleHolder` subclass for `LSTMCellImpl`. +/// See the documentation for `LSTMCellImpl` class to learn what methods it +/// provides, and examples of how to use `LSTMCell` with +/// `torch::nn::LSTMCellOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(LSTMCell); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ GRUCell +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A gated recurrent unit (GRU) cell. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.GRUCell to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::GRUCellOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// GRUCell model(GRUCellOptions(20, 10).bias(false)); +/// ``` +class TORCH_API GRUCellImpl : public detail::RNNCellImplBase { + public: + GRUCellImpl(int64_t input_size, int64_t hidden_size) + : GRUCellImpl(GRUCellOptions(input_size, hidden_size)) {} + explicit GRUCellImpl(const GRUCellOptions& options_); + + Tensor forward(const Tensor& input, const Tensor& hx = {}); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}) + + public: + GRUCellOptions options; +}; + +/// A `ModuleHolder` subclass for `GRUCellImpl`. 
+/// See the documentation for `GRUCellImpl` class to learn what methods it +/// provides, and examples of how to use `GRUCell` with +/// `torch::nn::GRUCellOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(GRUCell); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformer.h new file mode 100644 index 0000000000000000000000000000000000000000..65b9e8d6a7e0b9cc3c7811d822d9208aa383cd38 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformer.h @@ -0,0 +1,146 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Transformer ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// A transformer model. User is able to modify the attributes as needed. The +/// architecture is based on the paper "Attention Is All You Need". Ashish +/// Vaswani, Noam Shazeer, Niki Parmar, Jakob Uszkoreit, Llion Jones, Aidan N +/// Gomez, Lukasz Kaiser, and Illia Polosukhin. 2017. Attention is all you need. +/// In Advances in Neural Information Processing Systems, pages 6000-6010. 
+/// +/// See https://pytorch.org/docs/stable/generated/torch.nn.Transformer.html to +/// learn about the exact behavior of this transformer model. +/// +/// See the documentation for `torch::nn::TransformerOptions` class to learn +/// what constructor arguments are supported for this transformer model. +/// +/// Example: +/// ``` +/// Transformer trans(TransformerOptions(512, 8)); +/// ``` +class TORCH_API TransformerImpl : public Cloneable { + public: + explicit TransformerImpl(TransformerOptions options_); + + /// forward function for Transformer Module + /// Args: + /// src: the sequence to the encoder (required). + /// tgt: the sequence to the decoder (required). + /// src_mask: the additive mask for the src sequence (optional). + /// tgt_mask: the additive mask for the tgt sequence (optional). + /// memory_mask: the additive mask for the encoder output (optional). + /// src_key_padding_mask: the ByteTensor mask for src keys per batch + /// (optional). tgt_key_padding_mask: the ByteTensor mask for tgt keys per + /// batch (optional). memory_key_padding_mask: the ByteTensor mask for + /// memory keys per batch (optional). + /// + /// Shape: + /// src: `(S, N, E)` + /// tgt: `(T, N, E)` + /// src_mask: `(S, S)` + /// tgt_mask: `(T, T)` + /// memory_mask: `(T, S)` + /// src_key_padding_mask: `(N, S)` + /// tgt_key_padding_mask: `(N, T)` + /// memory_key_padding_mask: `(N, S)` + /// + /// Note: + /// [src/tgt/memory]_mask ensures that position i is allowed to attend the + /// unmasked positions. If a ByteTensor is provided, the non-zero + /// positions are not allowed to attend while the zero positions will be + /// unchanged. If a BoolTensor is provided, positions with `True` are not + /// allowed to attend while `False` values will be unchanged. If a + /// FloatTensor is provided, it will be added to the attention weight. + /// + /// [src/tgt/memory]_key_padding_mask provides specified elements in the + /// key to be ignored by the attention.
If a ByteTensor is provided, the + /// non-zero positions will be ignored while the zero positions will be + /// unchanged. If a BoolTensor is provided, the positions with the value + /// of `True` will be ignored while the positions with the value of `False` + /// will be unchanged. + /// + /// output: `(T, N, E)` + /// + /// Note: + /// Due to the multi-head attention architecture in the transformer model, + /// the output sequence length of a transformer is the same as the input + /// sequence (i.e. target) length of the decoder. + /// + /// where + /// S is the source sequence length, + /// T is the target sequence length, + /// N is the batch size, + /// E is the feature number. + Tensor forward( + const Tensor& src, + const Tensor& tgt, + const Tensor& src_mask = {}, + const Tensor& tgt_mask = {}, + const Tensor& memory_mask = {}, + const Tensor& src_key_padding_mask = {}, + const Tensor& tgt_key_padding_mask = {}, + const Tensor& memory_key_padding_mask = {}); + + void reset() override; + + void reset_parameters(); + + /// Generate a square mask for the sequence. + /// The masked positions are filled with `-inf` in float type. + /// Unmasked positions are filled with `0.0` in float type. + /// Note: + /// 1. This function will always return a CPU tensor. + /// 2. This function requires the platform to support IEEE754, since + /// `-inf` is guaranteed to + /// be valid only when IEEE754 is supported. If the platform doesn't + /// support IEEE754, this function will fill the mask with the smallest + /// float number instead of `-inf`, a one time warning will pop up as + /// well.
+ static Tensor generate_square_subsequent_mask(int64_t sz); + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {2, AnyValue(Tensor())}, + {3, AnyValue(Tensor())}, + {4, AnyValue(Tensor())}, + {5, AnyValue(Tensor())}, + {6, AnyValue(Tensor())}, + {7, AnyValue(Tensor())}) + + public: + /// options with which this `Transformer` was constructed + TransformerOptions options; + + /// encoder module + AnyModule encoder; + + /// decoder module + AnyModule decoder; +}; + +/// A `ModuleHolder` subclass for `TransformerImpl`. +/// See the documentation for `TransformerImpl` class to learn what +/// methods it provides, and examples of how to use `Transformer` with +/// `torch::nn::TransformerOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(Transformer); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformercoder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformercoder.h new file mode 100644 index 0000000000000000000000000000000000000000..f092b91e38208ce171c08c2e06d13e675339927e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformercoder.h @@ -0,0 +1,157 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TransformerEncoder +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// TransformerEncoder module. 
+/// See +/// https://pytorch.org/docs/main/generated/torch.nn.TransformerEncoder.html +/// to learn about the exact behavior of this encoder module. +/// +/// See the documentation for `torch::nn::TransformerEncoderOptions` class to +/// learn what constructor arguments are supported for this encoder module. +/// +/// Example: +/// ``` +/// TransformerEncoderLayer encoderLayer(TransformerEncoderLayerOptions(512, +/// 8).dropout(0.1)); TransformerEncoder +/// encoder(TransformerEncoderOptions(encoderLayer, +/// 6).norm(LayerNorm(LayerNormOptions({2})))); +/// ``` +class TORCH_API TransformerEncoderImpl + : public Cloneable { + public: + TransformerEncoderImpl( + TransformerEncoderLayer encoder_layer, + int64_t num_layers) + : TransformerEncoderImpl( + TransformerEncoderOptions(std::move(encoder_layer), num_layers)) {} + explicit TransformerEncoderImpl(TransformerEncoderOptions options_); + + Tensor forward( + const Tensor& src, + const Tensor& src_mask = {}, + const Tensor& src_key_padding_mask = {}); + + void reset() override; + + void reset_parameters(); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}, {2, AnyValue(Tensor())}) + + public: + /// options with which this `TransformerEncoder` was constructed + TransformerEncoderOptions options; + + /// module list that contains all the encoder layers + ModuleList layers = nullptr; + + /// optional normalization module + AnyModule norm; +}; + +/// A `ModuleHolder` subclass for `TransformerEncoderImpl`. +/// See the documentation for `TransformerEncoderImpl` class to learn what +/// methods it provides, and examples of how to use `TransformerEncoder` with +/// `torch::nn::TransformerEncoderOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(TransformerEncoder); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TransformerDecoder +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// TransformerDecoder is a stack of N decoder layers.
+/// See +/// https://pytorch.org/docs/main/generated/torch.nn.TransformerDecoder.html +/// to learn about the exact behavior of this decoder module. +/// +/// See the documentation for `torch::nn::TransformerDecoderOptions` class to +/// learn what constructor arguments are supported for this decoder module. +/// +/// Example: +/// ``` +/// TransformerDecoderLayer decoder_layer(TransformerDecoderLayerOptions(512, +/// 8).dropout(0.1)); TransformerDecoder +/// transformer_decoder(TransformerDecoderOptions(decoder_layer, +/// 6).norm(LayerNorm(LayerNormOptions({2})))); const auto memory = +/// torch::rand({10, 32, 512}); const auto tgt = torch::rand({20, 32, 512}); +/// auto out = transformer_decoder(tgt, memory); +/// ``` +class TORCH_API TransformerDecoderImpl + : public Cloneable { + public: + TransformerDecoderImpl( + TransformerDecoderLayer decoder_layer, + int64_t num_layers) + : TransformerDecoderImpl( + TransformerDecoderOptions(std::move(decoder_layer), num_layers)) {} + explicit TransformerDecoderImpl(TransformerDecoderOptions options_); + + void reset() override; + + void reset_parameters(); + + /// Pass the inputs (and mask) through the decoder layers in turn. + /// Args: + /// tgt: the sequence to the decoder layer (required). + /// memory: the sequence from the last layer of the encoder (required). + /// tgt_mask: the mask for the tgt sequence (optional). + /// memory_mask: the mask for the memory sequence (optional). + /// tgt_key_padding_mask: the mask for the tgt keys per batch + /// (optional). memory_key_padding_mask: the mask for the memory keys + /// per batch (optional). + Tensor forward( + const Tensor& tgt, + const Tensor& memory, + const Tensor& tgt_mask = {}, + const Tensor& memory_mask = {}, + const Tensor& tgt_key_padding_mask = {}, + const Tensor& memory_key_padding_mask = {}); + + /// The options used to configure this module.
+ TransformerDecoderOptions options; + + /// Cloned layers of decoder layers + ModuleList layers{nullptr}; + + /// optional layer normalization module + AnyModule norm; + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {2, AnyValue(Tensor())}, + {3, AnyValue(Tensor())}, + {4, AnyValue(Tensor())}, + {5, AnyValue(Tensor())}) +}; + +/// A `ModuleHolder` subclass for `TransformerDecoderImpl`. +/// See the documentation for `TransformerDecoderImpl` class to learn what +/// methods it provides, and examples of how to use `TransformerDecoder` with +/// `torch::nn::TransformerDecoderOptions`. +/// See the documentation for `ModuleHolder` to learn about PyTorch's +/// module storage semantics. +TORCH_MODULE(TransformerDecoder); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformerlayer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformerlayer.h new file mode 100644 index 0000000000000000000000000000000000000000..7f66b5f35d933e33637b1cbf41ba0bcc6a8ecab7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/transformerlayer.h @@ -0,0 +1,198 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TransformerEncoderLayer +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// TransformerEncoderLayer module. 
+/// See +/// https://pytorch.org/docs/main/generated/torch.nn.TransformerEncoderLayer.html +/// to learn about the exact behavior of this encoder layer module. +/// +/// See the documentation for `torch::nn::TransformerEncoderLayerOptions` class +/// to learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// TransformerEncoderLayer encoderLayer(TransformerEncoderLayerOptions(512, +/// 8).dropout(0.1)); +/// ``` +class TORCH_API TransformerEncoderLayerImpl + : public Cloneable { + public: + TransformerEncoderLayerImpl(int64_t d_model, int64_t nhead) + : TransformerEncoderLayerImpl( + TransformerEncoderLayerOptions(d_model, nhead)) {} + explicit TransformerEncoderLayerImpl(TransformerEncoderLayerOptions options_); + + Tensor forward( + const Tensor& src, + const Tensor& src_mask = {}, + const Tensor& src_key_padding_mask = {}); + + void reset() override; + + void reset_parameters(); + + protected: + FORWARD_HAS_DEFAULT_ARGS({1, AnyValue(Tensor())}, {2, AnyValue(Tensor())}) + + public: + /// options with which this `TransformerEncoderLayer` was constructed + TransformerEncoderLayerOptions options; + + /// self attention + MultiheadAttention self_attn = nullptr; + + /// feedforward first linear layer + Linear linear1 = nullptr; + + /// feedforward dropout layer + Dropout dropout = nullptr; + + /// feedforward second linear layer + Linear linear2 = nullptr; + + /// pre feedforward, normalization layer + LayerNorm norm1 = nullptr; + /// post feedforward, normalization layer + LayerNorm norm2 = nullptr; + + /// pre feedforward, dropout layer + Dropout dropout1 = nullptr; + /// post feedforward, dropout layer + Dropout dropout2 = nullptr; +}; + +/// A `ModuleHolder` subclass for `TransformerEncoderLayerImpl`. +/// See the documentation for `TransformerEncoderLayerImpl` class to learn what +/// methods it provides, and examples of how to use `TransformerEncoderLayer` +/// with `torch::nn::TransformerEncoderLayerOptions`.
See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(TransformerEncoderLayer); + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ TransformerDecoderLayer +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// TransformerDecoderLayer is made up of self-attn, multi-head-attn and +/// feedforward network. This standard decoder layer is based on the paper +/// "Attention Is All You Need". Ashish Vaswani, Noam Shazeer, Niki Parmar, +/// Jakob Uszkoreit, Llion Jones, Aidan N Gomez, Lukasz Kaiser, and Illia +/// Polosukhin. 2017. Attention is all you need. In Advances in Neural +/// Information Processing Systems, pages 6000-6010. Users may modify or +/// implement in a different way during application. See +/// https://pytorch.org/docs/main/nn.html#transformer-layers to learn about +/// the exact behavior of this module. +/// +/// See the documentation for `torch::nn::TransformerDecoderLayerOptions` class +/// to learn what constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// TransformerDecoderLayer model(TransformerDecoderLayerOptions(512, +/// 8).dropout(0.2)); +/// ``` +class TORCH_API TransformerDecoderLayerImpl + : public Cloneable { + public: + TransformerDecoderLayerImpl(int64_t d_model, int64_t nhead) + : TransformerDecoderLayerImpl( + TransformerDecoderLayerOptions(d_model, nhead)) {} + explicit TransformerDecoderLayerImpl(TransformerDecoderLayerOptions options_); + + void reset() override; + + void reset_parameters(); + + /// Pass the inputs (and mask) through the decoder layer. + /// Args: + /// tgt: the sequence to the decoder layer (required). + /// memory: the sequence from the last layer of the encoder (required). + /// tgt_mask: the mask for the tgt sequence (optional). + /// memory_mask: the mask for the memory sequence (optional). + /// tgt_key_padding_mask: the mask for the tgt keys per batch + /// (optional). 
memory_key_padding_mask: the mask for the memory keys + /// per batch (optional). + Tensor forward( + Tensor tgt, + const Tensor& memory, + const Tensor& tgt_mask = {}, + const Tensor& memory_mask = {}, + const Tensor& tgt_key_padding_mask = {}, + const Tensor& memory_key_padding_mask = {}); + + /// The options used to configure this module. + TransformerDecoderLayerOptions options; + + /// self attention + MultiheadAttention self_attn{nullptr}; + + /// Dropout, post self attention + Dropout dropout1{nullptr}; + + /// Normalization, post self attention + LayerNorm norm1{nullptr}; + + /// Multi-headed attention + MultiheadAttention multihead_attn{nullptr}; + + /// Dropout, post multi-headed attention + Dropout dropout2{nullptr}; + + /// Normalization, post multi-headed attention + LayerNorm norm2{nullptr}; + + /// Feed forward first linear layer + Linear linear1{nullptr}; + + /// Feed forward dropout layer + Dropout dropout{nullptr}; + + /// Feed forward second linear layer + Linear linear2{nullptr}; + + /// Dropout, post feed forward + Dropout dropout3{nullptr}; + + /// Normalization, post feed forward + LayerNorm norm3{nullptr}; + + protected: + FORWARD_HAS_DEFAULT_ARGS( + {2, AnyValue(Tensor())}, + {3, AnyValue(Tensor())}, + {4, AnyValue(Tensor())}, + {5, AnyValue(Tensor())}) + + /// Apply activation based on configuration + Tensor activation(const Tensor& input); +}; + +/// A `ModuleHolder` subclass for `TransformerDecoderLayerImpl`. +/// See the documentation for `TransformerDecoderLayerImpl` class to learn what +/// methods it provides, and examples of how to use `TransformerDecoderLayer` +/// with `torch::nn::TransformerDecoderLayerOptions`. See the documentation for +/// `ModuleHolder` to learn about PyTorch's module storage semantics. +TORCH_MODULE(TransformerDecoderLayer); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/upsampling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/upsampling.h new file mode 100644 index 0000000000000000000000000000000000000000..add048c3e401c5670dd450f70cff457344909182 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/upsampling.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +#include +#include + +namespace torch::nn { + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Upsample ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +/// Upsamples a given multi-channel 1D (temporal), 2D (spatial) or 3D +/// (volumetric) data. +/// See https://pytorch.org/docs/main/nn.html#torch.nn.Upsample to learn +/// about the exact behavior of this module. +/// +/// See the documentation for `torch::nn::UpsampleOptions` class to learn what +/// constructor arguments are supported for this module. +/// +/// Example: +/// ``` +/// Upsample +/// model(UpsampleOptions().scale_factor({3}).mode(torch::kLinear).align_corners(false)); +/// ``` +class TORCH_API UpsampleImpl : public Cloneable { + public: + explicit UpsampleImpl(UpsampleOptions options_ = {}); + + void reset() override; + + /// Pretty prints the `Upsample` module into the given `stream`. + void pretty_print(std::ostream& stream) const override; + + Tensor forward(const Tensor& input); + + /// The options with which this `Module` was constructed. + UpsampleOptions options; +}; + +/// A `ModuleHolder` subclass for `UpsampleImpl`. 
+/// See the documentation for `UpsampleImpl` class to learn what methods it +/// provides, and examples of how to use `Upsample` with +/// `torch::nn::UpsampleOptions`. See the documentation for `ModuleHolder` to +/// learn about PyTorch's module storage semantics. +TORCH_MODULE(Upsample); + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..68548c9aa479ba19faff073e7f639050d8d9809d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/modules/utils.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::nn::modules::utils { + +// Reverse the order of `t` and repeat each element for `n` times. +// This can be used to translate padding arg used by Conv and Pooling modules +// to the ones used by `F::pad`. +// +// This mirrors `_reverse_repeat_tuple` in `torch/nn/modules/utils.py`. 
+inline std::vector _reverse_repeat_vector( + c10::ArrayRef t, + int64_t n) { + TORCH_INTERNAL_ASSERT(n >= 0); + std::vector ret; + ret.reserve(t.size() * n); + for (auto rit = t.rbegin(); rit != t.rend(); ++rit) { + for ([[maybe_unused]] const auto i : c10::irange(n)) { + ret.emplace_back(*rit); + } + } + return ret; +} + +inline std::vector _list_with_default( + c10::ArrayRef> out_size, + c10::IntArrayRef defaults) { + TORCH_CHECK( + defaults.size() > out_size.size(), + "Input dimension should be at least ", + out_size.size() + 1); + std::vector ret; + c10::IntArrayRef defaults_slice = + defaults.slice(defaults.size() - out_size.size(), out_size.size()); + for (const auto i : c10::irange(out_size.size())) { + auto v = out_size.at(i); + auto d = defaults_slice.at(i); + ret.emplace_back(v.has_value() ? v.value() : d); + } + return ret; +} + +} // namespace torch::nn::modules::utils + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options.h new file mode 100644 index 0000000000000000000000000000000000000000..8610d2ffb3a26d43fa949c28023a057c790feacc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/activation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/activation.h new file mode 100644 index 0000000000000000000000000000000000000000..db8ba171d9135ea9171e7de0522515dfaf6aadf5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/activation.h @@ -0,0 +1,717 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `ELU` module. +/// +/// Example: +/// ``` +/// ELU model(ELUOptions().alpha(42.42).inplace(true)); +/// ``` +struct TORCH_API ELUOptions { + /// The `alpha` value for the ELU formulation. Default: 1.0 + TORCH_ARG(double, alpha) = 1.0; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::elu`. +/// +/// See the documentation for `torch::nn::ELUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::elu(x, F::ELUFuncOptions().alpha(0.42).inplace(true)); +/// ``` +using ELUFuncOptions = ELUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `SELU` module. +/// +/// Example: +/// ``` +/// SELU model(SELUOptions().inplace(true)); +/// ``` +struct TORCH_API SELUOptions { + /* implicit */ SELUOptions(bool inplace = false); + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace); +}; + +namespace functional { +/// Options for `torch::nn::functional::selu`. 
+/// +/// See the documentation for `torch::nn::SELUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::selu(input, F::SELUFuncOptions(false)); +/// ``` +using SELUFuncOptions = SELUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `GLU` module. +/// +/// Example: +/// ``` +/// GLU model(GLUOptions(1)); +/// ``` +struct TORCH_API GLUOptions { + /* implicit */ GLUOptions(int64_t dim = -1); + + /// the dimension on which to split the input. Default: -1 + TORCH_ARG(int64_t, dim); +}; + +namespace functional { +/// Options for `torch::nn::functional::glu`. +/// +/// See the documentation for `torch::nn::GLUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::glu(input, GLUFuncOptions(1)); +/// ``` +using GLUFuncOptions = GLUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `GELU` module. +/// +/// Example: +/// ``` +/// GELU model(GELUOptions().approximate("none")); +/// ``` +struct TORCH_API GELUOptions { + /// Specifies the approximation to apply to the output. + TORCH_ARG(std::string, approximate) = "none"; +}; + +namespace functional { +/// Options for `torch::nn::functional::gelu`. +/// +/// See the documentation for `torch::nn::GELUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::gelu(input, F::GELUFuncOptions().approximate("none")); +/// ``` +using GELUFuncOptions = GELUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Hardshrink` module. 
+/// +/// Example: +/// ``` +/// Hardshrink model(HardshrinkOptions().lambda(42.42)); +/// ``` +struct TORCH_API HardshrinkOptions { + /* implicit */ HardshrinkOptions(double lambda = 0.5); + + /// the `lambda` value for the Hardshrink formulation. Default: 0.5 + TORCH_ARG(double, lambda); +}; + +namespace functional { +/// Options for `torch::nn::functional::hardshrink`. +/// +/// See the documentation for `torch::nn::HardshrinkOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hardshrink(x, F::HardshrinkFuncOptions().lambda(0.42)); +/// ``` +using HardshrinkFuncOptions = HardshrinkOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Hardtanh` module. +/// +/// Example: +/// ``` +/// Hardtanh +/// model(HardtanhOptions().min_val(-42.42).max_val(0.42).inplace(true)); +/// ``` +struct TORCH_API HardtanhOptions { + /// minimum value of the linear region range. Default: -1 + TORCH_ARG(double, min_val) = -1.0; + + /// maximum value of the linear region range. Default: 1 + TORCH_ARG(double, max_val) = 1.0; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::hardtanh`. +/// +/// See the documentation for `torch::nn::HardtanhOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hardtanh(x, +/// F::HardtanhFuncOptions().min_val(-1.0).max_val(1.0).inplace(true)); +/// ``` +using HardtanhFuncOptions = HardtanhOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `LeakyReLU` module. 
+/// +/// Example: +/// ``` +/// LeakyReLU model(LeakyReLUOptions().negative_slope(0.42).inplace(true)); +/// ``` +struct TORCH_API LeakyReLUOptions { + /// Controls the angle of the negative slope. Default: 1e-2 + TORCH_ARG(double, negative_slope) = 1e-2; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::leaky_relu`. +/// +/// See the documentation for `torch::nn::LeakyReLUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::leaky_relu(x, +/// F::LeakyReLUFuncOptions().negative_slope(0.42).inplace(true)); +/// ``` +using LeakyReLUFuncOptions = LeakyReLUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Softmax` module. +/// +/// Example: +/// ``` +/// Softmax model(SoftmaxOptions(1)); +/// ``` +struct TORCH_API SoftmaxOptions { + SoftmaxOptions(int64_t dim); + + /// Dimension along which Softmax will be computed. + TORCH_ARG(int64_t, dim); +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::softmax`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softmax(input, F::SoftmaxFuncOptions(1)); +/// ``` +struct TORCH_API SoftmaxFuncOptions { + SoftmaxFuncOptions(int64_t dim); + + /// Dimension along which Softmax will be computed. + TORCH_ARG(int64_t, dim); + + /// the desired data type of returned tensor. + /// If specified, the input tensor is casted to `dtype` before the operation + /// is performed. This is useful for preventing data type overflows. Default: + /// None. 
+ TORCH_ARG(std::optional, dtype) = std::nullopt; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `Softmin` module. +/// +/// Example: +/// ``` +/// Softmin model(SoftminOptions(1)); +/// ``` +struct TORCH_API SoftminOptions { + SoftminOptions(int64_t dim); + + /// Dimension along which Softmin will be computed. + TORCH_ARG(int64_t, dim); +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::softmin`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softmin(input, F::SoftminFuncOptions(1)); +/// ``` +struct TORCH_API SoftminFuncOptions { + SoftminFuncOptions(int64_t dim); + + /// Dimension along which Softmin will be computed. + TORCH_ARG(int64_t, dim); + + /// the desired data type of returned tensor. + /// If specified, the input tensor is casted to `dtype` before the operation + /// is performed. This is useful for preventing data type overflows. Default: + /// None. + TORCH_ARG(std::optional, dtype) = std::nullopt; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `LogSoftmax` module. +/// +/// Example: +/// ``` +/// LogSoftmax model(LogSoftmaxOptions(1)); +/// ``` +struct TORCH_API LogSoftmaxOptions { + LogSoftmaxOptions(int64_t dim); + + /// Dimension along which LogSoftmax will be computed. + TORCH_ARG(int64_t, dim); +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::log_softmax`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::log_softmax(input, LogSoftmaxFuncOptions(1)); +/// ``` +struct TORCH_API LogSoftmaxFuncOptions { + LogSoftmaxFuncOptions(int64_t dim); + + /// Dimension along which LogSoftmax will be computed. 
+ TORCH_ARG(int64_t, dim); + + /// the desired data type of returned tensor. + /// If specified, the input tensor is casted to `dtype` before the operation + /// is performed. This is useful for preventing data type overflows. Default: + /// None. + TORCH_ARG(std::optional, dtype) = std::nullopt; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `PReLU` module. +/// +/// Example: +/// ``` +/// PReLU model(PReLUOptions().num_parameters(42)); +/// ``` +struct TORCH_API PReLUOptions { + /// number of `a` to learn. Although it takes an int as input, there is only + /// two values are legitimate: 1, or the number of channels at input. Default: + /// 1 + TORCH_ARG(int64_t, num_parameters) = 1; + + /// the initial value of `a`. Default: 0.25 + TORCH_ARG(double, init) = 0.25; +}; + +// ============================================================================ + +/// Options for the `ReLU` module. +/// +/// Example: +/// ``` +/// ReLU model(ReLUOptions().inplace(true)); +/// ``` +struct TORCH_API ReLUOptions { + /* implicit */ ReLUOptions(bool inplace = false); + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace); +}; + +namespace functional { +/// Options for `torch::nn::functional::relu`. +/// +/// See the documentation for `torch::nn::ReLUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::relu(x, F::ReLUFuncOptions().inplace(true)); +/// ``` +using ReLUFuncOptions = ReLUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `ReLU6` module. +/// +/// Example: +/// ``` +/// ReLU6 model(ReLU6Options().inplace(true)); +/// ``` +struct TORCH_API ReLU6Options { + /* implicit */ ReLU6Options(bool inplace = false); + + /// can optionally do the operation in-place. 
Default: False + TORCH_ARG(bool, inplace); +}; + +namespace functional { +/// Options for `torch::nn::functional::relu6`. +/// +/// See the documentation for `torch::nn::ReLU6Options` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::relu6(x, F::ReLU6FuncOptions().inplace(true)); +/// ``` +using ReLU6FuncOptions = ReLU6Options; +} // namespace functional + +// ============================================================================ + +/// Options for the `RReLU` module. +/// +/// Example: +/// ``` +/// RReLU model(RReLUOptions().lower(0.24).upper(0.42).inplace(true)); +/// ``` +struct TORCH_API RReLUOptions { + /// lower bound of the uniform distribution. Default: 1/8 + TORCH_ARG(double, lower) = 1.0 / 8.0; + + /// upper bound of the uniform distribution. Default: 1/3 + TORCH_ARG(double, upper) = 1.0 / 3.0; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::rrelu`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::rrelu(x, F::RReLUFuncOptions().lower(0.1).upper(0.4).inplace(true)); +/// ``` +struct TORCH_API RReLUFuncOptions { + /// lower bound of the uniform distribution. Default: 1/8 + TORCH_ARG(double, lower) = 1.0 / 8.0; + + /// upper bound of the uniform distribution. Default: 1/3 + TORCH_ARG(double, upper) = 1.0 / 3.0; + + TORCH_ARG(bool, training) = false; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `CELU` module. 
+/// +/// Example: +/// ``` +/// CELU model(CELUOptions().alpha(42.42).inplace(true)); +/// ``` +struct TORCH_API CELUOptions { + /// The `alpha` value for the CELU formulation. Default: 1.0 + TORCH_ARG(double, alpha) = 1.0; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::celu`. +/// +/// See the documentation for `torch::nn::CELUOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::celu(x, F::CELUFuncOptions().alpha(0.42).inplace(true)); +/// ``` +using CELUFuncOptions = CELUOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Softplus` module. +/// +/// Example: +/// ``` +/// Softplus model(SoftplusOptions().beta(0.24).threshold(42.42)); +/// ``` +struct TORCH_API SoftplusOptions { + /// the `beta` value for the Softplus formulation. Default: 1 + TORCH_ARG(double, beta) = 1.0; + + /// values above this revert to a linear function. Default: 20 + TORCH_ARG(double, threshold) = 20.0; +}; + +namespace functional { +/// Options for `torch::nn::functional::softplus`. +/// +/// See the documentation for `torch::nn::SoftplusOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softplus(x, F::SoftplusFuncOptions().beta(0.5).threshold(3.0)); +/// ``` +using SoftplusFuncOptions = SoftplusOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Softshrink` module. +/// +/// Example: +/// ``` +/// Softshrink model(SoftshrinkOptions(42.42)); +/// ``` +struct TORCH_API SoftshrinkOptions { + /* implicit */ SoftshrinkOptions(double lambda = 0.5); + + /// the `lambda` value for the Softshrink formulation. 
Default: 0.5 + TORCH_ARG(double, lambda); +}; + +namespace functional { +/// Options for `torch::nn::functional::softshrink`. +/// +/// See the documentation for `torch::nn::SoftshrinkOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::softshrink(x, F::SoftshrinkFuncOptions(0.42)); +/// ``` +using SoftshrinkFuncOptions = SoftshrinkOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Threshold` module. +/// +/// Example: +/// ``` +/// Threshold model(ThresholdOptions(42.42, 24.24).inplace(true)); +/// ``` +struct TORCH_API ThresholdOptions { + ThresholdOptions(double threshold, double value) + : threshold_(threshold), value_(value) {} + + /// The value to threshold at + TORCH_ARG(double, threshold); + + /// The value to replace with + TORCH_ARG(double, value); + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::threshold`. +/// +/// See the documentation for `torch::nn::ThresholdOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::threshold(x, F::ThresholdFuncOptions(0.5, 0.5).inplace(true)); +/// ``` +using ThresholdFuncOptions = ThresholdOptions; +} // namespace functional + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::gumbel_softmax`. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::gumbel_softmax(logits, F::GumbelSoftmaxFuncOptions().hard(true).dim(-1)); +/// ``` +struct TORCH_API GumbelSoftmaxFuncOptions { + /// non-negative scalar temperature + TORCH_ARG(double, tau) = 1.0; + + /// returned samples will be discretized as one-hot vectors, + /// but will be differentiated as if it is the soft sample in autograd. + /// Default: False + TORCH_ARG(bool, hard) = false; + + /// dimension along which softmax will be computed. Default: -1 + TORCH_ARG(int, dim) = -1; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `MultiheadAttention` module. +/// +/// Example: +/// ``` +/// MultiheadAttention model(MultiheadAttentionOptions(20, 10).bias(false)); +/// ``` +struct TORCH_API MultiheadAttentionOptions { + MultiheadAttentionOptions(int64_t embed_dim, int64_t num_heads); + + /// total dimension of the model. + TORCH_ARG(int64_t, embed_dim); + + /// parallel attention heads. + TORCH_ARG(int64_t, num_heads); + + /// a Dropout layer on attn_output_weights. Default: 0.0. + TORCH_ARG(double, dropout) = 0.0; + + /// add bias as module parameter. Default: true. + TORCH_ARG(bool, bias) = true; + + /// add bias to the key and value sequences at dim=0. + TORCH_ARG(bool, add_bias_kv) = false; + + /// add a new batch of zeros to the key and value sequences at dim=1. + TORCH_ARG(bool, add_zero_attn) = false; + + /// total number of features in key. Default: std::nullopt. + TORCH_ARG(int64_t, kdim); + + /// total number of features in key. Default: std::nullopt. 
+ TORCH_ARG(int64_t, vdim); +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::multi_head_attention_forward` +struct TORCH_API MultiheadAttentionForwardFuncOptions { + MultiheadAttentionForwardFuncOptions( + int64_t embed_dim_to_check, + int64_t num_heads, + Tensor in_proj_weight, + Tensor in_proj_bias, + Tensor bias_k, + Tensor bias_v, + bool add_zero_attn, + double dropout_p, + Tensor out_proj_weight, + Tensor out_proj_bias); + + TORCH_ARG(int64_t, embed_dim_to_check); + + TORCH_ARG(int64_t, num_heads); + + TORCH_ARG(Tensor, in_proj_weight); + + TORCH_ARG(Tensor, in_proj_bias); + + TORCH_ARG(Tensor, bias_k); + + TORCH_ARG(Tensor, bias_v); + + TORCH_ARG(bool, add_zero_attn); + + TORCH_ARG(double, dropout_p); + + TORCH_ARG(Tensor, out_proj_weight); + + TORCH_ARG(Tensor, out_proj_bias); + + TORCH_ARG(bool, training) = true; + + TORCH_ARG(Tensor, key_padding_mask); + + TORCH_ARG(bool, need_weights) = true; + + TORCH_ARG(Tensor, attn_mask); + + TORCH_ARG(bool, use_separate_proj_weight) = false; + + TORCH_ARG(Tensor, q_proj_weight); + + TORCH_ARG(Tensor, k_proj_weight); + + TORCH_ARG(Tensor, v_proj_weight); + + TORCH_ARG(Tensor, static_k); + + TORCH_ARG(Tensor, static_v); + + TORCH_ARG(bool, average_attn_weights) = true; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/adaptive.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/adaptive.h new file mode 100644 index 0000000000000000000000000000000000000000..62af777dbd17f5e6371ec400f4253c048580c1e6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/adaptive.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `AdaptiveLogSoftmaxWithLoss` module. +/// +/// Example: +/// ``` +/// AdaptiveLogSoftmaxWithLoss model(AdaptiveLogSoftmaxWithLossOptions(8, 10, +/// {4, 8}).div_value(2.).head_bias(true)); +/// ``` +struct TORCH_API AdaptiveLogSoftmaxWithLossOptions { + /* implicit */ AdaptiveLogSoftmaxWithLossOptions( + int64_t in_features, + int64_t n_classes, + std::vector cutoffs); + + /// Number of features in the input tensor + TORCH_ARG(int64_t, in_features); + + /// Number of classes in the dataset + TORCH_ARG(int64_t, n_classes); + + /// Cutoffs used to assign targets to their buckets + TORCH_ARG(std::vector, cutoffs); + + /// value used as an exponent to compute sizes of the clusters. Default: 4.0 + TORCH_ARG(double, div_value) = 4.; + + /// If ``true``, adds a bias term to the 'head' of + /// the adaptive softmax. Default: false + TORCH_ARG(bool, head_bias) = false; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/batchnorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/batchnorm.h new file mode 100644 index 0000000000000000000000000000000000000000..eb34bafb86811bad579ee4d835fccda1deefe790 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/batchnorm.h @@ -0,0 +1,98 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `BatchNorm` module. +struct TORCH_API BatchNormOptions { + /* implicit */ BatchNormOptions(int64_t num_features); + + /// The number of features of the input tensor. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, num_features); + + /// The epsilon value added for numerical stability. + /// Changing this parameter after construction __is effective__. + TORCH_ARG(double, eps) = 1e-5; + + /// A momentum multiplier for the mean and variance. + /// Changing this parameter after construction __is effective__. + TORCH_ARG(std::optional, momentum) = 0.1; + + /// Whether to learn a scale and bias that are applied in an affine + /// transformation on the input. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(bool, affine) = true; + + /// Whether to store and update batch statistics (mean and variance) in the + /// module. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(bool, track_running_stats) = true; +}; + +/// Options for the `BatchNorm1d` module. 
+/// +/// Example: +/// ``` +/// BatchNorm1d +/// model(BatchNorm1dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using BatchNorm1dOptions = BatchNormOptions; + +/// Options for the `BatchNorm2d` module. +/// +/// Example: +/// ``` +/// BatchNorm2d +/// model(BatchNorm2dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using BatchNorm2dOptions = BatchNormOptions; + +/// Options for the `BatchNorm3d` module. +/// +/// Example: +/// ``` +/// BatchNorm3d +/// model(BatchNorm3dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using BatchNorm3dOptions = BatchNormOptions; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::batch_norm`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::batch_norm(input, mean, variance, +/// F::BatchNormFuncOptions().weight(weight).bias(bias).momentum(0.1).eps(1e-05).training(false)); +/// ``` +struct TORCH_API BatchNormFuncOptions { + TORCH_ARG(Tensor, weight); + + TORCH_ARG(Tensor, bias); + + TORCH_ARG(bool, training) = false; + + /// A momentum multiplier for the mean and variance. + /// Changing this parameter after construction __is effective__. + TORCH_ARG(double, momentum) = 0.1; + + /// The epsilon value added for numerical stability. + /// Changing this parameter after construction __is effective__. + TORCH_ARG(double, eps) = 1e-5; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/conv.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/conv.h new file mode 100644 index 0000000000000000000000000000000000000000..bd7445e1ebb20ab682eb27578f399085493b3fb1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/conv.h @@ -0,0 +1,418 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::nn { + +namespace detail { + +typedef std::variant< + enumtype::kZeros, + enumtype::kReflect, + enumtype::kReplicate, + enumtype::kCircular> + conv_padding_mode_t; + +template +using conv_padding_t = + std::variant, enumtype::kValid, enumtype::kSame>; + +/// Options for a `D`-dimensional convolution or convolution transpose module. +template +struct ConvNdOptions { + using padding_t = conv_padding_t; + ConvNdOptions( + int64_t in_channels, + int64_t out_channels, + ExpandingArray kernel_size) + : in_channels_(in_channels), + out_channels_(out_channels), + kernel_size_(std::move(kernel_size)) {} + + /// The number of channels the input volumes will have. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, in_channels); + + /// The number of output channels the convolution should produce. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, out_channels); + + /// The kernel size to use. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, kernel_size); + + /// The stride of the convolution. 
+ /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, stride) = 1; + + /// The padding to add to the input volumes. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(padding_t, padding) = 0; + + public: + auto padding(std::initializer_list il) { + return padding(IntArrayRef{il}); + } + + /// The kernel dilation. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, dilation) = 1; + + /// If true, convolutions will be transpose convolutions (a.k.a. + /// deconvolutions). + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(bool, transposed) = false; + + /// For transpose convolutions, the padding to add to output volumes. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, output_padding) = 0; + + /// The number of convolution groups. + /// This parameter __can__ be changed after construction. + TORCH_ARG(int64_t, groups) = 1; + + /// Whether to add a bias after individual applications of the kernel. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(bool, bias) = true; + + /// Accepted values `torch::kZeros`, `torch::kReflect`, `torch::kReplicate` or + /// `torch::kCircular`. Default: `torch::kZeros` + TORCH_ARG(conv_padding_mode_t, padding_mode) = torch::kZeros; +}; + +} // namespace detail + +// ============================================================================ + +/// Options for a `D`-dimensional convolution module. 
+template +struct ConvOptions { + using padding_mode_t = detail::conv_padding_mode_t; + using padding_t = detail::conv_padding_t; + + ConvOptions( + int64_t in_channels, + int64_t out_channels, + ExpandingArray kernel_size) + : in_channels_(in_channels), + out_channels_(out_channels), + kernel_size_(std::move(kernel_size)) {} + + /// The number of channels the input volumes will have. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, in_channels); + + /// The number of output channels the convolution should produce. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, out_channels); + + /// The kernel size to use. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, kernel_size); + + /// The stride of the convolution. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, stride) = 1; + + /// The padding to add to the input volumes. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(padding_t, padding) = 0; + + public: + auto padding(std::initializer_list il) { + return padding(IntArrayRef{il}); + } + + /// The kernel dilation. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, dilation) = 1; + + /// The number of convolution groups. + /// This parameter __can__ be changed after construction. + TORCH_ARG(int64_t, groups) = 1; + + /// Whether to add a bias after individual applications of the kernel. + /// Changing this parameter after construction __has no effect__. 
+ TORCH_ARG(bool, bias) = true; + + /// Accepted values `torch::kZeros`, `torch::kReflect`, `torch::kReplicate` or + /// `torch::kCircular`. Default: `torch::kZeros` + TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros; +}; + +/// `ConvOptions` specialized for the `Conv1d` module. +/// +/// Example: +/// ``` +/// Conv1d model(Conv1dOptions(3, 2, 3).stride(1).bias(false)); +/// ``` +using Conv1dOptions = ConvOptions<1>; + +/// `ConvOptions` specialized for the `Conv2d` module. +/// +/// Example: +/// ``` +/// Conv2d model(Conv2dOptions(3, 2, 3).stride(1).bias(false)); +/// ``` +using Conv2dOptions = ConvOptions<2>; + +/// `ConvOptions` specialized for the `Conv3d` module. +/// +/// Example: +/// ``` +/// Conv3d model(Conv3dOptions(3, 2, 3).stride(1).bias(false)); +/// ``` +using Conv3dOptions = ConvOptions<3>; + +// ============================================================================ + +namespace functional { + +/// Options for a `D`-dimensional convolution functional. +template +struct ConvFuncOptions { + using padding_t = torch::nn::detail::conv_padding_t; + + /// optional bias of shape `(out_channels)`. Default: ``None`` + TORCH_ARG(torch::Tensor, bias); + + /// The stride of the convolving kernel. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(ExpandingArray, stride) = 1; + + /// Implicit paddings on both sides of the input. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(padding_t, padding) = 0; + + public: + auto padding(std::initializer_list il) { + return padding(IntArrayRef{il}); + } + + /// The spacing between kernel elements. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(ExpandingArray, dilation) = 1; + + /// Split input into groups, `in_channels` should be divisible by + /// the number of groups. 
+ TORCH_ARG(int64_t, groups) = 1; +}; + +/// `ConvFuncOptions` specialized for `torch::nn::functional::conv1d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv1d(x, weight, F::Conv1dFuncOptions().stride(1)); +/// ``` +using Conv1dFuncOptions = ConvFuncOptions<1>; + +/// `ConvFuncOptions` specialized for `torch::nn::functional::conv2d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv2d(x, weight, F::Conv2dFuncOptions().stride(1)); +/// ``` +using Conv2dFuncOptions = ConvFuncOptions<2>; + +/// `ConvFuncOptions` specialized for `torch::nn::functional::conv3d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv3d(x, weight, F::Conv3dFuncOptions().stride(1)); +/// ``` +using Conv3dFuncOptions = ConvFuncOptions<3>; + +} // namespace functional + +// ============================================================================ + +template +struct ConvTransposeOptions { + using padding_mode_t = detail::conv_padding_mode_t; + + ConvTransposeOptions( + int64_t in_channels, + int64_t out_channels, + ExpandingArray kernel_size) + : in_channels_(in_channels), + out_channels_(out_channels), + kernel_size_(std::move(kernel_size)) {} + + /// The number of channels the input volumes will have. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, in_channels); + + /// The number of output channels the convolution should produce. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(int64_t, out_channels); + + /// The kernel size to use. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, kernel_size); + + /// The stride of the convolution. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. 
+ TORCH_ARG(ExpandingArray, stride) = 1; + + /// The padding to add to the input volumes. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, padding) = 0; + + /// For transpose convolutions, the padding to add to output volumes. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, output_padding) = 0; + + /// The number of convolution groups. + /// This parameter __can__ be changed after construction. + TORCH_ARG(int64_t, groups) = 1; + + /// Whether to add a bias after individual applications of the kernel. + /// Changing this parameter after construction __has no effect__. + TORCH_ARG(bool, bias) = true; + + /// The kernel dilation. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + /// This parameter __can__ be changed after construction. + TORCH_ARG(ExpandingArray, dilation) = 1; + + /// Accepted values `torch::kZeros`, `torch::kReflect`, `torch::kReplicate` or + /// `torch::kCircular`. Default: `torch::kZeros` + TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros; +}; + +/// `ConvTransposeOptions` specialized for the `ConvTranspose1d` module. +/// +/// Example: +/// ``` +/// ConvTranspose1d model(ConvTranspose1dOptions(3, 2, +/// 3).stride(1).bias(false)); +/// ``` +using ConvTranspose1dOptions = ConvTransposeOptions<1>; + +/// `ConvTransposeOptions` specialized for the `ConvTranspose2d` module. +/// +/// Example: +/// ``` +/// ConvTranspose2d model(ConvTranspose2dOptions(3, 2, +/// 3).stride(1).bias(false)); +/// ``` +using ConvTranspose2dOptions = ConvTransposeOptions<2>; + +/// `ConvTransposeOptions` specialized for the `ConvTranspose3d` module. 
+/// +/// Example: +/// ``` +/// ConvTranspose3d model(ConvTranspose3dOptions(2, 2, +/// 2).stride(1).bias(false)); +/// ``` +using ConvTranspose3dOptions = ConvTransposeOptions<3>; + +// ============================================================================ + +namespace functional { + +/// Options for a `D`-dimensional convolution functional. +template +struct ConvTransposeFuncOptions { + /// optional bias of shape `(out_channels)`. Default: ``None`` + TORCH_ARG(torch::Tensor, bias); + + /// The stride of the convolving kernel. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(ExpandingArray, stride) = 1; + + /// Implicit paddings on both sides of the input. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(ExpandingArray, padding) = 0; + + /// Additional size added to one side of each dimension in the output shape. + /// Default: 0 + TORCH_ARG(ExpandingArray, output_padding) = 0; + + /// Split input into groups, `in_channels` should be divisible by + /// the number of groups. + TORCH_ARG(int64_t, groups) = 1; + + /// The spacing between kernel elements. + /// For a `D`-dim convolution, must be a single number or a list of `D` + /// numbers. + TORCH_ARG(ExpandingArray, dilation) = 1; +}; + +/// `ConvTransposeFuncOptions` specialized for +/// `torch::nn::functional::conv_transpose1d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose1d(x, weight, F::ConvTranspose1dFuncOptions().stride(1)); +/// ``` +using ConvTranspose1dFuncOptions = ConvTransposeFuncOptions<1>; + +/// `ConvTransposeFuncOptions` specialized for +/// `torch::nn::functional::conv_transpose2d`. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose2d(x, weight, F::ConvTranspose2dFuncOptions().stride(1)); +/// ``` +using ConvTranspose2dFuncOptions = ConvTransposeFuncOptions<2>; + +/// `ConvTransposeFuncOptions` specialized for +/// `torch::nn::functional::conv_transpose3d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::conv_transpose3d(x, weight, F::ConvTranspose3dFuncOptions().stride(1)); +/// ``` +using ConvTranspose3dFuncOptions = ConvTransposeFuncOptions<3>; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/distance.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/distance.h new file mode 100644 index 0000000000000000000000000000000000000000..0717119d17b67454c5d3c587af7afeaa78c67611 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/distance.h @@ -0,0 +1,74 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `CosineSimilarity` module. +/// +/// Example: +/// ``` +/// CosineSimilarity model(CosineSimilarityOptions().dim(0).eps(0.5)); +/// ``` +struct TORCH_API CosineSimilarityOptions { + /// Dimension where cosine similarity is computed. Default: 1 + TORCH_ARG(int64_t, dim) = 1; + /// Small value to avoid division by zero. Default: 1e-8 + TORCH_ARG(double, eps) = 1e-8; +}; + +namespace functional { +/// Options for `torch::nn::functional::cosine_similarity`. 
+/// +/// See the documentation for `torch::nn::CosineSimilarityOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cosine_similarity(input1, input2, +/// F::CosineSimilarityFuncOptions().dim(1)); +/// ``` +using CosineSimilarityFuncOptions = CosineSimilarityOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `PairwiseDistance` module. +/// +/// Example: +/// ``` +/// PairwiseDistance +/// model(PairwiseDistanceOptions().p(3).eps(0.5).keepdim(true)); +/// ``` +struct TORCH_API PairwiseDistanceOptions { + /// The norm degree. Default: 2 + TORCH_ARG(double, p) = 2.0; + /// Small value to avoid division by zero. Default: 1e-6 + TORCH_ARG(double, eps) = 1e-6; + /// Determines whether or not to keep the vector dimension. Default: false + TORCH_ARG(bool, keepdim) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::pairwise_distance`. +/// +/// See the documentation for `torch::nn::PairwiseDistanceOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pairwise_distance(input1, input2, F::PairwiseDistanceFuncOptions().p(1)); +/// ``` +using PairwiseDistanceFuncOptions = PairwiseDistanceOptions; +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/dropout.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/dropout.h new file mode 100644 index 0000000000000000000000000000000000000000..58e8ea34d87df72ec5ea15ec38b1f5eb7ada7c46 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/dropout.h @@ -0,0 +1,133 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `Dropout` module. +/// +/// Example: +/// ``` +/// Dropout model(DropoutOptions().p(0.42).inplace(true)); +/// ``` +struct TORCH_API DropoutOptions { + /* implicit */ DropoutOptions(double p = 0.5); + + /// The probability of an element to be zeroed. Default: 0.5 + TORCH_ARG(double, p) = 0.5; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +/// Options for the `Dropout2d` module. +/// +/// Example: +/// ``` +/// Dropout2d model(Dropout2dOptions().p(0.42).inplace(true)); +/// ``` +using Dropout2dOptions = DropoutOptions; + +/// Options for the `Dropout3d` module. +/// +/// Example: +/// ``` +/// Dropout3d model(Dropout3dOptions().p(0.42).inplace(true)); +/// ``` +using Dropout3dOptions = DropoutOptions; + +/// Options for the `AlphaDropout` module. +/// +/// Example: +/// ``` +/// AlphaDropout model(AlphaDropoutOptions(0.2).inplace(true)); +/// ``` +using AlphaDropoutOptions = DropoutOptions; + +/// Options for the `FeatureAlphaDropout` module. 
+/// +/// Example: +/// ``` +/// FeatureAlphaDropout model(FeatureAlphaDropoutOptions(0.2).inplace(true)); +/// ``` +using FeatureAlphaDropoutOptions = DropoutOptions; + +namespace functional { + +/// Options for `torch::nn::functional::dropout`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout(input, F::DropoutFuncOptions().p(0.5)); +/// ``` +struct TORCH_API DropoutFuncOptions { + /// The probability of an element to be zeroed. Default: 0.5 + TORCH_ARG(double, p) = 0.5; + + TORCH_ARG(bool, training) = true; + + /// can optionally do the operation in-place. Default: False + TORCH_ARG(bool, inplace) = false; +}; + +/// Options for `torch::nn::functional::dropout2d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout2d(input, F::Dropout2dFuncOptions().p(0.5)); +/// ``` +using Dropout2dFuncOptions = DropoutFuncOptions; + +/// Options for `torch::nn::functional::dropout3d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::dropout3d(input, F::Dropout3dFuncOptions().p(0.5)); +/// ``` +using Dropout3dFuncOptions = DropoutFuncOptions; + +/// Options for `torch::nn::functional::alpha_dropout`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::alpha_dropout(input, +/// F::AlphaDropoutFuncOptions().p(0.5).training(false)); +/// ``` +struct TORCH_API AlphaDropoutFuncOptions { + TORCH_ARG(double, p) = 0.5; + + TORCH_ARG(bool, training) = false; + + TORCH_ARG(bool, inplace) = false; +}; + +/// Options for `torch::nn::functional::feature_alpha_dropout`. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::feature_alpha_dropout(input, +/// F::FeatureAlphaDropoutFuncOptions().p(0.5).training(false)); +/// ``` +struct TORCH_API FeatureAlphaDropoutFuncOptions { + TORCH_ARG(double, p) = 0.5; + + TORCH_ARG(bool, training) = false; + + TORCH_ARG(bool, inplace) = false; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/embedding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/embedding.h new file mode 100644 index 0000000000000000000000000000000000000000..feab5225745d42cddd814fa0c19e1afb2a65b58f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/embedding.h @@ -0,0 +1,245 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `Embedding` module. +/// +/// Example: +/// ``` +/// Embedding model(EmbeddingOptions(10, +/// 2).padding_idx(3).max_norm(2).norm_type(2.5).scale_grad_by_freq(true).sparse(true)); +/// ``` +struct TORCH_API EmbeddingOptions { + EmbeddingOptions(int64_t num_embeddings, int64_t embedding_dim); + + /// The size of the dictionary of embeddings. + TORCH_ARG(int64_t, num_embeddings); + /// The size of each embedding vector. + TORCH_ARG(int64_t, embedding_dim); + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at `padding_idx` is not updated + /// during training, i.e. it remains as a fixed "pad". 
For a newly constructed + /// Embedding, the embedding vector at `padding_idx` will default to all + /// zeros, but can be updated to another value to be used as the padding + /// vector. + TORCH_ARG(std::optional, padding_idx) = std::nullopt; + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. + TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. + TORCH_ARG(bool, sparse) = false; + /// The learnable weights of the module of shape (num_embeddings, + /// embedding_dim) + TORCH_ARG(torch::Tensor, _weight); +}; + +// ============================================================================ + +/// Options for the `Embedding::from_pretrained` function. +struct TORCH_API EmbeddingFromPretrainedOptions { + /// If ``true``, the tensor does not get updated in the learning process. + /// Equivalent to ``embedding.weight.requires_grad_(false)``. Default: + /// ``true`` + TORCH_ARG(bool, freeze) = true; + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at `padding_idx` is not updated + /// during training, i.e. it remains as a fixed "pad". + TORCH_ARG(std::optional, padding_idx) = std::nullopt; + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. 
+ TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. + TORCH_ARG(bool, sparse) = false; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::embedding`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::embedding(input, weight, +/// F::EmbeddingFuncOptions().norm_type(2.5).scale_grad_by_freq(true).sparse(true)); +/// ``` +struct TORCH_API EmbeddingFuncOptions { + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at `padding_idx` is not updated + /// during training, i.e. it remains as a fixed "pad". + TORCH_ARG(std::optional, padding_idx) = std::nullopt; + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. + TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. + TORCH_ARG(bool, sparse) = false; +}; + +} // namespace functional + +// ============================================================================ + +typedef std::variant + EmbeddingBagMode; + +/// Options for the `EmbeddingBag` module. 
+/// +/// Example: +/// ``` +/// EmbeddingBag model(EmbeddingBagOptions(10, +/// 2).max_norm(2).norm_type(2.5).scale_grad_by_freq(true).sparse(true).mode(torch::kSum)); +/// ``` +struct TORCH_API EmbeddingBagOptions { + EmbeddingBagOptions(int64_t num_embeddings, int64_t embedding_dim); + + /// The size of the dictionary of embeddings. + TORCH_ARG(int64_t, num_embeddings); + /// The size of each embedding vector. + TORCH_ARG(int64_t, embedding_dim); + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. + TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. Note: this option is not + /// supported when ``mode="kMax"``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// ``"kSum"``, ``"kMean"`` or ``"kMax"``. Specifies the way to reduce the + /// bag. ``"kSum"`` computes the weighted sum, taking `per_sample_weights` + /// into consideration. ``"kMean"`` computes the average of the values in the + /// bag, ``"kMax"`` computes the max value over each bag. + TORCH_ARG(EmbeddingBagMode, mode) = torch::kMean; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. + /// Note: this option is not supported when ``mode="kMax"``. + TORCH_ARG(bool, sparse) = false; + /// The learnable weights of the module of shape (num_embeddings, + /// embedding_dim) + TORCH_ARG(torch::Tensor, _weight); + /// If ``true``, `offsets` has one additional element, where the last element + /// is equivalent to the size of `indices`. This matches the CSR format. 
+ TORCH_ARG(bool, include_last_offset) = false; + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at padding_idx is not updated + /// during training, i.e. it remains as a fixed "pad". For a newly constructed + /// EmbeddingBag, the embedding vector at `padding_idx` will default to all + /// zeros, but can be updated to another value to be used as the padding + /// vector. Note that the embedding vector at `padding_idx` is excluded from + /// the reduction. + TORCH_ARG(std::optional, padding_idx) = std::nullopt; +}; + +// ============================================================================ + +/// Options for the `EmbeddingBag::from_pretrained` function. +struct TORCH_API EmbeddingBagFromPretrainedOptions { + /// If ``true``, the tensor does not get updated in the learning process. + /// Equivalent to ``embeddingbag.weight.requires_grad_(false)``. Default: + /// ``true`` + TORCH_ARG(bool, freeze) = true; + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. + TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. Note: this option is not + /// supported when ``mode="kMax"``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// ``"kSum"``, ``"kMean"`` or ``"kMax"``. Specifies the way to reduce the + /// bag. ``"kSum"`` computes the weighted sum, taking `per_sample_weights` + /// into consideration. ``"kMean"`` computes the average of the values in the + /// bag, ``"kMax"`` computes the max value over each bag. + TORCH_ARG(EmbeddingBagMode, mode) = torch::kMean; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. 
+ /// Note: this option is not supported when ``mode="kMax"``. + TORCH_ARG(bool, sparse) = false; + /// If ``true``, `offsets` has one additional element, where the last element + /// is equivalent to the size of `indices`. This matches the CSR format. Note: + /// this option is currently only supported when ``mode="sum"``. + TORCH_ARG(bool, include_last_offset) = false; + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at padding_idx is not updated + /// during training, i.e. it remains as a fixed "pad". Note that the embedding + /// vector at `padding_idx` is excluded from the reduction. + TORCH_ARG(std::optional, padding_idx) = std::nullopt; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::embedding_bag`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::embedding_bag(input, weight, +/// F::EmbeddingBagFuncOptions().mode(torch::kSum).offsets(offsets)); +/// ``` +struct TORCH_API EmbeddingBagFuncOptions { + /// Only used when `input` is 1D. `offsets` determines + /// the starting index position of each bag (sequence) in `input`. + TORCH_ARG(torch::Tensor, offsets); + /// If given, each embedding vector with norm larger than `max_norm` is + /// renormalized to have norm `max_norm`. + TORCH_ARG(std::optional, max_norm) = std::nullopt; + /// The p of the p-norm to compute for the `max_norm` option. Default ``2``. + TORCH_ARG(double, norm_type) = 2.; + /// If given, this will scale gradients by the inverse of frequency of the + /// words in the mini-batch. Default ``false``. Note: this option is not + /// supported when ``mode="kMax"``. + TORCH_ARG(bool, scale_grad_by_freq) = false; + /// ``"kSum"``, ``"kMean"`` or ``"kMax"``. Specifies the way to reduce the + /// bag. ``"kSum"`` computes the weighted sum, taking `per_sample_weights` + /// into consideration. 
``"kMean"`` computes the average of the values in the + /// bag, ``"kMax"`` computes the max value over each bag. + TORCH_ARG(EmbeddingBagMode, mode) = torch::kMean; + /// If ``true``, gradient w.r.t. `weight` matrix will be a sparse tensor. + /// Note: this option is not supported when ``mode="kMax"``. + TORCH_ARG(bool, sparse) = false; + /// a tensor of float / double weights, or None to indicate all weights should + /// be taken to be 1. If specified, `per_sample_weights` must have exactly the + /// same shape as input and is treated as having the same `offsets`, if those + /// are not None. + TORCH_ARG(torch::Tensor, per_sample_weights); + /// If ``true``, `offsets` has one additional element, where the last element + /// is equivalent to the size of `indices`. This matches the CSR format. Note: + /// this option is currently only supported when ``mode="sum"``. + TORCH_ARG(bool, include_last_offset) = false; + /// If specified, the entries at `padding_idx` do not contribute to the + /// gradient; therefore, the embedding vector at padding_idx is not updated + /// during training, i.e. it remains as a fixed "pad". Note that the embedding + /// vector at `padding_idx` is excluded from the reduction. + TORCH_ARG(std::optional, padding_idx) = std::nullopt; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/fold.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/fold.h new file mode 100644 index 0000000000000000000000000000000000000000..de95e6428d722fecf62cf2f22c8d0f256d0d2344 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/fold.h @@ -0,0 +1,100 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `Fold` module. +/// +/// Example: +/// ``` +/// Fold model(FoldOptions({8, 8}, {3, 3}).dilation(2).padding({2, +/// 1}).stride(2)); +/// ``` +struct TORCH_API FoldOptions { + FoldOptions(ExpandingArray<2> output_size, ExpandingArray<2> kernel_size) + : output_size_(output_size), kernel_size_(kernel_size) {} + + /// describes the spatial shape of the large containing tensor of the sliding + /// local blocks. It is useful to resolve the ambiguity when multiple input + /// shapes map to same number of sliding blocks, e.g., with stride > 0. + TORCH_ARG(ExpandingArray<2>, output_size); + + /// the size of the sliding blocks + TORCH_ARG(ExpandingArray<2>, kernel_size); + + /// controls the spacing between the kernel points; also known as the à trous + /// algorithm. + TORCH_ARG(ExpandingArray<2>, dilation) = 1; + + /// controls the amount of implicit zero-paddings on both sides for padding + /// number of points for each dimension before reshaping. + TORCH_ARG(ExpandingArray<2>, padding) = 0; + + /// controls the stride for the sliding blocks. + TORCH_ARG(ExpandingArray<2>, stride) = 1; +}; + +namespace functional { +/// Options for `torch::nn::functional::fold`. 
+/// +/// See the documentation for `torch::nn::FoldOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fold(input, F::FoldFuncOptions({3, 2}, {2, 2})); +/// ``` +using FoldFuncOptions = FoldOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `Unfold` module. +/// +/// Example: +/// ``` +/// Unfold model(UnfoldOptions({2, 4}).dilation(2).padding({2, 1}).stride(2)); +/// ``` +struct TORCH_API UnfoldOptions { + UnfoldOptions(ExpandingArray<2> kernel_size) : kernel_size_(kernel_size) {} + + /// the size of the sliding blocks + TORCH_ARG(ExpandingArray<2>, kernel_size); + + /// controls the spacing between the kernel points; also known as the à trous + /// algorithm. + TORCH_ARG(ExpandingArray<2>, dilation) = 1; + + /// controls the amount of implicit zero-paddings on both sides for padding + /// number of points for each dimension before reshaping. + TORCH_ARG(ExpandingArray<2>, padding) = 0; + + /// controls the stride for the sliding blocks. + TORCH_ARG(ExpandingArray<2>, stride) = 1; +}; + +namespace functional { +/// Options for `torch::nn::functional::unfold`. +/// +/// See the documentation for `torch::nn::UnfoldOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::unfold(input, F::UnfoldFuncOptions({2, 2}).padding(1).stride(2)); +/// ``` +using UnfoldFuncOptions = UnfoldOptions; +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/instancenorm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/instancenorm.h new file mode 100644 index 0000000000000000000000000000000000000000..67795640ed95665924ec516661c6df05a2df49b2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/instancenorm.h @@ -0,0 +1,92 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `InstanceNorm` module. +struct TORCH_API InstanceNormOptions { + /* implicit */ InstanceNormOptions(int64_t num_features); + + /// The number of features of the input tensor. + TORCH_ARG(int64_t, num_features); + + /// The epsilon value added for numerical stability. + TORCH_ARG(double, eps) = 1e-5; + + /// A momentum multiplier for the mean and variance. + TORCH_ARG(double, momentum) = 0.1; + + /// Whether to learn a scale and bias that are applied in an affine + /// transformation on the input. + TORCH_ARG(bool, affine) = false; + + /// Whether to store and update batch statistics (mean and variance) in the + /// module. + TORCH_ARG(bool, track_running_stats) = false; +}; + +/// Options for the `InstanceNorm1d` module. +/// +/// Example: +/// ``` +/// InstanceNorm1d +/// model(InstanceNorm1dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using InstanceNorm1dOptions = InstanceNormOptions; + +/// Options for the `InstanceNorm2d` module. 
+/// +/// Example: +/// ``` +/// InstanceNorm2d +/// model(InstanceNorm2dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using InstanceNorm2dOptions = InstanceNormOptions; + +/// Options for the `InstanceNorm3d` module. +/// +/// Example: +/// ``` +/// InstanceNorm3d +/// model(InstanceNorm3dOptions(4).eps(0.5).momentum(0.1).affine(false).track_running_stats(true)); +/// ``` +using InstanceNorm3dOptions = InstanceNormOptions; + +namespace functional { + +/// Options for `torch::nn::functional::instance_norm`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::instance_norm(input, +/// F::InstanceNormFuncOptions().running_mean(mean).running_var(variance).weight(weight).bias(bias).momentum(0.1).eps(1e-5)); +/// ``` +struct TORCH_API InstanceNormFuncOptions { + TORCH_ARG(Tensor, running_mean); + + TORCH_ARG(Tensor, running_var); + + TORCH_ARG(Tensor, weight); + + TORCH_ARG(Tensor, bias); + + TORCH_ARG(bool, use_input_stats) = true; + + TORCH_ARG(double, momentum) = 0.1; + + TORCH_ARG(double, eps) = 1e-5; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/linear.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/linear.h new file mode 100644 index 0000000000000000000000000000000000000000..a8af483d0282e4a902e387ed3cd56d530be44355 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/linear.h @@ -0,0 +1,98 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `Linear` module. +/// +/// Example: +/// ``` +/// Linear model(LinearOptions(5, 2).bias(false)); +/// ``` +struct TORCH_API LinearOptions { + LinearOptions(int64_t in_features, int64_t out_features); + /// size of each input sample + TORCH_ARG(int64_t, in_features); + + /// size of each output sample + TORCH_ARG(int64_t, out_features); + + /// If set to false, the layer will not learn an additive bias. Default: true + TORCH_ARG(bool, bias) = true; +}; + +// ============================================================================ + +/// Options for the `Flatten` module. +/// +/// Example: +/// ``` +/// Flatten model(FlattenOptions().start_dim(2).end_dim(4)); +/// ``` +struct TORCH_API FlattenOptions { + /// first dim to flatten + TORCH_ARG(int64_t, start_dim) = 1; + /// last dim to flatten + TORCH_ARG(int64_t, end_dim) = -1; +}; + +// ============================================================================ + +/// Options for the `Unflatten` module. +/// +/// Note: If input tensor is named, use dimname and namedshape arguments. 
+/// +/// Example: +/// ``` +/// Unflatten unnamed_model(UnflattenOptions(0, {2, 2})); +/// Unflatten named_model(UnflattenOptions("B", {{"B1", 2}, {"B2", 2}})); +/// ``` +struct TORCH_API UnflattenOptions { + typedef std::vector> namedshape_t; + + UnflattenOptions(int64_t dim, std::vector sizes); + UnflattenOptions(const char* dimname, namedshape_t namedshape); + UnflattenOptions(std::string dimname, namedshape_t namedshape); + + /// dim to unflatten + TORCH_ARG(int64_t, dim); + /// name of dim to unflatten, for use with named tensors + TORCH_ARG(std::string, dimname); + /// new shape of unflattened dim + TORCH_ARG(std::vector, sizes); + /// new shape of unflattened dim with names, for use with named tensors + TORCH_ARG(namedshape_t, namedshape); +}; + +// ============================================================================ + +/// Options for the `Bilinear` module. +/// +/// Example: +/// ``` +/// Bilinear model(BilinearOptions(3, 2, 4).bias(false)); +/// ``` +struct TORCH_API BilinearOptions { + BilinearOptions( + int64_t in1_features, + int64_t in2_features, + int64_t out_features); + /// The number of features in input 1 (columns of the input1 matrix). + TORCH_ARG(int64_t, in1_features); + /// The number of features in input 2 (columns of the input2 matrix). + TORCH_ARG(int64_t, in2_features); + /// The number of output features to produce (columns of the output matrix). + TORCH_ARG(int64_t, out_features); + /// Whether to learn and add a bias after the bilinear transformation. + TORCH_ARG(bool, bias) = true; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/loss.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/loss.h new file mode 100644 index 0000000000000000000000000000000000000000..5fe03551da6b553fb1eaf6c9438c9aff4fcc1443 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/loss.h @@ -0,0 +1,805 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `L1Loss` module. +/// +/// Example: +/// ``` +/// L1Loss model(L1LossOptions(torch::kNone)); +/// ``` +struct TORCH_API L1LossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3(L1LossOptions, reduction, kNone, kMean, kSum) + + /// Specifies the reduction to apply to the output. + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::l1_loss`. +/// +/// See the documentation for `torch::nn::L1LossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::l1_loss(input, target, F::L1LossFuncOptions(torch::kNone)); +/// ``` +using L1LossFuncOptions = L1LossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `KLDivLoss` module. 
+/// +/// Example: +/// ``` +/// KLDivLoss +/// model(KLDivLossOptions().reduction(torch::kNone).log_target(false)); +/// ``` +struct TORCH_API KLDivLossOptions { + typedef std::variant< + enumtype::kNone, + enumtype::kBatchMean, + enumtype::kSum, + enumtype::kMean> + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG4( + KLDivLossOptions, + reduction, + kNone, + kBatchMean, + kSum, + kMean) + + /// Specifies the reduction to apply to the output. + /// ``'none'`` | ``'batchmean'`` | ``'sum'`` | ``'mean'``. Default: ``'mean'`` + TORCH_ARG(reduction_t, reduction) = torch::kMean; + + /// Specifies whether `target` is accepted in the log space. Default: False + TORCH_ARG(bool, log_target) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::kl_div`. +/// +/// See the documentation for `torch::nn::KLDivLossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::kl_div(input, target, +/// F::KLDivFuncOptions().reduction(torch::kNone).log_target(false)); +/// ``` +using KLDivFuncOptions = KLDivLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `MSELoss` module. +/// +/// Example: +/// ``` +/// MSELoss model(MSELossOptions(torch::kNone)); +/// ``` +struct TORCH_API MSELossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3(MSELossOptions, reduction, kNone, kMean, kSum) + + /// Specifies the reduction to apply to the output. + /// ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'`` + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::mse_loss`. +/// +/// See the documentation for `torch::nn::MSELossOptions` class to learn what +/// arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::mse_loss(input, target, F::MSELossFuncOptions(torch::kNone)); +/// ``` +using MSELossFuncOptions = MSELossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `BCELoss` module. +/// +/// Example: +/// ``` +/// BCELoss model(BCELossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API BCELossOptions { + typedef std::variant + reduction_t; + + /// A manual rescaling weight given to the loss of each batch element. + TORCH_ARG(Tensor, weight); + /// Specifies the reduction to apply to the output. + /// ``'none'`` | ``'mean'`` | ``'sum'``. Default: ``'mean'`` + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::binary_cross_entropy`. +/// +/// See the documentation for `torch::nn::BCELossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::binary_cross_entropy(input, target, +/// F::BinaryCrossEntropyFuncOptions().weight(weight)); +/// ``` +using BinaryCrossEntropyFuncOptions = BCELossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `HingeEmbeddingLoss` module. +/// +/// Example: +/// ``` +/// HingeEmbeddingLoss +/// model(HingeEmbeddingLossOptions().margin(4).reduction(torch::kNone)); +/// ``` +struct TORCH_API HingeEmbeddingLossOptions { + typedef std::variant + reduction_t; + + /// Specifies the threshold for which the distance of a negative sample must + /// reach in order to incur zero loss. Default: 1 + TORCH_ARG(double, margin) = 1.0; + /// Specifies the reduction to apply to the output. 
Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::hinge_embedding_loss`. +/// +/// See the documentation for `torch::nn::HingeEmbeddingLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::hinge_embedding_loss(input, target, +/// F::HingeEmbeddingLossFuncOptions().margin(2)); +/// ``` +using HingeEmbeddingLossFuncOptions = HingeEmbeddingLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `MultiMarginLoss` module. +/// +/// Example: +/// ``` +/// MultiMarginLoss model(MultiMarginLossOptions().margin(2).weight(weight)); +/// ``` +struct TORCH_API MultiMarginLossOptions { + typedef std::variant + reduction_t; + + /// Has a default value of :math:`1`. :math:`1` and :math:`2` + /// are the only supported values. + TORCH_ARG(int64_t, p) = 1; + /// Has a default value of :math:`1`. + TORCH_ARG(double, margin) = 1.0; + /// A manual rescaling weight given to each + /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is + /// treated as if having all ones. + TORCH_ARG(Tensor, weight); + /// Specifies the reduction to apply to the output: + /// ``'none'`` | ``'mean'`` | ``'sum'``. ``'none'``: no reduction will be + /// applied, + /// ``'mean'``: the sum of the output will be divided by the number of + /// elements in the output, ``'sum'``: the output will be summed. Default: + /// ``'mean'`` + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::multi_margin_loss`. +/// +/// See the documentation for `torch::nn::MultiMarginLossOptions` class to learn +/// what arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multi_margin_loss(input, target, +/// F::MultiMarginLossFuncOptions().margin(2).weight(weight)); +/// ``` +using MultiMarginLossFuncOptions = MultiMarginLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `CosineEmbeddingLoss` module. +/// +/// Example: +/// ``` +/// CosineEmbeddingLoss model(CosineEmbeddingLossOptions().margin(0.5)); +/// ``` +struct TORCH_API CosineEmbeddingLossOptions { + typedef std::variant + reduction_t; + + /// Specifies the threshold for which the distance of a negative sample must + /// reach in order to incur zero loss. Should be a number from -1 to 1, 0 + /// to 0.5 is suggested. Default: 0.0 + TORCH_ARG(double, margin) = 0.0; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::cosine_embedding_loss`. +/// +/// See the documentation for `torch::nn::CosineEmbeddingLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cosine_embedding_loss(input1, input2, target, +/// F::CosineEmbeddingLossFuncOptions().margin(0.5)); +/// ``` +using CosineEmbeddingLossFuncOptions = CosineEmbeddingLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `MultiLabelMarginLoss` module. +/// +/// Example: +/// ``` +/// MultiLabelMarginLoss model(MultiLabelMarginLossOptions(torch::kNone)); +/// ``` +struct TORCH_API MultiLabelMarginLossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3( + MultiLabelMarginLossOptions, + reduction, + kNone, + kMean, + kSum) + + /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. 
+ /// 'none': no reduction will be applied, 'mean': the sum of the output will + /// be divided by the number of elements in the output, 'sum': the output will + /// be summed. Default: 'mean' + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::multilabel_margin_loss`. +/// +/// See the documentation for `torch::nn::MultiLabelMarginLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multilabel_margin_loss(input, target, +/// F::MultilabelMarginLossFuncOptions(torch::kNone)); +/// ``` +using MultilabelMarginLossFuncOptions = MultiLabelMarginLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `SoftMarginLoss` module. +/// +/// Example: +/// ``` +/// SoftMarginLoss model(SoftMarginLossOptions(torch::kNone)); +/// ``` +struct TORCH_API SoftMarginLossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3( + SoftMarginLossOptions, + reduction, + kNone, + kMean, + kSum) + + /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. + /// 'none': no reduction will be applied, 'mean': the sum of the output will + /// be divided by the number of elements in the output, 'sum': the output will + /// be summed. Default: 'mean' + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::soft_margin_loss`. +/// +/// See the documentation for `torch::nn::SoftMarginLossOptions` class to learn +/// what arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::soft_margin_loss(input, target, +/// F::SoftMarginLossFuncOptions(torch::kNone)); +/// ``` +using SoftMarginLossFuncOptions = SoftMarginLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `MultiLabelSoftMarginLoss` module. +/// +/// Example: +/// ``` +/// MultiLabelSoftMarginLoss +/// model(MultiLabelSoftMarginLossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API MultiLabelSoftMarginLossOptions { + typedef std::variant + reduction_t; + + /// A manual rescaling weight given to each + /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is + /// treated as if having all ones. + TORCH_ARG(Tensor, weight); + + /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. + /// 'none': no reduction will be applied, 'mean': the sum of the output will + /// be divided by the number of elements in the output, 'sum': the output will + /// be summed. Default: 'mean' + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::multilabel_soft_margin_loss`. +/// +/// See the documentation for `torch::nn::MultiLabelSoftMarginLossOptions` class +/// to learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::multilabel_soft_margin_loss(input, target, +/// F::MultilabelSoftMarginLossFuncOptions().reduction(torch::kNone).weight(weight)); +/// ``` +using MultilabelSoftMarginLossFuncOptions = MultiLabelSoftMarginLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `TripletMarginLoss` module. 
+/// +/// Example: +/// ``` +/// TripletMarginLoss +/// model(TripletMarginLossOptions().margin(3).p(2).eps(1e-06).swap(false)); +/// ``` +struct TORCH_API TripletMarginLossOptions { + typedef std::variant + reduction_t; + + /// Specifies the threshold for which the distance of a negative sample must + /// reach in order to incur zero loss. Default: 1 + TORCH_ARG(double, margin) = 1.0; + /// Specifies the norm degree for pairwise distance. Default: 2 + TORCH_ARG(double, p) = 2.0; + TORCH_ARG(double, eps) = 1e-6; + /// The distance swap is described in detail in the paper Learning shallow + /// convolutional feature descriptors with triplet losses by V. Balntas, + /// E. Riba et al. Default: False + TORCH_ARG(bool, swap) = false; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::triplet_margin_loss`. +/// +/// See the documentation for `torch::nn::TripletMarginLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::triplet_margin_loss(anchor, positive, negative, +/// F::TripletMarginLossFuncOptions().margin(1.0)); +/// ``` +using TripletMarginLossFuncOptions = TripletMarginLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `TripletMarginWithDistanceLoss` module. +/// +/// Example: +/// ``` +/// TripletMarginWithDistanceLoss +/// model(TripletMarginWithDistanceLossOptions().margin(3).swap(false)); +/// ``` +struct TORCH_API TripletMarginWithDistanceLossOptions { + typedef std::variant + reduction_t; + typedef std::function + distance_function_t; + + /// Specifies a nonnegative, real-valued function that quantifies the + /// closeness of two tensors. If not specified, `F::pairwise_distance` will + /// be used. 
Default: nullopt + TORCH_ARG(std::optional, distance_function) = + std::nullopt; + /// Specifies a nonnegative margin representing the minimum difference + /// between the positive and negative distances required for the loss to be 0. + /// Larger margins penalize cases where the negative examples are not distant + /// enough from the anchors, relative to the positives. Default: 1 + TORCH_ARG(double, margin) = 1.0; + /// Whether to use the distance swap described in the paper Learning shallow + /// convolutional feature descriptors with triplet losses by V. Balntas, + /// E. Riba et al. If True, and if the positive example is closer to the + /// negative example than the anchor is, swaps the positive example and the + /// anchor in the loss computation. Default: False + TORCH_ARG(bool, swap) = false; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::triplet_margin_with_distance_loss`. +/// +/// See the documentation for `torch::nn::TripletMarginWithDistanceLossOptions` +/// class to learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::triplet_margin_with_distance_loss(anchor, positive, negative, +/// F::TripletMarginWithDistanceLossFuncOptions().margin(1.0)); +/// ``` +using TripletMarginWithDistanceLossFuncOptions = + TripletMarginWithDistanceLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `CTCLoss` module. +/// +/// Example: +/// ``` +/// CTCLoss +/// model(CTCLossOptions().blank(42).zero_infinity(false).reduction(torch::kSum)); +/// ``` +struct TORCH_API CTCLossOptions { + typedef std::variant + reduction_t; + + /// blank label. Default `0`. + TORCH_ARG(int64_t, blank) = 0; + /// Specifies the reduction to apply to the output. 
Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; + /// Whether to zero infinite losses and the associated gradients. + /// Default: `false`. Infinite losses mainly occur when the inputs are + /// too short to be aligned to the targets. + TORCH_ARG(bool, zero_infinity) = false; +}; + +namespace functional { +/// Options for `torch::nn::functional::ctc_loss`. +/// +/// See the documentation for `torch::nn::CTCLossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::ctc_loss(log_probs, targets, input_lengths, target_lengths, +/// F::CTCLossFuncOptions().reduction(torch::kNone)); +/// ``` +using CTCLossFuncOptions = CTCLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `SmoothL1Loss` module. +/// +/// Example: +/// ``` +/// SmoothL1Loss model(SmoothL1LossOptions().reduction(torch::kNone).beta(0.5)); +/// ``` +struct TORCH_API SmoothL1LossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3( + SmoothL1LossOptions, + reduction, + kNone, + kMean, + kSum) + + /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. + /// 'none': no reduction will be applied, 'mean': the sum of the output will + /// be divided by the number of elements in the output, 'sum': the output will + /// be summed. Default: 'mean' + TORCH_ARG(reduction_t, reduction) = torch::kMean; + /// Specifies the threshold at which to change between L1 and L2 loss. + /// If beta is not specified, a value of 1.0 will be used. + /// Default: nullopt + TORCH_ARG(std::optional, beta) = std::nullopt; +}; + +namespace functional { +/// Options for `torch::nn::functional::smooth_l1_loss`. +/// +/// See the documentation for `torch::nn::SmoothL1LossOptions` class to learn +/// what arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::smooth_l1_loss(input, target, F::SmoothL1LossFuncOptions(torch::kNone)); +/// ``` +using SmoothL1LossFuncOptions = SmoothL1LossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `HuberLoss` module. +/// +/// Example: +/// ``` +/// HuberLoss model(HuberLossOptions().reduction(torch::kNone).delta(0.5)); +/// ``` +struct TORCH_API HuberLossOptions { + typedef std::variant + reduction_t; + + TORCH_OPTIONS_CTOR_VARIANT_ARG3( + HuberLossOptions, + reduction, + kNone, + kMean, + kSum) + + /// Specifies the reduction to apply to the output: 'none' | 'mean' | 'sum'. + /// 'none': no reduction will be applied, 'mean': the sum of the output will + /// be divided by the number of elements in the output, 'sum': the output will + /// be summed. Default: 'mean' + TORCH_ARG(reduction_t, reduction) = torch::kMean; + /// Specifies the threshold at which to change between L1 and L2 loss. + /// Default: 1.0 + TORCH_ARG(double, delta) = 1.0; +}; + +namespace functional { +/// Options for `torch::nn::functional::huber_loss`. +/// +/// See the documentation for `torch::nn::HuberLossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::huber_loss(input, target, F::HuberLossFuncOptions(torch::kNone)); +/// ``` +using HuberLossFuncOptions = HuberLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `PoissonNLLLoss` module. 
+/// +/// Example: +/// ``` +/// PoissonNLLLoss +/// model(PoissonNLLLossOptions().log_input(false).full(true).eps(0.42).reduction(torch::kSum)); +/// ``` +struct TORCH_API PoissonNLLLossOptions { + typedef std::variant + reduction_t; + + /// if true the loss is computed as `exp(input) - target * input`, + /// if false the loss is `input - target * log(input + eps)`. + TORCH_ARG(bool, log_input) = true; + /// whether to compute full loss, i.e. to add the Stirling approximation term + /// target * log(target) - target + 0.5 * log(2 * pi * target). + TORCH_ARG(bool, full) = false; + /// Small value to avoid evaluation of `log(0)` when `log_input = false`. + /// Default: 1e-8 + TORCH_ARG(double, eps) = 1e-8; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::poisson_nll_loss`. +/// +/// See the documentation for `torch::nn::PoissonNLLLossOptions` class to learn +/// what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::poisson_nll_loss(input, target, +/// F::PoissonNLLLossFuncOptions().reduction(torch::kNone)); +/// ``` +using PoissonNLLLossFuncOptions = PoissonNLLLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `MarginRankingLoss` module. +/// +/// Example: +/// ``` +/// MarginRankingLoss +/// model(MarginRankingLossOptions().margin(0.5).reduction(torch::kSum)); +/// ``` +struct TORCH_API MarginRankingLossOptions { + typedef std::variant + reduction_t; + + /// Has a default value of `0`. + TORCH_ARG(double, margin) = 0; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::margin_ranking_loss`. 
+/// +/// See the documentation for `torch::nn::MarginRankingLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::margin_ranking_loss(input1, input2, target, +/// F::MarginRankingLossFuncOptions().margin(0.5).reduction(torch::kSum)); +/// ``` +using MarginRankingLossFuncOptions = MarginRankingLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `NLLLoss` module. +/// +/// Example: +/// ``` +/// NLLLoss model(NLLLossOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +struct TORCH_API NLLLossOptions { + typedef std::variant + reduction_t; + + /// A manual rescaling weight given to each + /// class. If given, it has to be a Tensor of size `C`. Otherwise, it is + /// treated as if having all ones. + TORCH_ARG(Tensor, weight); + /// Specifies a target value that is ignored + /// and does not contribute to the input gradient. + TORCH_ARG(int64_t, ignore_index) = -100; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; +}; + +namespace functional { +/// Options for `torch::nn::functional::nll_loss`. +/// +/// See the documentation for `torch::nn::NLLLossOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::nll_loss(input, target, +/// F::NLLLossFuncOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +using NLLLossFuncOptions = NLLLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `CrossEntropyLoss` module. 
+/// +/// Example: +/// ``` +/// CrossEntropyLoss +/// model(CrossEntropyLossOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +struct TORCH_API CrossEntropyLossOptions { + typedef std::variant + reduction_t; + + /// A manual rescaling weight given to each class. If given, has to be a + /// Tensor of size C + TORCH_ARG(Tensor, weight); + /// Specifies a target value that is ignored + /// and does not contribute to the input gradient. + TORCH_ARG(int64_t, ignore_index) = -100; + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; + /// Specifies the amount of smoothing when computing the loss. Default: 0.0 + TORCH_ARG(double, label_smoothing) = 0.0; +}; + +namespace functional { +/// Options for `torch::nn::functional::cross_entropy`. +/// +/// See the documentation for `torch::nn::CrossEntropyLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::cross_entropy(input, target, +/// F::CrossEntropyFuncOptions().ignore_index(-100).reduction(torch::kMean)); +/// ``` +using CrossEntropyFuncOptions = CrossEntropyLossOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `BCEWithLogitsLoss` module. +/// +/// Example: +/// ``` +/// BCEWithLogitsLoss +/// model(BCEWithLogitsLossOptions().reduction(torch::kNone).weight(weight)); +/// ``` +struct TORCH_API BCEWithLogitsLossOptions { + typedef std::variant + reduction_t; + /// A manual rescaling weight given to the loss of each batch element. + /// If given, has to be a Tensor of size `nbatch`. + TORCH_ARG(Tensor, weight); + /// Specifies the reduction to apply to the output. Default: Mean + TORCH_ARG(reduction_t, reduction) = torch::kMean; + /// A weight of positive examples. + /// Must be a vector with length equal to the number of classes. 
+ TORCH_ARG(Tensor, pos_weight); +}; + +namespace functional { +/// Options for `torch::nn::functional::binary_cross_entropy_with_logits`. +/// +/// See the documentation for `torch::nn::BCEWithLogitsLossOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::binary_cross_entropy_with_logits(input, target, +/// F::BinaryCrossEntropyWithLogitsFuncOptions().pos_weight(pos_weight).reduction(torch::kSum)); +/// ``` +using BinaryCrossEntropyWithLogitsFuncOptions = BCEWithLogitsLossOptions; +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/normalization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/normalization.h new file mode 100644 index 0000000000000000000000000000000000000000..bfd86184cacb4b768c49680e6d8095b0e52993cd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/normalization.h @@ -0,0 +1,195 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for the `LayerNorm` module. +/// +/// Example: +/// ``` +/// LayerNorm model(LayerNormOptions({2, +/// 2}).elementwise_affine(false).eps(2e-5)); +/// ``` +struct TORCH_API LayerNormOptions { + /* implicit */ LayerNormOptions(std::vector normalized_shape); + /// input shape from an expected input. + TORCH_ARG(std::vector, normalized_shape); + /// a value added to the denominator for numerical stability. ``Default: + /// 1e-5``. 
+ TORCH_ARG(double, eps) = 1e-5; + /// a boolean value that when set to ``true``, this module + /// has learnable per-element affine parameters initialized to ones (for + /// weights) and zeros (for biases). ``Default: true``. + TORCH_ARG(bool, elementwise_affine) = true; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::layer_norm`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::layer_norm(input, F::LayerNormFuncOptions({2, 2}).eps(2e-5)); +/// ``` +struct TORCH_API LayerNormFuncOptions { + /* implicit */ LayerNormFuncOptions(std::vector normalized_shape); + /// input shape from an expected input. + TORCH_ARG(std::vector, normalized_shape); + + TORCH_ARG(Tensor, weight); + + TORCH_ARG(Tensor, bias); + + /// a value added to the denominator for numerical stability. ``Default: + /// 1e-5``. + TORCH_ARG(double, eps) = 1e-5; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `LocalResponseNorm` module. +/// +/// Example: +/// ``` +/// LocalResponseNorm +/// model(LocalResponseNormOptions(2).alpha(0.0002).beta(0.85).k(2.)); +/// ``` +struct TORCH_API LocalResponseNormOptions { + /* implicit */ LocalResponseNormOptions(int64_t size) : size_(size) {} + /// amount of neighbouring channels used for normalization + TORCH_ARG(int64_t, size); + + /// multiplicative factor. Default: 1e-4 + TORCH_ARG(double, alpha) = 1e-4; + + /// exponent. Default: 0.75 + TORCH_ARG(double, beta) = 0.75; + + /// additive factor. Default: 1 + TORCH_ARG(double, k) = 1.; +}; + +namespace functional { +/// Options for `torch::nn::functional::local_response_norm`. +/// +/// See the documentation for `torch::nn::LocalResponseNormOptions` class to +/// learn what arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::local_response_norm(x, F::LocalResponseNormFuncOptions(2)); +/// ``` +using LocalResponseNormFuncOptions = LocalResponseNormOptions; +} // namespace functional + +// ============================================================================ + +/// Options for the `CrossMapLRN2d` module. +/// +/// Example: +/// ``` +/// CrossMapLRN2d model(CrossMapLRN2dOptions(3).alpha(1e-5).beta(0.1).k(10)); +/// ``` +struct TORCH_API CrossMapLRN2dOptions { + CrossMapLRN2dOptions(int64_t size); + + TORCH_ARG(int64_t, size); + + TORCH_ARG(double, alpha) = 1e-4; + + TORCH_ARG(double, beta) = 0.75; + + TORCH_ARG(int64_t, k) = 1; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::normalize`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::normalize(input, F::NormalizeFuncOptions().p(1).dim(-1)); +/// ``` +struct TORCH_API NormalizeFuncOptions { + /// The exponent value in the norm formulation. Default: 2.0 + TORCH_ARG(double, p) = 2.0; + /// The dimension to reduce. Default: 1 + TORCH_ARG(int64_t, dim) = 1; + /// Small value to avoid division by zero. Default: 1e-12 + TORCH_ARG(double, eps) = 1e-12; + /// the output tensor. If `out` is used, this + /// operation won't be differentiable. + TORCH_ARG(std::optional, out) = std::nullopt; +}; + +} // namespace functional + +// ============================================================================ + +/// Options for the `GroupNorm` module. 
+/// +/// Example: +/// ``` +/// GroupNorm model(GroupNormOptions(2, 2).eps(2e-5).affine(false)); +/// ``` +struct TORCH_API GroupNormOptions { + /* implicit */ GroupNormOptions(int64_t num_groups, int64_t num_channels); + + /// number of groups to separate the channels into + TORCH_ARG(int64_t, num_groups); + /// number of channels expected in input + TORCH_ARG(int64_t, num_channels); + /// a value added to the denominator for numerical stability. Default: 1e-5 + TORCH_ARG(double, eps) = 1e-5; + /// a boolean value that when set to ``true``, this module + /// has learnable per-channel affine parameters initialized to ones (for + /// weights) and zeros (for biases). Default: ``true``. + TORCH_ARG(bool, affine) = true; +}; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::group_norm`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::group_norm(input, F::GroupNormFuncOptions(2).eps(2e-5)); +/// ``` +struct TORCH_API GroupNormFuncOptions { + /* implicit */ GroupNormFuncOptions(int64_t num_groups); + + /// number of groups to separate the channels into + TORCH_ARG(int64_t, num_groups); + + TORCH_ARG(Tensor, weight); + + TORCH_ARG(Tensor, bias); + + /// a value added to the denominator for numerical stability. Default: 1e-5 + TORCH_ARG(double, eps) = 1e-5; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/padding.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/padding.h new file mode 100644 index 0000000000000000000000000000000000000000..4e59a257940f360a1b5b0e605b12e11e7b4cad3e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/padding.h @@ -0,0 +1,222 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for a `D`-dimensional ReflectionPad module. +template +struct TORCH_API ReflectionPadOptions { + ReflectionPadOptions(ExpandingArray padding) : padding_(padding) {} + + /// The size of the padding. + /// If it is `int`, uses the same padding in all boundaries. + /// If it is a 2-`tuple` (for ReflectionPad1d), uses (padding_left, + /// padding_right). If it is a 4-`tuple` (for ReflectionPad2d), uses + /// (padding_left, padding_right, padding_top, padding_bottom). If it is a + /// 6-`tuple` (for ReflectionPad3d), uses (padding_left, padding_right, + /// padding_top, padding_bottom, padding_front, padding_back). + + TORCH_ARG(ExpandingArray, padding); +}; + +/// `ReflectionPadOptions` specialized for the `ReflectionPad1d` module. +/// +/// Example: +/// ``` +/// ReflectionPad1d model(ReflectionPad1dOptions({3, 1})); +/// ``` +using ReflectionPad1dOptions = ReflectionPadOptions<1>; + +/// `ReflectionPadOptions` specialized for the `ReflectionPad2d` module. +/// +/// Example: +/// ``` +/// ReflectionPad2d model(ReflectionPad2dOptions({1, 1, 2, 0})); +/// ``` +using ReflectionPad2dOptions = ReflectionPadOptions<2>; + +/// `ReflectionPadOptions` specialized for the `ReflectionPad3d` module. 
+/// +/// Example: +/// ``` +/// ReflectionPad3d model(ReflectionPad3dOptions({1, 1, 2, 0, 1, 1})); +/// ``` +using ReflectionPad3dOptions = ReflectionPadOptions<3>; + +// ============================================================================ + +/// Options for a `D`-dimensional ReplicationPad module. +template +struct TORCH_API ReplicationPadOptions { + ReplicationPadOptions(ExpandingArray padding) : padding_(padding) {} + + /// The size of the padding. + /// - If it is `int`, uses the same padding in all boundaries. + /// - If it is a 2-`tuple` (for ReplicationPad1d), uses (padding_left, + /// padding_right). + /// - If it is a 4-`tuple` (for ReplicationPad2d), uses (padding_left, + /// padding_right, padding_top, padding_bottom). + /// - If it is a 6-`tuple` (for ReplicationPad3d), uses + /// (padding_left, padding_right, padding_top, padding_bottom, + /// padding_front, padding_back). + TORCH_ARG(ExpandingArray, padding); +}; + +/// `ReplicationPadOptions` specialized for the `ReplicationPad1d` module. +/// +/// Example: +/// ``` +/// ReplicationPad1d model(ReplicationPad1dOptions({3, 1})); +/// ``` +using ReplicationPad1dOptions = ReplicationPadOptions<1>; + +/// `ReplicationPadOptions` specialized for the `ReplicationPad2d` module. +/// +/// Example: +/// ``` +/// ReplicationPad2d model(ReplicationPad2dOptions({1, 1, 2, 0})); +/// ``` +using ReplicationPad2dOptions = ReplicationPadOptions<2>; + +/// `ReplicationPadOptions` specialized for the `ReplicationPad3d` module. +/// +/// Example: +/// ``` +/// ReplicationPad3d model(ReplicationPad3dOptions({1, 2, 1, 2, 1, 2})); +/// ``` +using ReplicationPad3dOptions = ReplicationPadOptions<3>; + +// ============================================================================ + +template +struct TORCH_API ZeroPadOptions { + ZeroPadOptions(ExpandingArray padding) : padding_(padding) {} + + /// The size of the padding. + /// - If it is `int`, uses the same padding in all boundaries. 
+ /// - If it is a 2-`tuple` (for ZeroPad1d), uses (padding_left, + /// padding_right). + /// - If it is a 4-`tuple` (for ZeroPad2d), uses (padding_left, padding_right, + /// padding_top, padding_bottom). + /// - If it is a 6-`tuple` (for ZeroPad3d), uses + /// (padding_left, padding_right, padding_top, padding_bottom, + /// padding_front, padding_back). + TORCH_ARG(ExpandingArray, padding); +}; + +/// `ZeroPadOptions` specialized for the `ZeroPad1d` module. +/// +/// Example: +/// ``` +/// ZeroPad1d model(ZeroPad1dOptions({3, 1})); +/// ``` +using ZeroPad1dOptions = ZeroPadOptions<1>; + +/// `ZeroPadOptions` specialized for the `ZeroPad2d` module. +/// +/// Example: +/// ``` +/// ZeroPad2d model(ZeroPad2dOptions({1, 1, 2, 0})); +/// ``` +using ZeroPad2dOptions = ZeroPadOptions<2>; + +/// `ZeroPadOptions` specialized for the `ZeroPad3d` module. +/// +/// Example: +/// ``` +/// ZeroPad3d model(ZeroPad3dOptions({1, 2, 1, 2, 1, 2})); +/// ``` +using ZeroPad3dOptions = ZeroPadOptions<3>; + +// ============================================================================ + +/// Options for a `D`-dimensional ConstantPad module. +template +struct TORCH_API ConstantPadOptions { + ConstantPadOptions(ExpandingArray padding, double value) + : padding_(padding), value_(value) {} + + /// The size of the padding. + /// - If it is `int`, uses the same padding in all boundaries. + /// - If it is a 2-`tuple` (for ConstantPad1d), uses (padding_left, + /// padding_right). + /// - If it is a 4-`tuple` (for ConstantPad2d), uses (padding_left, + /// padding_right, padding_top, padding_bottom). + /// - If it is a 6-`tuple` (for ConstantPad3d), uses + /// (padding_left, padding_right, padding_top, padding_bottom, + /// padding_front, padding_back). + TORCH_ARG(ExpandingArray, padding); + + /// Fill value for constant padding. + TORCH_ARG(double, value); +}; + +/// `ConstantPadOptions` specialized for the `ConstantPad1d` module.
+/// +/// Example: +/// ``` +/// ConstantPad1d model(ConstantPad1dOptions({3, 1}, 3.5)); +/// ``` +using ConstantPad1dOptions = ConstantPadOptions<1>; + +/// `ConstantPadOptions` specialized for the `ConstantPad2d` module. +/// +/// Example: +/// ``` +/// ConstantPad2d model(ConstantPad2dOptions({3, 0, 2, 1}, 3.5)); +/// ``` +using ConstantPad2dOptions = ConstantPadOptions<2>; + +/// `ConstantPadOptions` specialized for the `ConstantPad3d` module. +/// +/// Example: +/// ``` +/// ConstantPad3d model(ConstantPad3dOptions({1, 2, 1, 2, 1, 2}, 3.5)); +/// ``` +using ConstantPad3dOptions = ConstantPadOptions<3>; + +// ============================================================================ + +namespace functional { + +/// Options for `torch::nn::functional::pad`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pad(input, F::PadFuncOptions({1, 2, 2, 1, 1, +/// 2}).mode(torch::kReplicate)); +/// ``` +struct TORCH_API PadFuncOptions { + typedef std::variant< + enumtype::kConstant, + enumtype::kReflect, + enumtype::kReplicate, + enumtype::kCircular> + mode_t; + + PadFuncOptions(std::vector pad); + + /// m-elements tuple, where m/2 <= input dimensions and m is even. + TORCH_ARG(std::vector, pad); + + /// "constant", "reflect", "replicate" or "circular". Default: "constant" + TORCH_ARG(mode_t, mode) = torch::kConstant; + + /// fill value for "constant" padding. Default: 0 + TORCH_ARG(double, value) = 0; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pixelshuffle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pixelshuffle.h new file mode 100644 index 0000000000000000000000000000000000000000..dbbda7c8f2db8fce5b6f18fcdeba7836c344d75b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pixelshuffle.h @@ -0,0 +1,68 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::nn { + +/// Options for the `PixelShuffle` module. +/// +/// Example: +/// ``` +/// PixelShuffle model(PixelShuffleOptions(5)); +/// ``` +struct TORCH_API PixelShuffleOptions { + PixelShuffleOptions(int64_t upscale_factor) + : upscale_factor_(upscale_factor) {} + + /// Factor to increase spatial resolution by + TORCH_ARG(int64_t, upscale_factor); +}; + +/// Options for the `PixelUnshuffle` module. +/// +/// Example: +/// ``` +/// PixelUnshuffle model(PixelUnshuffleOptions(5)); +/// ``` +struct TORCH_API PixelUnshuffleOptions { + /* implicit */ PixelUnshuffleOptions(int64_t downscale_factor) + : downscale_factor_(downscale_factor) {} + + /// Factor to decrease spatial resolution by + TORCH_ARG(int64_t, downscale_factor); +}; + +namespace functional { +/// Options for `torch::nn::functional::pixel_shuffle`. +/// +/// See the documentation for `torch::nn::PixelShuffleOptions` class to learn +/// what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pixel_shuffle(x, F::PixelShuffleFuncOptions(2)); +/// ``` +using PixelShuffleFuncOptions = PixelShuffleOptions; + +/// Options for `torch::nn::functional::pixel_unshuffle`. 
+/// +/// See the documentation for `torch::nn::PixelUnshuffleOptions` class to learn +/// what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::pixel_unshuffle(x, F::PixelUnshuffleFuncOptions(2)); +/// ``` +using PixelUnshuffleFuncOptions = PixelUnshuffleOptions; +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pooling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pooling.h new file mode 100644 index 0000000000000000000000000000000000000000..a3b65e968f8952534baa9687a29223523fc60873 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/pooling.h @@ -0,0 +1,599 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +/// Options for a `D`-dimensional avgpool module. +template +struct AvgPoolOptions { + AvgPoolOptions(ExpandingArray kernel_size) + : kernel_size_(kernel_size), stride_(kernel_size) {} + + /// the size of the window to take an average over + TORCH_ARG(ExpandingArray, kernel_size); + + /// the stride of the window. 
Default value is `kernel_size` + TORCH_ARG(ExpandingArray, stride); + + /// implicit zero padding to be added on both sides + TORCH_ARG(ExpandingArray, padding) = 0; + + /// when True, will use `ceil` instead of `floor` to compute the output shape + TORCH_ARG(bool, ceil_mode) = false; + + /// when True, will include the zero-padding in the averaging calculation + TORCH_ARG(bool, count_include_pad) = true; + + /// if specified, it will be used as divisor, otherwise size of the pooling + /// region will be used. + + TORCH_ARG(std::optional, divisor_override) = std::nullopt; +}; + +/// `AvgPoolOptions` specialized for the `AvgPool1d` module. +/// +/// Example: +/// ``` +/// AvgPool1d model(AvgPool1dOptions(3).stride(2)); +/// ``` +using AvgPool1dOptions = AvgPoolOptions<1>; + +/// `AvgPoolOptions` specialized for the `AvgPool2d` module. +/// +/// Example: +/// ``` +/// AvgPool2d model(AvgPool2dOptions({3, 2}).stride({2, 2})); +/// ``` +using AvgPool2dOptions = AvgPoolOptions<2>; + +/// `AvgPoolOptions` specialized for the `AvgPool3d` module. +/// +/// Example: +/// ``` +/// AvgPool3d model(AvgPool3dOptions(5).stride(2)); +/// ``` +using AvgPool3dOptions = AvgPoolOptions<3>; + +namespace functional { +/// Options for `torch::nn::functional::avg_pool1d`. +/// +/// See the documentation for `torch::nn::AvgPool1dOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool1d(x, F::AvgPool1dFuncOptions(3).stride(2)); +/// ``` +using AvgPool1dFuncOptions = AvgPool1dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::avg_pool2d`. +/// +/// See the documentation for `torch::nn::AvgPool2dOptions` class to learn what +/// arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool2d(x, F::AvgPool2dFuncOptions(3).stride(2)); +/// ``` +using AvgPool2dFuncOptions = AvgPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::avg_pool3d`. +/// +/// See the documentation for `torch::nn::AvgPool3dOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::avg_pool3d(x, F::AvgPool3dFuncOptions(3).stride(2)); +/// ``` +using AvgPool3dFuncOptions = AvgPool3dOptions; +} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional maxpool module. +template +struct MaxPoolOptions { + MaxPoolOptions(ExpandingArray kernel_size) + : kernel_size_(kernel_size), stride_(kernel_size) {} + + /// the size of the window to take a max over + TORCH_ARG(ExpandingArray, kernel_size); + + /// the stride of the window. Default value is `kernel_size` + TORCH_ARG(ExpandingArray, stride); + + /// implicit zero padding to be added on both sides + TORCH_ARG(ExpandingArray, padding) = 0; + + /// a parameter that controls the stride of elements in the window + TORCH_ARG(ExpandingArray, dilation) = 1; + + /// when True, will use `ceil` instead of `floor` to compute the output shape + TORCH_ARG(bool, ceil_mode) = false; +}; + +/// `MaxPoolOptions` specialized for the `MaxPool1d` module. +/// +/// Example: +/// ``` +/// MaxPool1d model(MaxPool1dOptions(3).stride(2)); +/// ``` +using MaxPool1dOptions = MaxPoolOptions<1>; + +/// `MaxPoolOptions` specialized for the `MaxPool2d` module. +/// +/// Example: +/// ``` +/// MaxPool2d model(MaxPool2dOptions({3, 2}).stride({2, 2})); +/// ``` +using MaxPool2dOptions = MaxPoolOptions<2>; + +/// `MaxPoolOptions` specialized for the `MaxPool3d` module.
+/// +/// Example: +/// ``` +/// MaxPool3d model(MaxPool3dOptions(3).stride(2)); +/// ``` +using MaxPool3dOptions = MaxPoolOptions<3>; + +namespace functional { +/// Options for `torch::nn::functional::max_pool1d` and +/// `torch::nn::functional::max_pool1d_with_indices`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool1d(x, F::MaxPool1dFuncOptions(3).stride(2)); +/// ``` +using MaxPool1dFuncOptions = MaxPool1dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::max_pool2d` and +/// `torch::nn::functional::max_pool2d_with_indices`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool2d(x, F::MaxPool2dFuncOptions(3).stride(2)); +/// ``` +using MaxPool2dFuncOptions = MaxPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::max_pool3d` and +/// `torch::nn::functional::max_pool3d_with_indices`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_pool3d(x, F::MaxPool3dFuncOptions(3).stride(2)); +/// ``` +using MaxPool3dFuncOptions = MaxPool3dOptions; +} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional adaptive maxpool module. +template +struct AdaptiveMaxPoolOptions { + AdaptiveMaxPoolOptions(output_size_t output_size) + : output_size_(output_size) {} + + /// the target output size + TORCH_ARG(output_size_t, output_size); +}; + +/// `AdaptiveMaxPoolOptions` specialized for the `AdaptiveMaxPool1d` module. +/// +/// Example: +/// ``` +/// AdaptiveMaxPool1d model(AdaptiveMaxPool1dOptions(3)); +/// ``` +using AdaptiveMaxPool1dOptions = AdaptiveMaxPoolOptions>; + +/// `AdaptiveMaxPoolOptions` specialized for the `AdaptiveMaxPool2d` module. 
+/// +/// Example: +/// ``` +/// AdaptiveMaxPool2d model(AdaptiveMaxPool2dOptions({3, 2})); +/// ``` +using AdaptiveMaxPool2dOptions = + AdaptiveMaxPoolOptions>; + +/// `AdaptiveMaxPoolOptions` specialized for the `AdaptiveMaxPool3d` module. +/// +/// Example: +/// ``` +/// AdaptiveMaxPool3d model(AdaptiveMaxPool3dOptions(3)); +/// ``` +using AdaptiveMaxPool3dOptions = + AdaptiveMaxPoolOptions>; + +namespace functional { +/// Options for `torch::nn::functional::adaptive_max_pool1d` and +/// `torch::nn::functional::adaptive_max_pool1d_with_indices` +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool1d(x, F::AdaptiveMaxPool1dFuncOptions(3)); +/// ``` +using AdaptiveMaxPool1dFuncOptions = AdaptiveMaxPool1dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::adaptive_max_pool2d` and +/// `torch::nn::functional::adaptive_max_pool2d_with_indices` +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool2d(x, F::AdaptiveMaxPool2dFuncOptions(3)); +/// ``` +using AdaptiveMaxPool2dFuncOptions = AdaptiveMaxPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::adaptive_max_pool3d` and +/// `torch::nn::functional::adaptive_max_pool3d_with_indices` +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_max_pool3d(x, F::AdaptiveMaxPool3dFuncOptions(3)); +/// ``` +using AdaptiveMaxPool3dFuncOptions = AdaptiveMaxPool3dOptions; +} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional adaptive avgpool module. 
+template +struct AdaptiveAvgPoolOptions { + AdaptiveAvgPoolOptions(output_size_t output_size) + : output_size_(output_size) {} + + /// the target output size + TORCH_ARG(output_size_t, output_size); +}; + +/// `AdaptiveAvgPoolOptions` specialized for the `AdaptiveAvgPool1d` module. +/// +/// Example: +/// ``` +/// AdaptiveAvgPool1d model(AdaptiveAvgPool1dOptions(5)); +/// ``` +using AdaptiveAvgPool1dOptions = AdaptiveAvgPoolOptions>; + +/// `AdaptiveAvgPoolOptions` specialized for the `AdaptiveAvgPool2d` module. +/// +/// Example: +/// ``` +/// AdaptiveAvgPool2d model(AdaptiveAvgPool2dOptions({3, 2})); +/// ``` +using AdaptiveAvgPool2dOptions = + AdaptiveAvgPoolOptions>; + +/// `AdaptiveAvgPoolOptions` specialized for the `AdaptiveAvgPool3d` module. +/// +/// Example: +/// ``` +/// AdaptiveAvgPool3d model(AdaptiveAvgPool3dOptions(3)); +/// ``` +using AdaptiveAvgPool3dOptions = + AdaptiveAvgPoolOptions>; + +namespace functional { +/// Options for `torch::nn::functional::adaptive_avg_pool1d`. +/// +/// See the documentation for `torch::nn::AdaptiveAvgPool1dOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool1d(x, F::AdaptiveAvgPool1dFuncOptions(3)); +/// ``` +using AdaptiveAvgPool1dFuncOptions = AdaptiveAvgPool1dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::adaptive_avg_pool2d`. +/// +/// See the documentation for `torch::nn::AdaptiveAvgPool2dOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool2d(x, F::AdaptiveAvgPool2dFuncOptions(3)); +/// ``` +using AdaptiveAvgPool2dFuncOptions = AdaptiveAvgPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::adaptive_avg_pool3d`. 
+/// +/// See the documentation for `torch::nn::AdaptiveAvgPool3dOptions` class to +/// learn what arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::adaptive_avg_pool3d(x, F::AdaptiveAvgPool3dFuncOptions(3)); +/// ``` +using AdaptiveAvgPool3dFuncOptions = AdaptiveAvgPool3dOptions; +} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional maxunpool module. +template +struct MaxUnpoolOptions { + MaxUnpoolOptions(ExpandingArray kernel_size) + : kernel_size_(kernel_size), stride_(kernel_size) {} + + /// the size of the window to take a max over + TORCH_ARG(ExpandingArray, kernel_size); + + /// the stride of the window. Default value is `kernel_size` + TORCH_ARG(ExpandingArray, stride); + + /// implicit zero padding to be added on both sides + TORCH_ARG(ExpandingArray, padding) = 0; +}; + +/// `MaxUnpoolOptions` specialized for the `MaxUnpool1d` module. +/// +/// Example: +/// ``` +/// MaxUnpool1d model(MaxUnpool1dOptions(3).stride(2).padding(1)); +/// ``` +using MaxUnpool1dOptions = MaxUnpoolOptions<1>; + +/// `MaxUnpoolOptions` specialized for the `MaxUnpool2d` module. +/// +/// Example: +/// ``` +/// MaxUnpool2d model(MaxUnpool2dOptions(3).stride(2).padding(1)); +/// ``` +using MaxUnpool2dOptions = MaxUnpoolOptions<2>; + +/// `MaxUnpoolOptions` specialized for the `MaxUnpool3d` module. +/// +/// Example: +/// ``` +/// MaxUnpool3d model(MaxUnpool3dOptions(3).stride(2).padding(1)); +/// ``` +using MaxUnpool3dOptions = MaxUnpoolOptions<3>; + +// ============================================================================ + +namespace functional { + +/// Options for a `D`-dimensional maxunpool functional.
+template +struct MaxUnpoolFuncOptions { + MaxUnpoolFuncOptions(ExpandingArray kernel_size) + : kernel_size_(kernel_size), stride_(kernel_size) {} + + /// the size of the window to take a max over + TORCH_ARG(ExpandingArray, kernel_size); + + /// the stride of the window. Default value is `kernel_size` + TORCH_ARG(ExpandingArray, stride); + + /// implicit zero padding to be added on both sides + TORCH_ARG(ExpandingArray, padding) = 0; + + /// the targeted output size + TORCH_ARG(std::optional>, output_size) = std::nullopt; +}; + +/// `MaxUnpoolFuncOptions` specialized for +/// `torch::nn::functional::max_unpool1d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool1d(x, indices, +/// F::MaxUnpool1dFuncOptions(3).stride(2).padding(1)); +/// ``` +using MaxUnpool1dFuncOptions = MaxUnpoolFuncOptions<1>; + +/// `MaxUnpoolFuncOptions` specialized for +/// `torch::nn::functional::max_unpool2d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool2d(x, indices, +/// F::MaxUnpool2dFuncOptions(3).stride(2).padding(1)); +/// ``` +using MaxUnpool2dFuncOptions = MaxUnpoolFuncOptions<2>; + +/// `MaxUnpoolFuncOptions` specialized for +/// `torch::nn::functional::max_unpool3d`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::max_unpool3d(x, indices, F::MaxUnpool3dFuncOptions(3)); +/// ``` +using MaxUnpool3dFuncOptions = MaxUnpoolFuncOptions<3>; + +} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional fractional maxpool module.
+template +struct FractionalMaxPoolOptions { + FractionalMaxPoolOptions(ExpandingArray kernel_size) + : kernel_size_(kernel_size) {} + + /// the size of the window to take a max over + TORCH_ARG(ExpandingArray, kernel_size); + + /// the target output size of the image + TORCH_ARG(std::optional>, output_size) = std::nullopt; + + /// If one wants to have an output size as a ratio of the input size, this + /// option can be given. This has to be a number or tuple in the range (0, 1) + using ExpandingArrayDouble = torch::ExpandingArray; + TORCH_ARG(std::optional, output_ratio) = std::nullopt; + + TORCH_ARG(torch::Tensor, _random_samples); +}; + +/// `FractionalMaxPoolOptions` specialized for the `FractionalMaxPool2d` module. +/// +/// Example: +/// ``` +/// FractionalMaxPool2d model(FractionalMaxPool2dOptions(5).output_size(1)); +/// ``` +using FractionalMaxPool2dOptions = FractionalMaxPoolOptions<2>; + +/// `FractionalMaxPoolOptions` specialized for the `FractionalMaxPool3d` module. +/// +/// Example: +/// ``` +/// FractionalMaxPool3d model(FractionalMaxPool3dOptions(5).output_size(1)); +/// ``` +using FractionalMaxPool3dOptions = FractionalMaxPoolOptions<3>; + +namespace functional { +/// Options for `torch::nn::functional::fractional_max_pool2d` and +/// `torch::nn::functional::fractional_max_pool2d_with_indices` +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool2d(x, +/// F::FractionalMaxPool2dFuncOptions(3).output_size(2)); +/// ``` +using FractionalMaxPool2dFuncOptions = FractionalMaxPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::fractional_max_pool3d` and +/// `torch::nn::functional::fractional_max_pool3d_with_indices` +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::fractional_max_pool3d(x, +/// F::FractionalMaxPool3dFuncOptions(3).output_size(2)); +/// ``` +using FractionalMaxPool3dFuncOptions = FractionalMaxPool3dOptions; 
+} // namespace functional + +// ============================================================================ + +/// Options for a `D`-dimensional lppool module. +template +struct LPPoolOptions { + LPPoolOptions(double norm_type, ExpandingArray kernel_size) + : norm_type_(norm_type), + kernel_size_(kernel_size), + stride_(kernel_size) {} + + TORCH_ARG(double, norm_type); + + // the size of the window to take an average over + TORCH_ARG(ExpandingArray, kernel_size); + + // the stride of the window. Default value is `kernel_size` + TORCH_ARG(ExpandingArray, stride); + + // when True, will use `ceil` instead of `floor` to compute the output shape + TORCH_ARG(bool, ceil_mode) = false; +}; + +/// `LPPoolOptions` specialized for the `LPPool1d` module. +/// +/// Example: +/// ``` +/// LPPool1d model(LPPool1dOptions(1, 2).stride(5).ceil_mode(true)); +/// ``` +using LPPool1dOptions = LPPoolOptions<1>; + +/// `LPPoolOptions` specialized for the `LPPool2d` module. +/// +/// Example: +/// ``` +/// LPPool2d model(LPPool2dOptions(1, std::vector({3, 4})).stride({5, +/// 6}).ceil_mode(true)); +/// ``` +using LPPool2dOptions = LPPoolOptions<2>; + +/// `LPPoolOptions` specialized for the `LPPool3d` module. +/// +/// Example: +/// ``` +/// LPPool3d model(LPPool3dOptions(1, std::vector({3, 4, 5})).stride( +/// {5, 6, 7}).ceil_mode(true)); +/// ``` +using LPPool3dOptions = LPPoolOptions<3>; + +namespace functional { +/// Options for `torch::nn::functional::lp_pool1d`. +/// +/// See the documentation for `torch::nn::LPPool1dOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool1d(x, F::LPPool1dFuncOptions(2, 3).stride(2)); +/// ``` +using LPPool1dFuncOptions = LPPool1dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::lp_pool2d`. +/// +/// See the documentation for `torch::nn::LPPool2dOptions` class to learn what +/// arguments are supported. 
+/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool2d(x, F::LPPool2dFuncOptions(2, {2, 3}).stride(2)); +/// ``` +using LPPool2dFuncOptions = LPPool2dOptions; +} // namespace functional + +namespace functional { +/// Options for `torch::nn::functional::lp_pool3d`. +/// +/// See the documentation for `torch::nn::LPPool3dOptions` class to learn what +/// arguments are supported. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::lp_pool3d(x, F::LPPool3dFuncOptions(2, {2, 3, 4}).stride(2)); +/// ``` +using LPPool3dFuncOptions = LPPool3dOptions; +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/rnn.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/rnn.h new file mode 100644 index 0000000000000000000000000000000000000000..dd9af29cf722b0f1d740662781b04c782bae1700 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/rnn.h @@ -0,0 +1,239 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +namespace detail { + +/// Common options for RNN, LSTM and GRU modules. +struct TORCH_API RNNOptionsBase { + typedef std::variant< + enumtype::kLSTM, + enumtype::kGRU, + enumtype::kRNN_TANH, + enumtype::kRNN_RELU> + rnn_options_base_mode_t; + + RNNOptionsBase( + rnn_options_base_mode_t mode, + int64_t input_size, + int64_t hidden_size); + + TORCH_ARG(rnn_options_base_mode_t, mode); + /// The number of features of a single sample in the input sequence `x`. 
+ TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h`. + TORCH_ARG(int64_t, hidden_size); + /// The number of recurrent layers (cells) to use. + TORCH_ARG(int64_t, num_layers) = 1; + /// Whether a bias term should be added to all linear operations. + TORCH_ARG(bool, bias) = true; + /// If true, the input sequence should be provided as `(batch, sequence, + /// features)`. If false (default), the expected layout is `(sequence, batch, + /// features)`. + TORCH_ARG(bool, batch_first) = false; + /// If non-zero, adds dropout with the given probability to the output of each + /// RNN layer, except the final layer. + TORCH_ARG(double, dropout) = 0.0; + /// Whether to make the RNN bidirectional. + TORCH_ARG(bool, bidirectional) = false; + /// Cell projection dimension. If 0, projections are not added. Can only be + /// used for LSTMs. + TORCH_ARG(int64_t, proj_size) = 0; +}; + +} // namespace detail + +/// Options for the `RNN` module. +/// +/// Example: +/// ``` +/// RNN model(RNNOptions(128, +/// 64).num_layers(3).dropout(0.2).nonlinearity(torch::kTanh)); +/// ``` +struct TORCH_API RNNOptions { + typedef std::variant nonlinearity_t; + + RNNOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// Number of recurrent layers. E.g., setting ``num_layers=2`` + /// would mean stacking two RNNs together to form a `stacked RNN`, + /// with the second RNN taking in outputs of the first RNN and + /// computing the final results. Default: 1 + TORCH_ARG(int64_t, num_layers) = 1; + /// The non-linearity to use. Can be either ``torch::kTanh`` or + /// ``torch::kReLU``. Default: ``torch::kTanh`` + TORCH_ARG(nonlinearity_t, nonlinearity) = torch::kTanh; + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. 
+ /// Default: ``true`` + TORCH_ARG(bool, bias) = true; + /// If ``true``, then the input and output tensors are provided + /// as `(batch, seq, feature)`. Default: ``false`` + TORCH_ARG(bool, batch_first) = false; + /// If non-zero, introduces a `Dropout` layer on the outputs of each + /// RNN layer except the last layer, with dropout probability equal to + /// `dropout`. Default: 0 + TORCH_ARG(double, dropout) = 0.0; + /// If ``true``, becomes a bidirectional RNN. Default: ``false`` + TORCH_ARG(bool, bidirectional) = false; +}; + +/// Options for the `LSTM` module. +/// +/// Example: +/// ``` +/// LSTM model(LSTMOptions(2, +/// 4).num_layers(3).batch_first(false).bidirectional(true)); +/// ``` +struct TORCH_API LSTMOptions { + LSTMOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// Number of recurrent layers. E.g., setting ``num_layers=2`` + /// would mean stacking two LSTMs together to form a `stacked LSTM`, + /// with the second LSTM taking in outputs of the first LSTM and + /// computing the final results. Default: 1 + TORCH_ARG(int64_t, num_layers) = 1; + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. + /// Default: ``true`` + TORCH_ARG(bool, bias) = true; + /// If ``true``, then the input and output tensors are provided + /// as (batch, seq, feature). Default: ``false`` + TORCH_ARG(bool, batch_first) = false; + /// If non-zero, introduces a `Dropout` layer on the outputs of each + /// LSTM layer except the last layer, with dropout probability equal to + /// `dropout`. Default: 0 + TORCH_ARG(double, dropout) = 0.0; + /// If ``true``, becomes a bidirectional LSTM. Default: ``false`` + TORCH_ARG(bool, bidirectional) = false; + /// Cell projection dimension. 
If 0, projections are not added + TORCH_ARG(int64_t, proj_size) = 0; +}; + +/// Options for the `GRU` module. +/// +/// Example: +/// ``` +/// GRU model(GRUOptions(2, +/// 4).num_layers(3).batch_first(false).bidirectional(true)); +/// ``` +struct TORCH_API GRUOptions { + GRUOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// Number of recurrent layers. E.g., setting ``num_layers=2`` + /// would mean stacking two GRUs together to form a `stacked GRU`, + /// with the second GRU taking in outputs of the first GRU and + /// computing the final results. Default: 1 + TORCH_ARG(int64_t, num_layers) = 1; + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. + /// Default: ``true`` + TORCH_ARG(bool, bias) = true; + /// If ``true``, then the input and output tensors are provided + /// as (batch, seq, feature). Default: ``false`` + TORCH_ARG(bool, batch_first) = false; + /// If non-zero, introduces a `Dropout` layer on the outputs of each + /// GRU layer except the last layer, with dropout probability equal to + /// `dropout`. Default: 0 + TORCH_ARG(double, dropout) = 0.0; + /// If ``true``, becomes a bidirectional GRU. Default: ``false`` + TORCH_ARG(bool, bidirectional) = false; +}; + +namespace detail { + +/// Common options for RNNCell, LSTMCell and GRUCell modules +struct TORCH_API RNNCellOptionsBase { + RNNCellOptionsBase( + int64_t input_size, + int64_t hidden_size, + bool bias, + int64_t num_chunks); + TORCH_ARG(int64_t, input_size); + TORCH_ARG(int64_t, hidden_size); + TORCH_ARG(bool, bias); + TORCH_ARG(int64_t, num_chunks); +}; + +} // namespace detail + +/// Options for the `RNNCell` module. 
+/// +/// Example: +/// ``` +/// RNNCell model(RNNCellOptions(20, +/// 10).bias(false).nonlinearity(torch::kReLU)); +/// ``` +struct TORCH_API RNNCellOptions { + typedef std::variant nonlinearity_t; + + RNNCellOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. + /// Default: ``true`` + TORCH_ARG(bool, bias) = true; + /// The non-linearity to use. Can be either ``torch::kTanh`` or + /// ``torch::kReLU``. Default: ``torch::kTanh`` + TORCH_ARG(nonlinearity_t, nonlinearity) = torch::kTanh; +}; + +/// Options for the `LSTMCell` module. +/// +/// Example: +/// ``` +/// LSTMCell model(LSTMCellOptions(20, 10).bias(false)); +/// ``` +struct TORCH_API LSTMCellOptions { + LSTMCellOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. + /// Default: ``true`` + TORCH_ARG(bool, bias) = true; +}; + +/// Options for the `GRUCell` module. +/// +/// Example: +/// ``` +/// GRUCell model(GRUCellOptions(20, 10).bias(false)); +/// ``` +struct TORCH_API GRUCellOptions { + GRUCellOptions(int64_t input_size, int64_t hidden_size); + + /// The number of expected features in the input `x` + TORCH_ARG(int64_t, input_size); + /// The number of features in the hidden state `h` + TORCH_ARG(int64_t, hidden_size); + /// If ``false``, then the layer does not use bias weights `b_ih` and `b_hh`. + /// Default: ``true`` + TORCH_ARG(bool, bias) = true; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformer.h new file mode 100644 index 0000000000000000000000000000000000000000..002790835ab87a631f2196197148732e1b3f3552 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformer.h @@ -0,0 +1,67 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::nn { + +/// Options for the `Transformer` module +/// +/// Example: +/// ``` +/// TransformerOptions options; +/// TransformerOptions options(16, 4); +/// auto options = TransformerOptions().d_model(4).nhead(2).dropout(0.0); +/// ``` +struct TORCH_API TransformerOptions { + // The following constructors are commonly used + // Please don't add more unless it is proved as a common usage + TransformerOptions() = default; + TransformerOptions(int64_t d_model, int64_t nhead); + TransformerOptions( + int64_t d_model, + int64_t nhead, + int64_t num_encoder_layers, + int64_t num_decoder_layers); + + /// the number of expected features in the encoder/decoder inputs + /// (default=512) + TORCH_ARG(int64_t, d_model) = 512; + + /// the number of heads in the multiheadattention models (default=8) + TORCH_ARG(int64_t, nhead) = 8; + + /// the number of sub-encoder-layers in the encoder (default=6) + TORCH_ARG(int64_t, num_encoder_layers) = 6; + + /// the number of sub-decoder-layers in the decoder (default=6) + TORCH_ARG(int64_t, num_decoder_layers) = 6; + + /// the dimension of the feedforward network model (default=2048) + TORCH_ARG(int64_t, dim_feedforward) = 2048; + + /// the dropout value (default=0.1) + 
TORCH_ARG(double, dropout) = 0.1; + + /// the activation function of encoder/decoder intermediate layer + /// (default=``torch::kReLU``) + TORCH_ARG(activation_t, activation) = torch::kReLU; + + /// custom encoder (default=None) + TORCH_ARG(AnyModule, custom_encoder); + + /// custom decoder (default=None) + TORCH_ARG(AnyModule, custom_decoder); +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformercoder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformercoder.h new file mode 100644 index 0000000000000000000000000000000000000000..fd36d1de06709282158e592f56682b99404171cc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformercoder.h @@ -0,0 +1,79 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::nn { + +/// Options for the `TransformerEncoder` +/// +/// Example: +/// ``` +/// TransformerEncoderLayer encoderLayer(TransformerEncoderLayerOptions(512, +/// 8).dropout(0.1)); auto options = TransformerEncoderOptions(encoderLayer, +/// 6).norm(LayerNorm(LayerNormOptions({2}))); +/// ``` +struct TORCH_API TransformerEncoderOptions { + // This constructor will keep a shallow copy of encoder_layer, so it keeps all + // the data in encoder_layer. + TransformerEncoderOptions( + TransformerEncoderLayer encoder_layer, + int64_t num_layers); + // This constructor will create a new TransformerEncoderLayer obj based on + // passed in encoder_layer_options. 
+ TransformerEncoderOptions( + const TransformerEncoderLayerOptions& encoder_layer_options, + int64_t num_layers); + + /// transformer Encoder Layer + TORCH_ARG(TransformerEncoderLayer, encoder_layer) = nullptr; + + /// number of encoder layers + TORCH_ARG(int64_t, num_layers); + + /// normalization module + TORCH_ARG(AnyModule, norm); +}; + +/// Options for the `TransformerDecoder` module. +/// +/// Example: +/// ``` +/// TransformerDecoderLayer decoder_layer(TransformerDecoderLayerOptions(512, +/// 8).dropout(0.1)); auto options = TransformerDecoderOptions(decoder_layer, +/// 6)norm(LayerNorm(LayerNormOptions({2}))); TransformerDecoder +/// transformer_decoder(options); +/// ``` +struct TORCH_API TransformerDecoderOptions { + // This constructor will keep the a ref of passed in decoder_layer, + // so it keeps all the data in decoder_layer. + TransformerDecoderOptions( + TransformerDecoderLayer decoder_layer, + int64_t num_layers); + // This constructor will create a new TransformerDecoderLayer obj, + // based on passed in decoder_layer_options. + TransformerDecoderOptions( + const TransformerDecoderLayerOptions& decoder_layer_options, + int64_t num_layers); + + /// decoder layer to be cloned + TORCH_ARG(TransformerDecoderLayer, decoder_layer) = nullptr; + + /// number of decoder layers + TORCH_ARG(int64_t, num_layers); + + /// normalization module + TORCH_ARG(AnyModule, norm); +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformerlayer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformerlayer.h new file mode 100644 index 0000000000000000000000000000000000000000..2e4afb8beecc0c1ddaf9d8f2d08375ec836ded3a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/transformerlayer.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn { + +using activation_t = std::variant< + enumtype::kReLU, + enumtype::kGELU, + std::function>; + +/// Options for the `TransformerEncoderLayer` +/// +/// Example: +/// ``` +/// auto options = TransformerEncoderLayer(512, 8).dropout(0.2); +/// ``` +struct TORCH_API TransformerEncoderLayerOptions { + /* implicit */ TransformerEncoderLayerOptions(int64_t d_model, int64_t nhead); + + /// the number of expected features in the input + TORCH_ARG(int64_t, d_model); + + /// the number of heads in the multiheadattention models + TORCH_ARG(int64_t, nhead); + + /// the dimension of the feedforward network model, default is 2048 + TORCH_ARG(int64_t, dim_feedforward) = 2048; + + /// the dropout value, default is 0.1 + TORCH_ARG(double, dropout) = 0.1; + + /// the activation function of intermediate layer, can be ``torch::kReLU``, + /// ``torch::GELU``, or a unary callable. Default: ``torch::kReLU`` + TORCH_ARG(activation_t, activation) = torch::kReLU; +}; + +// ============================================================================ + +/// Options for the `TransformerDecoderLayer` module. 
+/// +/// Example: +/// ``` +/// TransformerDecoderLayer model(TransformerDecoderLayerOptions(512, +/// 8).dropout(0.2)); +/// ``` +struct TORCH_API TransformerDecoderLayerOptions { + TransformerDecoderLayerOptions(int64_t d_model, int64_t nhead); + + /// number of expected features in the input + TORCH_ARG(int64_t, d_model); + + /// number of heads in the multiheadattention models + TORCH_ARG(int64_t, nhead); + + /// dimension of the feedforward network model. Default: 2048 + TORCH_ARG(int64_t, dim_feedforward) = 2048; + + /// dropout value. Default: 1 + TORCH_ARG(double, dropout) = 0.1; + + /// activation function of intermediate layer, can be ``torch::kGELU``, + /// ``torch::kReLU``, or a unary callable. Default: ``torch::kReLU`` + TORCH_ARG(activation_t, activation) = torch::kReLU; +}; + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/upsampling.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/upsampling.h new file mode 100644 index 0000000000000000000000000000000000000000..f0bcdd202af343d3a335a529ed5c9b4ef51ef0cf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/upsampling.h @@ -0,0 +1,113 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::nn { + +/// Options for the `Upsample` module. +/// +/// Example: +/// ``` +/// Upsample +/// model(UpsampleOptions().scale_factor(std::vector({3})).mode(torch::kLinear).align_corners(false)); +/// ``` +struct TORCH_API UpsampleOptions { + /// output spatial sizes. 
+ TORCH_ARG(std::optional>, size) = std::nullopt; + + /// multiplier for spatial size. + TORCH_ARG(std::optional>, scale_factor) = std::nullopt; + + /// the upsampling algorithm: one of "nearest", "linear", "bilinear", + /// "bicubic" and "trilinear". Default: "nearest" + typedef std::variant< + enumtype::kNearest, + enumtype::kLinear, + enumtype::kBilinear, + enumtype::kBicubic, + enumtype::kTrilinear> + mode_t; + TORCH_ARG(mode_t, mode) = torch::kNearest; + + /// if "True", the corner pixels of the input and output tensors are + /// aligned, and thus preserving the values at those pixels. This only has + /// effect when :attr:`mode` is "linear", "bilinear", "bicubic", or + /// "trilinear". Default: "False" + TORCH_ARG(std::optional, align_corners) = std::nullopt; +}; + +namespace functional { + +/// Options for `torch::nn::functional::interpolate`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::interpolate(input, +/// F::InterpolateFuncOptions().size(std::vector({4})).mode(torch::kNearest)); +/// ``` +struct TORCH_API InterpolateFuncOptions { + typedef std::variant< + enumtype::kNearest, + enumtype::kLinear, + enumtype::kBilinear, + enumtype::kBicubic, + enumtype::kTrilinear, + enumtype::kArea, + enumtype::kNearestExact> + mode_t; + + /// output spatial sizes. + TORCH_ARG(std::optional>, size) = std::nullopt; + + /// multiplier for spatial size. + TORCH_ARG(std::optional>, scale_factor) = std::nullopt; + + /// the upsampling algorithm: one of "nearest", "linear", "bilinear", + /// "bicubic", "trilinear", "area", "nearest-exact". Default: "nearest" + TORCH_ARG(mode_t, mode) = torch::kNearest; + + /// Geometrically, we consider the pixels of the input and output as squares + /// rather than points. If set to "True", the input and output tensors are + /// aligned by the center points of their corner pixels, preserving the values + /// at the corner pixels. 
If set to "False", the input and output tensors + /// are aligned by the corner points of their corner pixels, and the + /// interpolation uses edge value padding for out-of-boundary values, making + /// this operation *independent* of input size when `scale_factor` is + /// kept the same. It is *required* when interpolating mode is "linear", + /// "bilinear", "bicubic" or "trilinear". Default: "False" + TORCH_ARG(std::optional, align_corners) = std::nullopt; + + /// recompute the scale_factor for use in the + /// interpolation calculation. When `scale_factor` is passed as a parameter, + /// it is used to compute the `output_size`. If `recompute_scale_factor` is + /// `true` or not specified, a new `scale_factor` will be computed based on + /// the output and input sizes for use in the interpolation computation (i.e. + /// the computation will be identical to if the computed `output_size` were + /// passed-in explicitly). Otherwise, the passed-in `scale_factor` will be + /// used in the interpolation computation. Note that when `scale_factor` is + /// floating-point, the recomputed scale_factor may differ from the one passed + /// in due to rounding and precision issues. + TORCH_ARG(std::optional, recompute_scale_factor) = std::nullopt; + + /// flag to apply anti-aliasing. Using anti-alias + /// option together with :attr:`align_corners` equals "False", interpolation + /// result would match Pillow result for downsampling operation. Supported + /// modes: "bilinear". Default: "False". + TORCH_ARG(bool, antialias) = false; +}; + +} // namespace functional + +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/vision.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/vision.h new file mode 100644 index 0000000000000000000000000000000000000000..e0aa9405a7d4520d5ee82c87baf87738848c36e1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/options/vision.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::nn::functional { + +/// Options for `torch::nn::functional::grid_sample`. +/// +/// Example: +/// ``` +/// namespace F = torch::nn::functional; +/// F::grid_sample(input, grid, +/// F::GridSampleFuncOptions().mode(torch::kBilinear).padding_mode(torch::kZeros).align_corners(true)); +/// ``` +struct TORCH_API GridSampleFuncOptions { + typedef std:: + variant + mode_t; + typedef std:: + variant + padding_mode_t; + + /// interpolation mode to calculate output values. Default: Bilinear + TORCH_ARG(mode_t, mode) = torch::kBilinear; + /// padding mode for outside grid values. Default: Zeros + TORCH_ARG(padding_mode_t, padding_mode) = torch::kZeros; + /// Specifies perspective to pixel as point. Default: false + TORCH_ARG(std::optional, align_corners) = std::nullopt; +}; + +} // namespace torch::nn::functional + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/parallel/data_parallel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/parallel/data_parallel.h new file mode 100644 index 0000000000000000000000000000000000000000..125346128dc6ffef426e967583f07abc16186b2c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/parallel/data_parallel.h @@ -0,0 +1,300 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::nn { + +namespace { + +// Note [Replicating Modules] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~ +// +// Module replication is implemented in the following two steps: +// 1) create a module replica on each destination device using Module.clone(). +// 2) manually add a gradient edge pointing from every parameter X in every +// module replica to the same parameter X in the original module, using +// ReduceAdd as the grad_fn. +// +// ReduceAdd can ONLY be used during the backward pass of data parallel. Forward +// pass cannot use this function as it does not setup gradient function and +// history at all. Do NOT try to use ReduceAdd for any other purposes. +// +// NB: An alternative is to add Broadcast and ReduceAddCoalesce to +// torch/csrc/autograd/functions/comm.cpp as normal autograd functions, +// implement a Replicatable (like cloneable) class and add it as a friend class +// in Module.h. In the forward pass, the Replicatable could use the Broadcast +// function to replicate every module parameter and set gradient functions using +// ReduceAddCoalesce (like how it is implemented in Python). 
However, unlike in +// Python, where changes to Linear._parameters["weight"] would also apply to +// Linear.weight (using Linear as an example), Linear.weight and +// Linear.parameters_["weight"] are two tensor objects pointing to the same +// TensorImpl. Assigning a new tensor to Linear.parameters_["weight"] will not +// change Linear.weight. To make this work, we will have to: +// 1) force every module to also inherit from Replicatable +// 2) force every module to implement an additional function, e.g., +// Replicatable::load_params(), to pick up changes from parameters_ to their +// own member fields. +// This will be an overkill as Replicatable will only be used in data_parallel, +// not even ddp. + +// Autograd function for the replicate step in data parallel. This is only used +// in data parallel, and should not be exposed as a user API. +struct ReduceAdd : public autograd::Node { + explicit ReduceAdd(const at::Device& destination_device) + : destination_device_(destination_device) {}; + ~ReduceAdd() override = default; + + // NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) + autograd::variable_list apply(autograd::variable_list&& inputs) override { + TORCH_CHECK( + !torch::autograd::compute_requires_grad(inputs), + "ReduceAdd can only be used during the backward pass of data parallel."); + + Tensor output = torch::zeros_like(inputs[0], {destination_device_}); + + for (auto& input : inputs) { + TORCH_CHECK( + input.sizes() == inputs[0].sizes(), + "All inputs of ReduceAdd must have the same size, but got ", + input.sizes(), + " and ", + inputs[0].sizes()); + + TORCH_CHECK( + input.dtype() == inputs[0].dtype(), + "All inputs of ReduceAdd must have the same dtype, but got ", + input.dtype(), + " and ", + inputs[0].dtype()); + + // TODO: use nccl reduce + output.add_(input.to(destination_device_)); + } + + return {output}; + } + + private: + at::Device destination_device_; +}; + +} // namespace + +// A friend function to Module, it recursively 
sets gradient edges pointing from +// every parameter X in every module replica to the same parameter X in the +// original module. See [Replicating Modules] +template +void replicate_grad_edges( + const std::shared_ptr& module, + const std::vector>& replicas, + const std::vector& devices) { + for (auto& parameter : module->named_parameters(/*recurse=*/false)) { + auto grad_fn = std::make_shared((*parameter).device()); + grad_fn->set_next_edges(autograd::collect_next_edges(*parameter)); + + for (const auto i : c10::irange(devices.size())) { + autograd::set_history(replicas[i]->parameters_[parameter.key()], grad_fn); + } + } + + for (auto& buffer : module->named_buffers(/*recurse=*/false)) { + if (buffer.value().requires_grad()) { + auto grad_fn = std::make_shared((*buffer).device()); + grad_fn->set_next_edges(autograd::collect_next_edges(*buffer)); + + for (const auto i : c10::irange(devices.size())) { + autograd::set_history(replicas[i]->buffers_[buffer.key()], grad_fn); + } + } + } + + for (auto& child : module->children_) { + std::vector> child_replicas; + child_replicas.reserve(devices.size()); + for (auto& replica : replicas) { + child_replicas.push_back(replica->children_[child.key()]); + } + + // recursively set gradient edges for all children + replicate_grad_edges(*child, child_replicas, devices); + } +} + +namespace parallel { + +/// Replicates a module on the given list of devices. +/// A replica is created by calling `clone()` on the module. For this, the +/// module must inherit from `nn::Cloneable`, or define its own `clone()` +/// method, which is expected to perform a deep copy of the module. 
+template +std::vector> replicate( + const std::shared_ptr& module, + const std::vector& devices) { + std::vector> replicas; + replicas.reserve(devices.size()); + for (const auto& device : devices) { + replicas.push_back( + std::dynamic_pointer_cast(module->clone(device))); + } + // Configure gradient edges to point from replcia parameters to original + // module parameters. See [Replicating Modules] + replicate_grad_edges(module, replicas, devices); + return replicas; +} + +/// Replicates a module holder on the given list of devices. +/// This method allows calling `replicate()` with a module holder, such as +/// `Linear`. +template +std::vector> replicate( + const ModuleHolder& module, + const std::vector& devices) { + auto ptrs = replicate(module.ptr(), devices); + return std::vector>(ptrs.begin(), ptrs.end()); +} + +/// Applies the given inputs to the given modules in a parallel fashion. +/// Conceptually, a thread is spawned for each `(module, input)` pair, in which +/// `forward()` is called on the module with its corresponding input. The +/// outputs of the individual calls are stored in a vector and returned. +/// +/// The first exception caught by any thread is stashed and rethrown after all +/// threads have completed their operation. +/// +/// Further remarks: +/// 1. The length of the module container must match the length of the inputs. +/// 2. If a list of devices is supplied, it must match the list of modules in +/// length. Each device will be set to the current default device during the +/// invocation of the respective module. This means any tensors allocated on the +/// default device inside the module will be constructed on this device. 
+template +std::vector parallel_apply( + std::vector& modules, + const std::vector& inputs, + const std::optional>& devices = std::nullopt) { + TORCH_CHECK( + modules.size() == inputs.size(), "Must have as many inputs as modules"); + if (devices) { + TORCH_CHECK( + modules.size() == devices->size(), + "Must have as many devices as modules"); + } + + std::vector outputs(modules.size()); + std::mutex mutex; + + // std::exception_ptr can be passed between threads: + // > An instance of std::exception_ptr may be passed to another function, + // > possibly on another thread, where the exception may be rethrown [...]. + // https://en.cppreference.com/w/cpp/error/exception_ptr + std::exception_ptr exception; + + at::parallel_for( + /*begin=*/0, + /*end=*/modules.size(), + /*grain_size=*/1, + [&modules, &inputs, &devices, &outputs, &mutex, &exception]( + int64_t index, int64_t stop) { + for (; index < stop; ++index) { + try { + auto output = modules[index]->forward(inputs[index]); + output = + output.to(devices ? (*devices)[index] : inputs[index].device()); + std::lock_guard lock(mutex); + outputs[index] = output; + } catch (...) { + std::lock_guard lock(mutex); + if (!exception) { + exception = std::current_exception(); + } + } + } + }); + + if (exception) { + std::rethrow_exception(exception); + } + + return outputs; +} + +/// Evaluates `module(input)` in parallel across the given `devices`. If +/// `devices` is not supplied, the invocation is parallelized across all +/// available CUDA devices. If `output_device` is supplied, the final, combined +/// tensor will be placed on this device. If not, it defaults to the first +/// device in `devices`. +/// +/// In detail, this method performs the following four distinct steps: +/// 1. *Scatter* the input to the given devices, +/// 2. *Replicate* (deep clone) the model on each device, +/// 3. *Evaluate* each module with its input on its device, +/// 4. 
*Gather* the outputs of each replica into a single output tensor, located +/// on the `output_device`. +template +Tensor data_parallel( + ModuleType module, + Tensor input, + std::optional> devices = std::nullopt, + std::optional output_device = std::nullopt, + int64_t dim = 0) { + if (!devices) { + const auto device_count = torch::cuda::device_count(); + TORCH_CHECK( + device_count > 0, "Expected at least one CUDA device to be available"); + devices = std::vector(); + devices->reserve(device_count); + for (const auto index : c10::irange(device_count)) { + devices->emplace_back(kCUDA, static_cast(index)); + } + } + if (!output_device) { + output_device = devices->front(); + } + + if (devices->size() == 1) { + module->to(devices->front()); + input = input.to(devices->front()); + return module->forward(std::move(input)).to(*output_device); + } + + autograd::Scatter scatter(*devices, /*chunk_sizes=*/std::nullopt, dim); + auto scattered_inputs = fmap(scatter.apply({std::move(input)})); + // Input tensor might not be big enough to scale across all available devices + if (scattered_inputs.size() < devices->size()) { + devices->resize( + scattered_inputs.size(), + Device(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)); + } + + auto replicas = replicate(module, *devices); + auto outputs = parallel_apply(replicas, scattered_inputs, *devices); + return autograd::Gather(*output_device, dim) + .apply(fmap(std::move(outputs))) + .front(); +} + +} // namespace parallel +} // namespace torch::nn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl-inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..1e5e5277ddb014be3642a51c6fa0785b20af6b28 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl-inl.h @@ -0,0 +1,81 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// This class exists only to do SFINAE on abstract types `T` that are really +// `ModuleHolder`, because there's no good way to say that `T` is a +// `ModuleHolder` over some unknown type `ModuleType`. With this, you can do +// `enable_if_t>`. +struct ModuleHolderIndicator {}; + +// A type trait that is true for types that are `ModuleHolder`s. +template +using is_module_holder = + std::is_base_of>; + +template +using disable_if_module_holder_t = + std::enable_if_t::value>; + +// A collection of templates that answer the question whether a type `T` is a +// `ModuleHolder`, and if so whether its contained type is of type `C`. This is +// tricky because it is hard to short circuit in template metaprogramming. A +// naive and incorrect solution to this problem would be something like +// `disable_if::value && typename T::ContainedType == C>`. +// This would disable all types that are not `ModuleHolder`s, because even +// though the `is_module_holder::value` may be `false` for such types the +// `T::ContainedType` access would be ill-formed and thus fail the whole +// expression by the rules of SFINAE. Instead we have to use template +// specialization to statically branch on the first condition +// (`is_module_holder`) and are only then allowed to query +// `T::ContainedType` in the branch for which the condition was true. 
+ +// Base template. +template +struct is_module_holder_of_impl; + +// False branch. `T` is not a `ModuleHolder` and thus not a `ModuleHolder` with +// contained type `C`. +template +struct is_module_holder_of_impl : std::false_type {}; + +// True branch. `T` is a `ModuleHolder` and thus we can legit access its +// `ContainedType` and compare it against `C`. +template +struct is_module_holder_of_impl + : std::is_same {}; + +// Helper template. +template +struct is_module_holder_of : is_module_holder_of_impl< + is_module_holder::value, + std::decay_t, + std::decay_t> {}; + +// A collection of templates that allow deducing the return type of the +// `forward()` method, but only if a module actually has a `forward()` method, +// and otherwise deduces to the type `void`. + +template +struct return_type_of_forward_impl; + +template +struct return_type_of_forward_impl { + using type = decltype(::std::declval().forward(::std::declval()...)); +}; + +template +struct return_type_of_forward_impl { + using type = void; +}; + +template +using return_type_of_forward = return_type_of_forward_impl< + torch::detail::has_forward::value, + C, + Args...>; + +template +using return_type_of_forward_t = + typename return_type_of_forward::type; + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl.h new file mode 100644 index 0000000000000000000000000000000000000000..b1c1cc2dfcb08fb0b644cf6fff131cbbfc3c791e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/pimpl.h @@ -0,0 +1,205 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +#include +#include +#include + +namespace torch { +namespace detail { +// Dump all the template metaprogramming in this file. +#include +} // namespace detail + +namespace nn { + +/// A `ModuleHolder` is essentially a wrapper around `std::shared_ptr` where +/// `M` is an `nn::Module` subclass, with convenient constructors defined for +/// the kind of constructions we want to allow for our modules. +template +class ModuleHolder : torch::detail::ModuleHolderIndicator { + protected: + /// The module pointer this class wraps. + /// NOTE: Must be placed at the top of the class so that we can use it with + /// trailing return types below. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::shared_ptr impl_; + + public: + using ContainedType = Contained; + + /// Default constructs the contained module if if has a default constructor, + /// else produces a static error. + /// + /// NOTE: This uses the behavior of template + /// classes in C++ that constructors (or any methods) are only compiled when + /// actually used. + ModuleHolder() : impl_(default_construct()) { + static_assert( + std::is_default_constructible_v, + "You are trying to default construct a module which has " + "no default constructor. 
Use = nullptr to give it the empty state " + "(e.g. `Linear linear = nullptr;` instead of `Linear linear;`)."); + } + + /// Constructs the `ModuleHolder` with an empty contained value. Access to + /// the underlying module is not permitted and will throw an exception, until + /// a value is assigned. + /* implicit */ ModuleHolder(std::nullptr_t) : impl_(nullptr) {} + + /// Constructs the `ModuleHolder` with a contained module, forwarding all + /// arguments to its constructor. + template < + typename Head, + typename... Tail, + typename = std::enable_if_t< + !(torch::detail::is_module_holder_of::value && + (sizeof...(Tail) == 0))>> + explicit ModuleHolder(Head&& head, Tail&&... tail) + : impl_(new Contained( + std::forward(head), + std::forward(tail)...)) {} + + /// Constructs the `ModuleHolder` from a pointer to the contained type. + /// Example: `Linear(std::make_shared(...))`. + /* implicit */ ModuleHolder(std::shared_ptr module) + : impl_(std::move(module)) {} + + /// Returns true if the `ModuleHolder` contains a module, or false if it is + /// `nullptr`. + explicit operator bool() const noexcept { + return !is_empty(); + } + + /// Forwards to the contained module. + Contained* operator->() { + return get(); + } + + /// Forwards to the contained module. + const Contained* operator->() const { + return get(); + } + + /// Returns a reference to the contained module. + Contained& operator*() { + return *get(); + } + + /// Returns a const reference to the contained module. + const Contained& operator*() const { + return *get(); + } + + /// Returns a shared pointer to the underlying module. + const std::shared_ptr& ptr() const { + TORCH_CHECK(!is_empty(), "Accessing empty ModuleHolder"); + return impl_; + } + + /// Returns a pointer to the underlying module. + Contained* get() { + TORCH_CHECK(!is_empty(), "Accessing empty ModuleHolder"); + return impl_.get(); + } + + /// Returns a const pointer to the underlying module. 
+ const Contained* get() const { + TORCH_CHECK(!is_empty(), "Accessing empty ModuleHolder"); + return impl_.get(); + } + + /// Calls the `forward()` method of the contained module. + template + auto operator()(Args&&... args) + -> torch::detail::return_type_of_forward_t { + // This will not compile if the module does not have a `forward()` method + // (as expected). + // NOTE: `std::forward` is qualified to prevent VS2017 emitting + // error C2872: 'std': ambiguous symbol + return impl_->forward(::std::forward(args)...); + } + + /// Forwards to the subscript operator of the contained module. + /// NOTE: std::forward is qualified to prevent VS2017 emitting + /// error C2872: 'std': ambiguous symbol + template + auto operator[](Arg&& arg) { + return (*impl_)[::std::forward(arg)]; + } + + /// Returns true if the `ModuleHolder` does not contain a module. + bool is_empty() const noexcept { + return impl_ == nullptr; + } + + private: + template + std::shared_ptr default_construct() { + if constexpr (std::is_default_constructible_v) { + return std::make_shared(); + } else { + return nullptr; + } + } +}; + +/// Pretty prints the given `Module` into the `ostream`. +template +std::ostream& operator<<( + std::ostream& stream, + const nn::ModuleHolder& module) { + return stream << *module; +} + +/// Serializes a `ModuleHolder` into an `OutputArchive`. +template +serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const nn::ModuleHolder& module) { + return archive << module.ptr(); +} + +/// Deserializes a `ModuleHolder` from an `InputArchive`. +template +serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + nn::ModuleHolder& module) { + return archive >> module.ptr(); +} + +} // namespace nn +} // namespace torch + +// Workaround for CUDA 10.2 and below not allowing attribute unused on +// using declarations. 
+#ifdef __CUDACC__ +#define TORCH_UNUSED_EXCEPT_CUDA +#else +#define TORCH_UNUSED_EXCEPT_CUDA [[maybe_unused]] +#endif + +/// Defines a class `Name` which inherits from `nn::ModuleHolder` to provide a +/// wrapper over a `std::shared_ptr`. +/// `Impl` is a type alias for `ImplType` which provides a way to call static +/// method of `ImplType`. +#define TORCH_MODULE_IMPL(Name, ImplType) \ + class Name : public torch::nn::ModuleHolder { /* NOLINT */ \ + public: \ + using torch::nn::ModuleHolder::ModuleHolder; \ + using Impl TORCH_UNUSED_EXCEPT_CUDA = ImplType; \ + } + +/// Like `TORCH_MODULE_IMPL`, but defaults the `ImplType` name to `Impl`. +#define TORCH_MODULE(Name) TORCH_MODULE_IMPL(Name, Name##Impl) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..e83ac79b5beba541242517ce5583a700df0defb2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils.h @@ -0,0 +1,10 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/clip_grad.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/clip_grad.h new file mode 100644 index 0000000000000000000000000000000000000000..7ebad2ae501cd38a85380b02eed44bfa1d08ddd7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/clip_grad.h @@ -0,0 +1,149 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::nn::utils { + +// Clips gradient norm of a vector of Tensors. +// See +// https://pytorch.org/docs/stable/nn.html?highlight=clip_grad_norm#torch.nn.utils.clip_grad_norm_ +// for more details about this module. +// +// Difference with the python version: unlike the python version, even when +// skipping the finiteness checks (error_if_nonfinite = false), this function +// will introduce a device <=> CPU synchronization (for devices where that makes +// sense!) in order to return a CPU-side `double`. This C++ version therefore +// cannot be run fully asynchronously w.r.t. the device of the gradients. 
+inline double clip_grad_norm_( + const std::vector& parameters, + double max_norm, + double norm_type = 2.0, + bool error_if_nonfinite = false) { + std::vector params_with_grad; + + for (const auto& param : parameters) { + auto& grad = param.grad(); + if (grad.defined()) { + params_with_grad.push_back(param); + } + } + + if (params_with_grad.empty()) { + return 0.0; + } + + Tensor total_norm_tensor; + if (norm_type == std::numeric_limits::infinity()) { + std::vector norms; + norms.reserve(params_with_grad.size()); + + for (const auto& param : params_with_grad) { + norms.emplace_back(param.grad().data().abs().max()); + } + total_norm_tensor = + (norms.size() == 1) ? norms[0] : torch::max(torch::stack(norms)); + } else if (norm_type == 0) { + total_norm_tensor = + torch::full({}, static_cast(params_with_grad.size())); + } else { + std::vector norms; + norms.reserve(params_with_grad.size()); + + for (const auto& param : params_with_grad) { + norms.emplace_back(param.grad().data().norm(norm_type)); + } + total_norm_tensor = + (norms.size() == 1) ? norms[0] : torch::stack(norms).norm(norm_type); + } + + // When possible (ie when skipping the finiteness check), we avoid + // synchronizing the CPU and the gradients' device until the very end to + // preserve async execution on the device. When checking for finite-ness, this + // optional ensures we only sync once. + std::optional total_norm = std::nullopt; + if (error_if_nonfinite) { + total_norm = total_norm_tensor.item().toDouble(); + TORCH_CHECK( + std::isfinite(*total_norm), + "The total norm of order ", + norm_type, + " for gradients from `parameters` ", + "is non-finite, so it cannot be clipped. 
To disable this error and scale ", + "the gradients with the non-finite norm anyway, set ", + "`error_if_nonfinite=false`"); + } + + auto clip_coef = max_norm / (total_norm_tensor + 1e-6); + auto clip_coef_clamped = + torch::clamp(clip_coef, std::nullopt /* min */, 1.0 /* max */); + for (auto& param : params_with_grad) { + param.grad().data().mul_(clip_coef_clamped); + } + + if (!total_norm.has_value()) { + total_norm = total_norm_tensor.item().toDouble(); + } + return *total_norm; +} + +// A wrapper around clip_grad_norm_ that allows us to call the function with a +// braced-init-list of Tensors. +inline double clip_grad_norm_( + std::initializer_list parameters, + double max_norm, + double norm_type = 2.0, + bool error_if_nonfinite = false) { + return clip_grad_norm_( + std::vector(parameters), max_norm, norm_type, error_if_nonfinite); +} + +// A wrapper around clip_grad_norm_ that allows us to call the function with a +// single Tensor. +inline double clip_grad_norm_( + Tensor parameter, + double max_norm, + double norm_type = 2.0, + bool error_if_nonfinite = false) { + std::vector params = {std::move(parameter)}; + return clip_grad_norm_(params, max_norm, norm_type, error_if_nonfinite); +} + +// Clips gradient of an iterable of parameters at specified value. +// Gradients are modified in-place. +// See https://pytorch.org/docs/stable/nn.html#clip-grad-value +// for more details about this module. +inline void clip_grad_value_( + const std::vector& parameters, + double clip_value) { + for (const auto& param : parameters) { + if (param.grad().defined()) { + param.grad().data().clamp_(-clip_value, clip_value); + } + } +} + +// A wrapper around clip_grad_value_ that allows us to call the function with a +// braced-init-list of Tensors. 
+inline void clip_grad_value_( + std::initializer_list parameters, + double clip_value) { + clip_grad_value_(std::vector(parameters), clip_value); +} + +// A wrapper around clip_grad_value_ that allows us to call the function with a +// single Tensor. +inline void clip_grad_value_(Tensor parameter, double clip_value) { + std::vector params = {std::move(parameter)}; + clip_grad_value_(params, clip_value); +} + +} // namespace torch::nn::utils + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/convert_parameters.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/convert_parameters.h new file mode 100644 index 0000000000000000000000000000000000000000..de51f38988216f45d9ed1ce002093d49296adea6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/convert_parameters.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::nn::utils { + +// This helper function is to check if the parameters are located +// in the same device. Currently, the conversion between model parameters +// and single vector form is not supported for multiple allocations, +// e.g. parameters in different GPUs, or mixture of CPU/GPU. +inline std::optional _check_param_device( + const torch::Tensor& param, + std::optional old_param_device) { + // Meet the first parameter + if (old_param_device == std::nullopt) { + old_param_device = param.is_cuda() ? 
param.get_device() : -1; + } else { + bool warn = false; + if (param.is_cuda()) { // Check if in same GPU + warn = (param.get_device() != old_param_device); + } else { // Check if in CPU + warn = (old_param_device != -1); + } + if (warn) { + TORCH_CHECK( + false, + "Found two parameters on different devices, ", + "this is currently not supported."); + } + } + + return old_param_device; +} + +// Convert parameters to one vector +inline torch::Tensor parameters_to_vector( + const std::vector& parameters) { + std::optional param_device; + + std::vector vec; + vec.reserve(parameters.size()); + + for (const torch::Tensor& param : parameters) { + // Ensure the parameters are located in the same device + param_device = _check_param_device(param, param_device); + + vec.push_back(param.view(-1)); + } + + return torch::cat(vec); +} + +// Convert one vector to the parameters +inline void vector_to_parameters( + const torch::Tensor& vec, + const std::vector& parameters) { + // Flag for the device where the parameter is located + std::optional param_device; + + // Pointer for slicing the vector for each parameter + int64_t pointer = 0; + for (const torch::Tensor& param : parameters) { + // Ensure the parameters are located in the same device + param_device = _check_param_device(param, param_device); + + // The length of the parameter + auto num_param = param.numel(); + // Slice the vector, reshape it, and replace the old data of the parameter + param.set_data( + vec.slice(0, pointer, pointer + num_param).view_as(param).data()); + + // Increment the pointer + pointer += num_param; + } +} + +} // namespace torch::nn::utils + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/rnn.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/rnn.h new file mode 100644 index 0000000000000000000000000000000000000000..6118dc7650fb29a86097df06d52bbd4093455dc8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/nn/utils/rnn.h @@ -0,0 +1,353 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::nn::utils::rnn { + +inline Tensor invert_permutation(const Tensor& permutation) { + if (!permutation.defined()) { + return torch::Tensor(); + } + Tensor output = + torch::empty_like(permutation, torch::MemoryFormat::Contiguous); + output.scatter_( + 0, + permutation, + torch::arange(0, permutation.numel(), permutation.device())); + return output; +} + +/// Holds the data and list of `batch_sizes` of a packed sequence. +/// +/// All RNN modules accept packed sequences as inputs. +/// +/// Note: +/// Instances of this class should never be created manually. They are meant +/// to be instantiated by functions like `pack_padded_sequence`. +/// +/// Batch sizes represent the number elements at each sequence step in +/// the batch, not the varying sequence lengths passed to +/// `pack_padded_sequence`. For instance, given data ``abc`` and ``x`` +/// the :class:`PackedSequence` would contain data ``axbc`` with +/// ``batch_sizes=[2,1,1]``. +/// +/// Attributes: +/// data (Tensor): Tensor containing packed sequence +/// batch_sizes (Tensor): Tensor of integers holding +/// information about the batch size at each sequence step +/// sorted_indices (Tensor, optional): Tensor of integers holding how this +/// :class:`PackedSequence` is constructed from sequences. 
+/// unsorted_indices (Tensor, optional): Tensor of integers holding how this +/// to recover the original sequences with correct order. +/// +/// .. note:: +/// `data` can be on arbitrary device and of arbitrary dtype. +/// `sorted_indices` and `unsorted_indices` must be ``torch::kInt64`` +/// tensors on the same device as `data`. +/// +/// However, `batch_sizes` should always be a CPU ``torch::kInt64`` tensor. +/// +/// This invariant is maintained throughout `PackedSequence` class, +/// and all functions that construct a `PackedSequence` in libtorch +/// (i.e., they only pass in tensors conforming to this constraint). +class PackedSequence { + public: + explicit PackedSequence( + Tensor data, + Tensor batch_sizes, + Tensor sorted_indices = {}, + Tensor unsorted_indices = {}) { + // NB: if unsorted_indices is provided, it should be the inverse permutation + // to sorted_indices. Don't assert it here because the PackedSequence ctor + // should only be used internally. + if (!unsorted_indices.defined()) { + unsorted_indices = invert_permutation(sorted_indices); + } + TORCH_CHECK( + batch_sizes.device().type() == kCPU, + "batch_sizes should always be on CPU. " + "Instances of PackedSequence should never be created manually. " + "They should be instantiated by functions like pack_sequence " + "and pack_padded_sequences in nn::utils::rnn. " + "https://pytorch.org/docs/stable/nn.html#torch.nn.utils.rnn.pack_sequence"); + data_ = std::move(data); + batch_sizes_ = std::move(batch_sizes); + sorted_indices_ = std::move(sorted_indices); + unsorted_indices_ = std::move(unsorted_indices); + } + + const Tensor& data() const { + return data_; + } + + const Tensor& batch_sizes() const { + return batch_sizes_; + } + + const Tensor& sorted_indices() const { + return sorted_indices_; + } + + const Tensor& unsorted_indices() const { + return unsorted_indices_; + } + + PackedSequence pin_memory() const { + // Why not convert `batch_sizes`? 
+ // See NOTE [ device and dtype of a PackedSequence ] + return PackedSequence( + data_.pin_memory(), + batch_sizes_, + sorted_indices_.defined() ? sorted_indices_.pin_memory() : Tensor(), + unsorted_indices_.defined() ? unsorted_indices_.pin_memory() + : Tensor()); + } + + PackedSequence to(TensorOptions options) const { + // Performs dtype and/or device conversion on `data_`. + // + // If the ``data_`` Tensor already has the correct `torch::Dtype` + // and `torch::Device`, then ``self`` is returned. + // Otherwise, returns a copy with the desired configuration. + + // Why not convert `batch_sizes`? + // See NOTE [ device and dtype of a PackedSequence ] + Tensor data = data_.to(options); + if (data.is_same(data_)) { + return *this; + } else { + // Does not forward device or dtype args, device is set from data.device() + Tensor sorted_indices = sorted_indices_.defined() + ? sorted_indices_.to( + options.device(data.device()).dtype(sorted_indices_.dtype())) + : Tensor(); + Tensor unsorted_indices = unsorted_indices_.defined() + ? unsorted_indices_.to( + options.device(data.device()).dtype(unsorted_indices_.dtype())) + : Tensor(); + return PackedSequence( + std::move(data), + batch_sizes_, + std::move(sorted_indices), + std::move(unsorted_indices)); + } + } + + PackedSequence cuda() const { + return to(kCUDA); + } + + PackedSequence cpu() const { + return to(kCPU); + } + + /// Returns true if `data_` stored on a gpu + bool is_cuda() const { + return data_.is_cuda(); + } + + /// Returns true if `data_` stored on in pinned memory + bool is_pinned() const { + return data_.is_pinned(); + } + + private: + Tensor data_; + Tensor batch_sizes_; + Tensor sorted_indices_; + Tensor unsorted_indices_; +}; + +/// Packs a Tensor containing padded sequences of variable length. 
+/// +/// `input` can be of size ``T x B x *`` where `T` is the length of the +/// longest sequence (equal to ``lengths[0]``), ``B`` is the batch size, and +/// ``*`` is any number of dimensions (including 0). If ``batch_first`` is +/// ``true``, ``B x T x *`` `input` is expected. +/// +/// For unsorted sequences, use `enforce_sorted = false`. If `enforce_sorted` is +/// ``true``, the sequences should be sorted by length in a decreasing order, +/// i.e. +/// ``input[:,0]`` should be the longest sequence, and ``input[:,B-1]`` the +/// shortest one. +/// +/// Note: +/// This function accepts any input that has at least two dimensions. You +/// can apply it to pack the labels, and use the output of the RNN with +/// them to compute the loss directly. A Tensor can be retrieved from +/// a `PackedSequence` object by calling its ``.data()`` function. +/// +/// Arguments: +/// input (Tensor): padded batch of variable length sequences. +/// lengths (Tensor): list of sequences lengths of each batch element. +/// batch_first (bool, optional): if ``true``, the input is expected in ``B +/// x T x *`` +/// format. Default: ``false``. +/// enforce_sorted (bool, optional): if ``true``, the input is expected to +/// contain sequences sorted by length in a decreasing order. If +/// ``false``, this condition is not checked. Default: ``true``. +/// +/// Returns: +/// a `PackedSequence` object +inline PackedSequence pack_padded_sequence( + Tensor input, + Tensor lengths, + bool batch_first = false, + bool enforce_sorted = true) { + lengths = lengths.to(kInt64); + Tensor sorted_indices; + if (enforce_sorted) { + sorted_indices = Tensor(); + } else { + std::tie(lengths, sorted_indices) = + torch::sort(lengths, /*dim=*/-1, /*descending=*/true); + sorted_indices = sorted_indices.to(input.device()); + int64_t batch_dim = batch_first ? 
0 : 1; + input = input.index_select(batch_dim, sorted_indices); + } + + auto [data, batch_sizes] = + torch::_pack_padded_sequence(input, lengths, batch_first); + return PackedSequence( + std::move(data), std::move(batch_sizes), std::move(sorted_indices), {}); +} + +/// Pads a packed batch of variable length sequences. +/// +/// It is an inverse operation to `pack_padded_sequence`. +/// +/// The returned Tensor's data will be of size ``T x B x *``, where `T` is the +/// length of the longest sequence and `B` is the batch size. If ``batch_first`` +/// is true, the data will be transposed into ``B x T x *`` format. +/// +/// Batch elements will be ordered decreasingly by their length. +/// +/// Arguments: +/// sequence (PackedSequence): batch to pad +/// batch_first (bool, optional): if ``true``, the output will be in ``B x T +/// x *`` +/// format. +/// padding_value (double, optional): values for padded elements. +/// total_length (int64_t, optional): if specified, the output will be +/// padded to +/// have length `total_length`. This method will throw error +/// if `total_length` is less than the max sequence length in +/// `sequence`. +/// +/// Returns: +/// Tuple of Tensor containing the padded sequence, and a Tensor +/// containing the list of lengths of each sequence in the batch. 
+inline std::tuple pad_packed_sequence( + const PackedSequence& sequence, + bool batch_first = false, + double padding_value = 0.0, + std::optional total_length = std::nullopt) { + int64_t max_seq_length = sequence.batch_sizes().size(0); + if (total_length.has_value()) { + int64_t total_length_val = total_length.value(); + TORCH_CHECK( + total_length_val >= max_seq_length, + "Expected total_length to be at least the length " + "of the longest sequence in input, but got " + "total_length=", + total_length_val, + " and max sequence length being ", + max_seq_length); + max_seq_length = total_length_val; + } + auto [padded_output, lengths] = torch::_pad_packed_sequence( + sequence.data(), + sequence.batch_sizes(), + batch_first, + padding_value, + max_seq_length); + const Tensor& unsorted_indices = sequence.unsorted_indices(); + if (unsorted_indices.defined()) { + int64_t batch_dim = batch_first ? 0 : 1; + return std::make_tuple( + padded_output.index_select(batch_dim, unsorted_indices), + lengths.index({unsorted_indices.cpu()})); + } + return std::make_tuple(padded_output, lengths); +} + +/// Pad a list of variable length Tensors with ``padding_value`` +/// +/// ``pad_sequence`` stacks a list of Tensors along a new dimension, +/// and pads them to equal length. For example, if the input is list of +/// sequences with size ``L x *`` and if batch_first is false, and ``T x B x *`` +/// otherwise. +/// +/// `B` is batch size. It is equal to the number of elements in ``sequences``. +/// `T` is length of the longest sequence. +/// `L` is length of the sequence. +/// `*` is any number of trailing dimensions, including none. +/// +/// Note: +/// This function returns a Tensor of size ``T x B x *`` or ``B x T x *`` +/// where `T` is the length of the longest sequence. This function assumes +/// trailing dimensions and type of all the Tensors in sequences are same. +/// +/// Arguments: +/// sequences (torch::ArrayRef): list of variable length sequences. 
+/// batch_first (bool, optional): output will be in ``B x T x *`` if true, +/// or in +/// ``T x B x *`` otherwise +/// padding_value (double, optional): value for padded elements. Default: 0. +/// padding_side (str, optional): the side to pad the sequences on. Default: +/// "right". +/// +/// Returns: +/// Tensor of size ``T x B x *`` if `batch_first` is ``false``. +/// Tensor of size ``B x T x *`` otherwise +inline Tensor pad_sequence( + ArrayRef sequences, + bool batch_first = false, + double padding_value = 0, + std::string_view padding_side = "right") { + return at::pad_sequence(sequences, batch_first, padding_value, padding_side); +} + +/// Packs a list of variable length Tensors +/// +/// ``sequences`` should be a list of Tensors of size ``L x *``, where `L` is +/// the length of a sequence and `*` is any number of trailing dimensions, +/// including zero. +/// +/// For unsorted sequences, use `enforce_sorted = false`. If ``enforce_sorted`` +/// is ``true``, the sequences should be sorted in the order of decreasing +/// length. +/// +/// +/// Arguments: +/// sequences (torch::ArrayRef): A list of sequences of decreasing +/// length. enforce_sorted (bool, optional): if ``true``, checks that the +/// input +/// contains sequences sorted by length in a decreasing order. If +/// ``false``, this condition is not checked. Default: ``true``. +/// +/// Returns: +/// a `PackedSequence` object +inline PackedSequence pack_sequence( + ArrayRef sequences, + bool enforce_sorted = true) { + Tensor lengths = torch::empty({(int64_t)sequences.size()}, kInt64); + for (const auto i : c10::irange(sequences.size())) { + lengths[static_cast(i)] = sequences[i].size(0); + } + return pack_padded_sequence( + at::pad_sequence(sequences), + std::move(lengths), + /*batch_first=*/false, + /*enforce_sorted=*/enforce_sorted); +} + +} // namespace torch::nn::utils::rnn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim.h new file mode 100644 index 0000000000000000000000000000000000000000..af26eef3d000978f926ff54561c64ba989623c71 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adagrad.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adagrad.h new file mode 100644 index 0000000000000000000000000000000000000000..3fdcd61614f6cc6678661af4067d9ddff5af70a8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adagrad.h @@ -0,0 +1,104 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::optim { + +struct TORCH_API AdagradOptions + : public OptimizerCloneableOptions { + AdagradOptions(double lr = 1e-2); + TORCH_ARG(double, lr) = 1e-2; + TORCH_ARG(double, lr_decay) = 0; + TORCH_ARG(double, weight_decay) = 0; + TORCH_ARG(double, 
initial_accumulator_value) = 0; + TORCH_ARG(double, eps) = 1e-10; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdagradOptions& lhs, + const AdagradOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API AdagradParamState + : public OptimizerCloneableParamState { + TORCH_ARG(torch::Tensor, sum); + TORCH_ARG(int64_t, step) = 0; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdagradParamState& lhs, + const AdagradParamState& rhs); +}; + +class TORCH_API Adagrad : public Optimizer { + public: + explicit Adagrad( + const std::vector& param_groups, + AdagradOptions defaults = {}) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK( + defaults.lr_decay() >= 0, + "Invalid lr_decay value: ", + defaults.lr_decay()); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + TORCH_CHECK( + defaults.initial_accumulator_value() >= 0, + "Invalid initial_accumulator_value value: ", + defaults.initial_accumulator_value()); + TORCH_CHECK(defaults.eps() >= 0, "Invalid epsilon value: ", defaults.eps()); + + for (const auto& group : param_groups_) { + for (const auto& p : group.params()) { + auto state = std::make_unique(); + state->step(0); + state->sum(torch::full_like( + p.data(), + defaults.initial_accumulator_value(), + at::MemoryFormat::Preserve)); + state_[p.unsafeGetTensorImpl()] = std::move(state); + } + } + } + + explicit Adagrad(std::vector params, AdagradOptions defaults = {}) + : Adagrad({OptimizerParamGroup(std::move(params))}, std::move(defaults)) { + } + + 
torch::Tensor step(LossClosure closure = nullptr) override; + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(Adagrad); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adam.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adam.h new file mode 100644 index 0000000000000000000000000000000000000000..9fe0994a4a0d98d29c2d7f5b2e05d4fdbd01cc02 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adam.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::optim { + +struct TORCH_API AdamOptions : public OptimizerCloneableOptions { + AdamOptions(double lr = 1e-3); + TORCH_ARG(double, lr) = 1e-3; + typedef std::tuple betas_t; + TORCH_ARG(betas_t, betas) = std::make_tuple(0.9, 0.999); + TORCH_ARG(double, eps) = 1e-8; + TORCH_ARG(double, weight_decay) = 0; + TORCH_ARG(bool, amsgrad) = false; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdamOptions& lhs, + const AdamOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API 
AdamParamState + : public OptimizerCloneableParamState { + TORCH_ARG(int64_t, step) = 0; + TORCH_ARG(torch::Tensor, exp_avg); + TORCH_ARG(torch::Tensor, exp_avg_sq); + TORCH_ARG(torch::Tensor, max_exp_avg_sq); + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdamParamState& lhs, + const AdamParamState& rhs); +}; + +class TORCH_API Adam : public Optimizer { + public: + explicit Adam( + const std::vector& param_groups, + AdamOptions defaults = {}) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK(defaults.eps() >= 0, "Invalid epsilon value: ", defaults.eps()); + auto betas = defaults.betas(); + TORCH_CHECK( + 0 <= std::get<0>(betas) && std::get<0>(betas) < 1.0, + "Invalid beta parameter at index 0: ", + std::get<0>(betas)); + TORCH_CHECK( + 0 <= std::get<1>(betas) && std::get<1>(betas) < 1.0, + "Invalid beta parameter at index 1: ", + std::get<1>(betas)); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + } + explicit Adam(std::vector params, AdamOptions defaults = {}) + : Adam({OptimizerParamGroup(std::move(params))}, std::move(defaults)) {} + + torch::Tensor step(LossClosure closure = nullptr) override; + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(Adam); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adamw.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adamw.h new file mode 100644 index 0000000000000000000000000000000000000000..349ea090b8947fe3fded870de16a4f3a99c4d780 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/adamw.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::optim { + +struct TORCH_API AdamWOptions : public OptimizerCloneableOptions { + AdamWOptions(double lr = 1e-3); + TORCH_ARG(double, lr) = 1e-3; + typedef std::tuple betas_t; + TORCH_ARG(betas_t, betas) = std::make_tuple(0.9, 0.999); + TORCH_ARG(double, eps) = 1e-8; + TORCH_ARG(double, weight_decay) = 1e-2; + TORCH_ARG(bool, amsgrad) = false; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdamWOptions& lhs, + const AdamWOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API AdamWParamState + : public OptimizerCloneableParamState { + TORCH_ARG(int64_t, step) = 0; + TORCH_ARG(torch::Tensor, exp_avg); + TORCH_ARG(torch::Tensor, exp_avg_sq); + TORCH_ARG(torch::Tensor, max_exp_avg_sq); + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const AdamWParamState& lhs, + const AdamWParamState& rhs); 
+}; + +class TORCH_API AdamW : public Optimizer { + public: + explicit AdamW( + const std::vector& param_groups, + AdamWOptions defaults = {}) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK(defaults.eps() >= 0, "Invalid epsilon value: ", defaults.eps()); + auto betas = defaults.betas(); + TORCH_CHECK( + 0 <= std::get<0>(betas) && std::get<0>(betas) < 1.0, + "Invalid beta parameter at index 0: ", + std::get<0>(betas)); + TORCH_CHECK( + 0 <= std::get<1>(betas) && std::get<1>(betas) < 1.0, + "Invalid beta parameter at index 1: ", + std::get<1>(betas)); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + } + explicit AdamW(std::vector params, AdamWOptions defaults = {}) + : AdamW({OptimizerParamGroup(std::move(params))}, std::move(defaults)) {} + + torch::Tensor step(LossClosure closure = nullptr) override; + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(AdamW); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/lbfgs.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/lbfgs.h new file mode 100644 index 0000000000000000000000000000000000000000..0d33ff24b8ab4df4eeaedfa1ff4e12c2741816d0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/lbfgs.h @@ -0,0 +1,105 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::optim { + +struct TORCH_API LBFGSOptions : public OptimizerCloneableOptions { + LBFGSOptions(double lr = 1); + TORCH_ARG(double, lr) = 1; + TORCH_ARG(int64_t, max_iter) = 20; + TORCH_ARG(std::optional, max_eval) = std::nullopt; + TORCH_ARG(double, tolerance_grad) = 1e-7; + TORCH_ARG(double, tolerance_change) = 1e-9; + TORCH_ARG(int64_t, history_size) = 100; + TORCH_ARG(std::optional, line_search_fn) = std::nullopt; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const LBFGSOptions& lhs, + const LBFGSOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API LBFGSParamState + : public OptimizerCloneableParamState { + TORCH_ARG(int64_t, func_evals) = 0; + TORCH_ARG(int64_t, n_iter) = 0; + TORCH_ARG(double, t) = 0; + TORCH_ARG(double, prev_loss) = 0; + TORCH_ARG(Tensor, d); + TORCH_ARG(Tensor, H_diag); + TORCH_ARG(Tensor, prev_flat_grad); + TORCH_ARG(std::deque, old_dirs); + TORCH_ARG(std::deque, old_stps); + TORCH_ARG(std::deque, ro); + TORCH_ARG(std::optional>, al) = std::nullopt; + + public: + void 
serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const LBFGSParamState& lhs, + const LBFGSParamState& rhs); +}; + +class TORCH_API LBFGS : public Optimizer { + public: + explicit LBFGS( + const std::vector& param_groups, + LBFGSOptions defaults = {}) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK( + param_groups_.size() == 1, + "LBFGS doesn't support per-parameter options (parameter groups)"); + if (defaults.max_eval() == std::nullopt) { + auto max_eval_val = (defaults.max_iter() * 5) / 4; + static_cast(param_groups_[0].options()) + .max_eval(max_eval_val); + static_cast(*defaults_).max_eval(max_eval_val); + } + _numel_cache = std::nullopt; + } + explicit LBFGS(std::vector params, LBFGSOptions defaults = {}) + : LBFGS({OptimizerParamGroup(std::move(params))}, std::move(defaults)) {} + + Tensor step(LossClosure closure) override; + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + std::optional _numel_cache; + int64_t _numel(); + Tensor _gather_flat_grad(); + void _add_grad(const double step_size, const Tensor& update); + std::tuple _directional_evaluate( + const LossClosure& closure, + const std::vector& x, + double t, + const Tensor& d); + void _set_param(const std::vector& params_data); + std::vector _clone_param(); + + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(LBFGS); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/optimizer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/optimizer.h new file mode 100644 index 0000000000000000000000000000000000000000..26c71d19eae5ee6265ac76824956c6592282806b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/optimizer.h @@ -0,0 +1,228 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +// Forward declarations confuse Doxygen +#ifndef DOXYGEN_SHOULD_SKIP_THIS +namespace at { +class Tensor; +} // namespace at + +namespace torch { +using at::Tensor; +namespace serialize { +class OutputArchive; +class InputArchive; +} // namespace serialize +} // namespace torch +#endif // DOXYGEN_SHOULD_SKIP_THIS + +namespace torch::optim { + +class TORCH_API OptimizerParamState { + public: + OptimizerParamState() = default; + OptimizerParamState(const OptimizerParamState&) = default; + OptimizerParamState& operator=(const OptimizerParamState&) = default; + OptimizerParamState(OptimizerParamState&&) noexcept = default; + OptimizerParamState& operator=(OptimizerParamState&&) noexcept = default; + virtual std::unique_ptr clone() const; + virtual void serialize(torch::serialize::InputArchive& archive); + virtual void serialize(torch::serialize::OutputArchive& archive) const; + virtual ~OptimizerParamState() = default; +}; + +template +class OptimizerCloneableParamState : public OptimizerParamState { + std::unique_ptr clone() const override { + return std::make_unique(static_cast(*this)); + } +}; + +class TORCH_API OptimizerOptions { + public: + OptimizerOptions() = default; + 
OptimizerOptions(const OptimizerOptions&) = default; + OptimizerOptions& operator=(const OptimizerOptions&) = default; + OptimizerOptions(OptimizerOptions&&) noexcept = default; + OptimizerOptions& operator=(OptimizerOptions&&) noexcept = default; + virtual std::unique_ptr clone() const; + virtual void serialize(torch::serialize::InputArchive& archive); + virtual void serialize(torch::serialize::OutputArchive& archive) const; + virtual ~OptimizerOptions() = default; + virtual double get_lr() const; + virtual void set_lr(const double lr); +}; + +template +class OptimizerCloneableOptions : public OptimizerOptions { + private: + std::unique_ptr clone() const override { + return std::make_unique(static_cast(*this)); + } +}; + +/// Stores parameters in the param_group and stores a pointer to the +/// OptimizerOptions +class TORCH_API OptimizerParamGroup { + public: + // NOTE: In order to store `OptimizerParamGroup` in a `std::vector`, it has to + // be copy-constructible. + OptimizerParamGroup(const OptimizerParamGroup& param_group) + : params_(param_group.params()), + options_( + param_group.has_options() ? 
param_group.options().clone() + : nullptr) {} + OptimizerParamGroup(OptimizerParamGroup&& param_group) = default; + OptimizerParamGroup(std::vector params) + : params_(std::move(params)) {} + OptimizerParamGroup( + std::vector params, + std::unique_ptr options) + : params_(std::move(params)), options_(std::move(options)) {} + + OptimizerParamGroup& operator=(const OptimizerParamGroup& param_group) = + delete; + OptimizerParamGroup& operator=(OptimizerParamGroup&& param_group) noexcept = + default; + ~OptimizerParamGroup() = default; + bool has_options() const; + OptimizerOptions& options(); + const OptimizerOptions& options() const; + void set_options(std::unique_ptr options); + std::vector& params(); + const std::vector& params() const; + + protected: + std::vector params_; + std::unique_ptr options_; +}; + +class TORCH_API Optimizer { + public: + // The copy constructor is deleted, because the user should use the + // `state_dict` / `load_state_dict` API to copy an optimizer instead. + Optimizer(const Optimizer& optimizer) = delete; + Optimizer(Optimizer&& optimizer) = default; + Optimizer& operator=(const Optimizer& optimizer) = delete; + Optimizer& operator=(Optimizer&& optimizer) = default; + + explicit Optimizer( + const std::vector& param_groups, + std::unique_ptr defaults) + : defaults_(std::move(defaults)) { + for (const auto& param_group : param_groups) { + add_param_group(param_group); + } + } + + /// Constructs the `Optimizer` from a vector of parameters. + explicit Optimizer( + std::vector parameters, + std::unique_ptr defaults) + : Optimizer( + {OptimizerParamGroup(std::move(parameters))}, + std::move(defaults)) {} + + /// Adds the given param_group to the optimizer's param_group list. + void add_param_group(const OptimizerParamGroup& param_group); + + virtual ~Optimizer() = default; + + using LossClosure = std::function; + /// A loss function closure, which is expected to return the loss value. 
+ virtual Tensor step(LossClosure closure = nullptr) = 0; + + /// Adds the given vector of parameters to the optimizer's parameter list. + void add_parameters(const std::vector& parameters); + + /// Zeros out the gradients of all parameters. + void zero_grad(bool set_to_none = true); + + /// Provides a const reference to the parameters in the first param_group this + /// optimizer holds. + const std::vector& parameters() const noexcept; + + /// Provides a reference to the parameters in the first param_group this + /// optimizer holds. + std::vector& parameters() noexcept; + + /// Returns the number of parameters referenced by the optimizer. + size_t size() const noexcept; + + OptimizerOptions& defaults() noexcept; + + const OptimizerOptions& defaults() const noexcept; + + /// Provides a reference to the param_groups this optimizer holds. + std::vector& param_groups() noexcept; + + /// Provides a const reference to the param_groups this optimizer holds. + const std::vector& param_groups() const noexcept; + + /// Provides a reference to the state this optimizer holds + ska::flat_hash_map>& + state() noexcept; + + /// Provides a const reference to the state this optimizer holds + const ska::flat_hash_map>& state() + const noexcept; + + /// Serializes the optimizer state into the given `archive`. + virtual void save(serialize::OutputArchive& archive) const; + + /// Deserializes the optimizer state from the given `archive`. + virtual void load(serialize::InputArchive& archive); + + protected: + std::vector param_groups_; + ska::flat_hash_map> state_; + std::unique_ptr defaults_; +}; + +/* How do we decide whether to serialize undefined tensors or + std::nullopt values into the output archive? +Answer: we strictly follow the behavior of Python API. To be more specific: + +For optimizer options: +a) For undefined tensor: currently no tensor is used as an options argument in +Python API, so we don't need to worry about it now. 
b) For std::nullopt value: +we serialize std::nullopt values into the output archive, to follow the exact +same behavior as Python API. + +For optimizer param state: +a) For undefined tensor: in param state, undefined tensor in C++ impl is +equivalent to missing key in Python impl. Since we don't serialize missing keys +in Python API, we skip undefined tensors when serializing the param state. b) +For std::nullopt value: in param state, std::nullopt value in C++ impl is +equivalent to missing key in Python impl. Since we don't serialize missing keys +in Python API, we skip std::nullopt values when serializing the param state. */ + +/// Serializes an `Optimizer` into an `OutputArchive`. +TORCH_API serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const Optimizer& optimizer); + +/// Deserializes a `Tensor` from an `InputArchive`. +TORCH_API serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + Optimizer& optimizer); + +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/rmsprop.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/rmsprop.h new file mode 100644 index 0000000000000000000000000000000000000000..95871059563401b7729baba651fee2e052107492 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/rmsprop.h @@ -0,0 +1,96 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::optim { + +struct TORCH_API RMSpropOptions + : public OptimizerCloneableOptions { + RMSpropOptions(double lr = 1e-2); + TORCH_ARG(double, lr) = 1e-2; + TORCH_ARG(double, alpha) = 0.99; + TORCH_ARG(double, eps) = 1e-8; + TORCH_ARG(double, weight_decay) = 0; + TORCH_ARG(double, momentum) = 0; + TORCH_ARG(bool, centered) = false; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const RMSpropOptions& lhs, + const RMSpropOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API RMSpropParamState + : public OptimizerCloneableParamState { + TORCH_ARG(int64_t, step) = 0; + TORCH_ARG(torch::Tensor, square_avg); + TORCH_ARG(torch::Tensor, momentum_buffer); + TORCH_ARG(torch::Tensor, grad_avg); + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const 
RMSpropParamState& lhs, + const RMSpropParamState& rhs); +}; + +class TORCH_API RMSprop : public Optimizer { + public: + explicit RMSprop( + const std::vector& param_groups, + RMSpropOptions defaults = {}) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK(defaults.eps() >= 0, "Invalid epsilon value: ", defaults.eps()); + TORCH_CHECK( + defaults.momentum() >= 0, + "Invalid momentum value: ", + defaults.momentum()); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + TORCH_CHECK( + defaults.alpha() >= 0, "Invalid alpha value: ", defaults.alpha()); + } + + explicit RMSprop(std::vector params, RMSpropOptions defaults = {}) + : RMSprop({OptimizerParamGroup(std::move(params))}, std::move(defaults)) { + } + + torch::Tensor step(LossClosure closure = nullptr) override; + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(RMSprop); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/lr_scheduler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/lr_scheduler.h new file mode 100644 index 0000000000000000000000000000000000000000..ee311911004afce099dbef5153bb3ea210341baf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/lr_scheduler.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::optim { + +class TORCH_API LRScheduler { + public: + // This class needs to take a reference of an optimizer from outside such that + // it can modify its learning rates; due to this the lifetime of said + // optimizer must be maintained + LRScheduler(torch::optim::Optimizer& optimizer); + + virtual ~LRScheduler() = default; + + void step(); + + protected: + // A vector of learning rates is calculated and returned from the specific + // subclass. A vector is returned with each element being a separate learning + // rate for each param group - although the normal use case would be to return + // a vector of identical elements. + virtual std::vector get_lrs() = 0; + + // Get current learning rates from the optimizer + std::vector get_current_lrs() const; + + unsigned step_count_{}; + + private: + void set_optimizer_lrs(const std::vector& learning_rates); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + torch::optim::Optimizer& optimizer_; +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/reduce_on_plateau_scheduler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/reduce_on_plateau_scheduler.h new file mode 100644 index 0000000000000000000000000000000000000000..6a379c8d0d518ec5dddc0730343cfd5ad2ea301a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/reduce_on_plateau_scheduler.h @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include + +namespace torch::optim { + +class TORCH_API ReduceLROnPlateauScheduler { + public: + enum SchedulerMode { min, max }; + enum ThresholdMode { rel, abs }; + ReduceLROnPlateauScheduler( + Optimizer& optimizer, + SchedulerMode mode = min, + float factor = 0.1, + int patience = 10, + double threshold = 1e-4, + ThresholdMode threshold_mode = rel, + int cooldown = 0, + const std::vector& min_lr = std::vector(), + double eps = 1e-8, + bool verbose = false); + + virtual ~ReduceLROnPlateauScheduler() = default; + + void step(float metric); + + private: + void reset(); + void reduce_lr(int epoch); + bool in_cooldown() const; + bool is_better(float a); + void init_is_better( + SchedulerMode mode, + double threshold, + ThresholdMode threshold_mode); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + Optimizer& optimizer; + SchedulerMode mode{}; + float mode_worse{}; + float factor; + int patience; + double threshold{}; + ThresholdMode threshold_mode{}; + int cooldown{}; + int cooldown_counter{}; + std::vector min_lrs; + double eps; + float best{}; + bool verbose; + int last_epoch{}; + int num_bad_epochs{}; +}; +} // namespace torch::optim + +#else +#error 
"This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/step_lr.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/step_lr.h new file mode 100644 index 0000000000000000000000000000000000000000..7acdfe2a093c49e6018f4032b169b7949198391c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/schedulers/step_lr.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::optim { + +class TORCH_API StepLR : public LRScheduler { + public: + StepLR( + torch::optim::Optimizer& optimizer, + const unsigned step_size, + const double gamma = 0.1); + + private: + std::vector get_lrs() override; + + const unsigned step_size_; + const double gamma_; +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/serialize.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/serialize.h new file mode 100644 index 0000000000000000000000000000000000000000..2045ccf7ee658fb9c7e9362005a0f2e16dff4501 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/serialize.h @@ -0,0 +1,320 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::optim { +namespace detail { +// Utility function to save state +template +void serialize( + serialize::OutputArchive& archive, + const ska::flat_hash_map>& + state) { + for (const auto& item : state) { + serialize::OutputArchive param_state_archive(archive.compilation_unit()); + std::string tensorimpl_key = + std::to_string(reinterpret_cast(item.first)); + const DerivedOptimizerParamState& curr_state = + static_cast(*(item.second)); + curr_state.serialize(param_state_archive); + archive.write(tensorimpl_key, param_state_archive); + } +} + +// Utility function to load state +template +void serialize( + serialize::InputArchive& archive, + ska::flat_hash_map>& state) { + std::vector tensorimpl_keys = archive.keys(); + for (const std::string& tensorimpl_key : tensorimpl_keys) { + serialize::InputArchive param_state_archive; + archive.read(tensorimpl_key, param_state_archive); + DerivedOptimizerParamState param_state; + param_state.serialize(param_state_archive); + // NOLINTNEXTLINE(performance-no-int-to-ptr) + state[reinterpret_cast(std::stoull(tensorimpl_key))] = + std::make_unique(param_state); + } +} + +// Utility function to save param_groups +template +void serialize( + 
serialize::OutputArchive& archive, + const std::vector& param_groups) { + archive.write( + "param_groups/size", + torch::tensor(static_cast(param_groups.size()))); + for (const auto i : c10::irange(param_groups.size())) { + serialize::OutputArchive param_group_archive(archive.compilation_unit()); + std::vector params = param_groups[i].params(); + param_group_archive.write( + "params/size", torch::tensor(static_cast(params.size()))); + for (const auto index : c10::irange(params.size())) { + param_group_archive.write( + "params/" + std::to_string(index), + IValue(std::to_string( + reinterpret_cast(params[index].unsafeGetTensorImpl())))); + } + const DerivedOptimizerParamOptions& param_group_options = + static_cast( + param_groups[i].options()); + serialize::OutputArchive param_group_options_archive( + param_group_archive.compilation_unit()); + param_group_options.serialize(param_group_options_archive); + param_group_archive.write("options", param_group_options_archive); + archive.write("param_groups/" + std::to_string(i), param_group_archive); + } +} + +// Utility function to load param_groups +// We take as input vector of pair of string and unique_ptr to optimizer options +// so that we can retain the state for each param by using the old tensor impl +// keys (saved during serialization) and map the new tensor impl keys to the +// correct state for each param +template +void serialize( + serialize::InputArchive& archive, + std::vector< + std::pair, std::unique_ptr>>& + param_groups) { + torch::Tensor param_groups_size_tensor; + archive.read("param_groups/size", param_groups_size_tensor); + const int64_t param_groups_size = param_groups_size_tensor.item(); + for (const auto i : c10::irange(param_groups_size)) { + serialize::InputArchive param_group_archive; + archive.read("param_groups/" + std::to_string(i), param_group_archive); + torch::Tensor size_tensor; + param_group_archive.read("params/size", size_tensor); + const int64_t size = size_tensor.item(); + 
std::vector params; + for (const auto index : c10::irange(size)) { + IValue ivalue; + param_group_archive.read("params/" + std::to_string(index), ivalue); + std::string element = ivalue.toStringRef(); + params.emplace_back(element); + } + serialize::InputArchive param_group_options_archive; + param_group_archive.read("options", param_group_options_archive); + DerivedOptimizerParamOptions param_group_options(0); + param_group_options.serialize(param_group_options_archive); + param_groups.emplace_back(std::make_pair( + params, + std::make_unique(param_group_options))); + } +} +} // namespace detail + +// Note: These functions are all called `serialize()` so they can be called +// inside a template where the archive type is a template type and can thus be +// passed such that the appropriate overload is selected. + +/// Utility function to save a value of `int64_t` type. +void serialize( + serialize::OutputArchive& archive, + const std::string& key, + const int64_t& value); + +/// Utility function to load a value of `int64_t` type. +void serialize( + serialize::InputArchive& archive, + const std::string& key, + int64_t& value); + +/// Utility function to save a vector of step buffers. +void serialize( + serialize::OutputArchive& archive, + const std::string& key, + const std::vector& steps); + +/// Utility function to load a vector of step buffers. 
+void serialize( + serialize::InputArchive& archive, + const std::string& key, + std::vector& steps); + +// Utility function to save state and param_groups +template < + typename DerivedOptimizerParamState, + typename DerivedOptimizerParamOptions> +void serialize(serialize::OutputArchive& archive, const Optimizer& optimizer) { + archive.write("pytorch_version", IValue("1.5.0")); + serialize::OutputArchive state_archive(archive.compilation_unit()); + detail::serialize( + state_archive, optimizer.state()); + archive.write("state", state_archive); + + serialize::OutputArchive param_groups_archive(archive.compilation_unit()); + detail::serialize( + param_groups_archive, optimizer.param_groups()); + archive.write("param_groups", param_groups_archive); +} + +// Utility function to load state and param_groups and update state +template < + typename DerivedOptimizerParamState, + typename DerivedOptimizerParamOptions> +void serialize(serialize::InputArchive& archive, Optimizer& optimizer) { + IValue pytorch_version; + archive.read("pytorch_version", pytorch_version); + TORCH_INTERNAL_ASSERT(pytorch_version.toStringRef() == "1.5.0"); + serialize::InputArchive state_archive; + archive.read("state", state_archive); + ska::flat_hash_map> saved_state; + detail::serialize(state_archive, saved_state); + + serialize::InputArchive param_groups_archive; + archive.read("param_groups", param_groups_archive); + std::vector< + std::pair, std::unique_ptr>> + saved_param_groups; + detail::serialize( + param_groups_archive, saved_param_groups); + + // update state and optimizer options + TORCH_CHECK( + saved_param_groups.size() == optimizer.param_groups().size(), + "loaded state dict has a different number of parameter groups"); + for (const auto i : c10::irange(saved_param_groups.size())) { + std::vector param_group_old_keys = saved_param_groups[i].first; + std::vector params = optimizer.param_groups()[i].params(); + TORCH_CHECK( + param_group_old_keys.size() == params.size(), + "loaded 
state dict contains a parameter group that has a different size than the optimizer's parameter group"); + + for (const auto idx : c10::irange(params.size())) { + auto param_group_old_key = + // NOLINTNEXTLINE(performance-no-int-to-ptr) + reinterpret_cast(std::stoull(param_group_old_keys[idx])); + if (saved_state.find(param_group_old_key) != saved_state.end()) { + optimizer.state()[params[idx].unsafeGetTensorImpl()] = + std::move(saved_state[param_group_old_key]); + } + } + + auto& saved_options = reinterpret_cast( + *saved_param_groups[i].second); + auto& current_options = reinterpret_cast( + optimizer.param_groups()[i].options()); + current_options = saved_options; + } +} + +/// Utility function to save a vector of buffers. +template +void serialize( + serialize::OutputArchive& archive, + const std::string& key, + const BufferContainer& buffers) { + archive.write( + key + "/size", torch::tensor(static_cast(buffers.size()))); + for (const auto index : c10::irange(buffers.size())) { + archive.write( + key + "/" + std::to_string(index), buffers[index], /*is_buffer=*/true); + } +} + +/// Utility function to load a vector of buffers. 
+template +void serialize( + serialize::InputArchive& archive, + const std::string& key, + BufferContainer& buffers) { + buffers.clear(); + torch::Tensor size_tensor; + archive.read(key + "/size", size_tensor); + const size_t size = size_tensor.item(); + for (const auto index : c10::irange(size)) { + buffers.emplace_back(); + archive.read( + key + "/" + std::to_string(index), buffers.back(), /*is_buffer=*/true); + } +} + +template +c10::List deque_to_list(const std::deque& dq) { + c10::List list; + list.reserve(dq.size()); + for (const auto& e : dq) { + list.emplace_back(e); + } + return list; +} + +template +std::deque list_to_deque(const c10::List& list) { + std::deque dq; + for (const auto& e : list) { + dq.emplace_back(e); + } + return dq; +} + +#define _TORCH_OPTIM_SERIALIZE(name) \ + torch::optim::serialize(archive, #name, self.name) + +#define _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(OptimizerName) \ + torch::optim::serialize( \ + archive, self) + +#define _TORCH_OPTIM_SERIALIZE_TORCH_ARG(name) \ + { \ + auto ivalue = torch::IValue(name()); \ + /* do not serialize if name is an undefined tensor*/ \ + if (!(ivalue.isTensor() && \ + ivalue.unsafeToTensorImpl() == \ + at::UndefinedTensorImpl::singleton())) { \ + archive.write(#name, ivalue); \ + } \ + } + +#define _TORCH_OPTIM_SERIALIZE_TORCH_ARG_DEQUE(name) \ + { \ + c10::IValue ivalue = torch::IValue(deque_to_list(name())); \ + archive.write(#name, ivalue); \ + } + +#define _TORCH_OPTIM_DESERIALIZE_TORCH_ARG(T, name) \ + { \ + c10::IValue ivalue; \ + bool exists = archive.try_read(#name, ivalue); \ + if (exists) { \ + name(ivalue.to()); \ + } else { \ + constexpr bool is_tensor_type = std::is_base_of_v; \ + TORCH_INTERNAL_ASSERT(is_tensor_type); \ + } \ + } + +#define _TORCH_OPTIM_DESERIALIZE_TORCH_ARG_OPTIONAL(T, name) \ + { \ + c10::IValue ivalue; \ + bool exists = archive.try_read(#name, ivalue); \ + if (exists) { \ + name(ivalue.toOptional()); \ + } \ + } + +#define 
_TORCH_OPTIM_DESERIALIZE_TORCH_ARG_DEQUE(T, name) \ + { \ + c10::IValue ivalue; \ + archive.read(#name, ivalue); \ + auto list = ivalue.to>(); \ + name(list_to_deque(list)); \ + } + +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/sgd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/sgd.h new file mode 100644 index 0000000000000000000000000000000000000000..a1875f052e27f7ac132103943591ad031befbcda --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/optim/sgd.h @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::optim { + +struct TORCH_API SGDOptions : public OptimizerCloneableOptions { + SGDOptions(double lr); + TORCH_ARG(double, lr); + TORCH_ARG(double, momentum) = 0; + TORCH_ARG(double, dampening) = 0; + TORCH_ARG(double, weight_decay) = 0; + TORCH_ARG(bool, nesterov) = false; + + public: + void serialize(torch::serialize::InputArchive& archive) override; + void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const SGDOptions& lhs, + const SGDOptions& rhs); + double get_lr() const override; + void set_lr(const double lr) override; +}; + +struct TORCH_API SGDParamState + : public OptimizerCloneableParamState { + TORCH_ARG(torch::Tensor, momentum_buffer); + + public: + void serialize(torch::serialize::InputArchive& archive) override; + 
void serialize(torch::serialize::OutputArchive& archive) const override; + TORCH_API friend bool operator==( + const SGDParamState& lhs, + const SGDParamState& rhs); +}; + +class TORCH_API SGD : public Optimizer { + public: + explicit SGD( + const std::vector& param_groups, + SGDOptions defaults) + : Optimizer(param_groups, std::make_unique(defaults)) { + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK( + defaults.momentum() >= 0, + "Invalid momentum value: ", + defaults.momentum()); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + TORCH_CHECK( + !defaults.nesterov() || + (defaults.momentum() > 0 && defaults.dampening() == 0), + "Nesterov momentum requires a momentum and zero dampening"); + } + + explicit SGD(std::vector params, SGDOptions defaults) + : SGD({OptimizerParamGroup(std::move(params))}, std::move(defaults)) {} + + torch::Tensor step(LossClosure closure = nullptr) override; + + void save(serialize::OutputArchive& archive) const override; + void load(serialize::InputArchive& archive) override; + + private: + template + static void serialize(Self& self, Archive& archive) { + _TORCH_OPTIM_SERIALIZE_WITH_TEMPLATE_ARG(SGD); + } +}; +} // namespace torch::optim + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/ordered_dict.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/ordered_dict.h new file mode 100644 index 0000000000000000000000000000000000000000..162d1c4ff511f98229ac06e6580164b25f159103 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/ordered_dict.h @@ -0,0 +1,521 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch { +/// An ordered dictionary implementation, akin to Python's `OrderedDict`. +template +class OrderedDict { + public: + /// A (key, value) pair. + class Item; + + // The lifetime of an iterator is bound to the lifetime of the `OrderedDict`. + // Further, any `insert()` operation may invalidate all iterators + // pointing into the vector. + using Iterator = typename std::vector::iterator; + using ConstIterator = typename std::vector::const_iterator; + + /// Constructs the `OrderedDict` with a short description of the kinds of keys + /// stored in the `OrderedDict`. This description is used in error messages + /// thrown by the `OrderedDict`. + explicit OrderedDict(std::string key_description = "Key"); + + /// Copy constructs this `OrderedDict` from `other`. + OrderedDict(const OrderedDict& other); + + /// Assigns items from `other` to this `OrderedDict`. + OrderedDict& operator=(const OrderedDict& other); + + // NB: Move works by default, because you can move-construct vectors of const + // values. I tried to make this noexcept (conditional on the move constructors + // of index_ and items_ being noexcept) but the obvious spelling didn't + // compile on Windows. 
+ OrderedDict(OrderedDict&& other) noexcept = default; + OrderedDict& operator=(OrderedDict&& other) noexcept = default; + + ~OrderedDict() = default; + + /// Constructs a new `OrderedDict` and pre-populates it with the given + /// `Item`s. + /*implicit */ OrderedDict(std::initializer_list initializer_list); + + /// Returns the key description string the `OrderedDict` was constructed with. + const std::string& key_description() const noexcept; + + // Element Access + + /// Returns the very first item in the `OrderedDict` and throws an exception + /// if it is empty. + Item& front(); + + /// Returns the very first item in the `OrderedDict` and throws an exception + /// if it is empty. + const Item& front() const; + + /// Returns the very last item in the `OrderedDict` and throws an exception + /// if it is empty. + Item& back(); + + /// Returns the very last item in the `OrderedDict` and throws an exception + /// if it is empty. + const Item& back() const; + + /// Returns the item at the `index`-th position in the `OrderedDict`. Throws + /// an exception if the index is out of bounds. + Item& operator[](size_t index); + + /// Returns the item at the `index`-th position in the `OrderedDict`. Throws + /// an exception if the index is out of bounds. + const Item& operator[](size_t index) const; + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `OrderedDict`. Use `find()` for a + /// non-throwing way of accessing a value if it is present. + Value& operator[](const Key& key); + + /// Returns the value associated with the given `key`. Throws an exception if + /// no such key is stored in the `OrderedDict`. Use `find()` for a + /// non-throwing way of accessing a value if it is present. + const Value& operator[](const Key& key) const; + + // Lookup + + /// Returns a pointer to the value associated with the given key, or a + /// `nullptr` if no such key is stored in the `OrderedDict`. 
+ Value* find(const Key& key) noexcept; + + /// Returns a pointer to the value associated with the given key, or a + /// `nullptr` if no such key is stored in the `OrderedDict`. + const Value* find(const Key& key) const noexcept; + + /// Returns true if the key is present in the `OrderedDict`. + bool contains(const Key& key) const noexcept; + + // Iterators + + /// Returns an iterator to the first item in the `OrderedDict`. Iteration is + /// ordered. + Iterator begin(); + + /// Returns an iterator to the first item in the `OrderedDict`. Iteration is + /// ordered. + ConstIterator begin() const; + + /// Returns an iterator one past the last item in the `OrderedDict`. + Iterator end(); + + /// Returns an iterator one past the last item in the `OrderedDict`. + ConstIterator end() const; + + // Capacity + + /// Returns the number of items currently stored in the `OrderedDict`. + size_t size() const noexcept; + + /// Returns true if the `OrderedDict` contains no elements. + bool is_empty() const noexcept; + + /// Resizes internal storage to fit at least `requested_capacity` items + /// without requiring reallocation. + void reserve(size_t requested_capacity); + + // Modifiers + + /// Inserts a new `(key, value)` pair into the `OrderedDict`. Throws an + /// exception if the key is already present. If insertion is successful, + /// immediately returns a reference to the inserted value. + template + Value& insert(K&& key, V&& value); + + /// Inserts a new `(key, value)` pair into the `OrderedDict`. Throws an + /// exception if the key is already present. If insertion is successful, + /// immediately returns a reference to the inserted value. + Value& insert(Key key, Value&& value); + + /// Inserts all items from `other` into this `OrderedDict`. If any key from + /// `other` is already present in this `OrderedDict`, an exception is thrown. + void update(OrderedDict&& other); + + /// Inserts all items from `other` into this `OrderedDict`. 
If any key from + /// `other` is already present in this `OrderedDict`, an exception is thrown. + void update(const OrderedDict& other); + + /// Removes the item that has `key` from this `OrderedDict` if exists and if + /// it doesn't an exception is thrown. + void erase(const Key& key); + + /// Removes all items from this `OrderedDict`. + void clear(); + + // Observers + + /// Returns the items stored in the `OrderedDict`. + const std::vector& items() const noexcept; + + /// Returns a newly allocated vector and copies all keys from this + /// `OrderedDict` into the vector. + ::std::vector keys() const; + + /// Returns a newly allocated vector and copies all values from this + /// `OrderedDict` into the vector. + ::std::vector values() const; + + /// Returns a newly allocated vector and copies all keys and values from this + /// `OrderedDict` into a vector of `std::pair`. + ::std::vector> pairs() const; + + /// Returns true if both dicts contain the same keys and values, in the same + /// order. + template + friend bool operator==( + const OrderedDict& a, + const OrderedDict& b); + + private: + /// A mapping from a key to an index into the `items_` vector. + ::std::unordered_map index_; + + /// The items stored in the `OrderedDict`. + ::std::vector items_; + + /// A description of the keys stored in the `OrderedDict`. + ::std::string key_description_{"Key"}; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OrderedDict::Item ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +class OrderedDict::Item { + public: + /// Constructs a new item. + Item(Key key, Value value) : pair_(std::move(key), std::move(value)) {} + + /// Returns a reference to the value. + Value& operator*() { + return value(); + } + + /// Returns a reference to the value. + const Value& operator*() const { + return value(); + } + + /// Allows access to the value using the arrow operator. + Value* operator->() { + return &value(); + } + + /// Allows access to the value using the arrow operator. 
+ const Value* operator->() const { + return &value(); + } + + /// Returns a reference to the key. + const Key& key() const noexcept { + return pair_.first; + } + + /// Returns a reference to the value. + Value& value() noexcept { + return pair_.second; + } + + /// Returns a reference to the value. + const Value& value() const noexcept { + return pair_.second; + } + + /// Returns a `(key, value)` pair. + const std::pair& pair() const noexcept { + return pair_; + } + + private: + /// This is stored as an std::pair because it will make Python binding a lot, + /// lot easier. + ::std::pair pair_; +}; + +// ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ OrderedDict ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + +template +OrderedDict::OrderedDict(std::string key_description) + : key_description_(std::move(key_description)) {} + +template +OrderedDict::OrderedDict(const OrderedDict& other) + : index_(other.index_), key_description_(other.key_description_) { + // Copy we have to do ourselves, because items' keys are const, so we have to + // re-insert the items. + for (const auto& item : other.items_) { + items_.push_back(item); + } +} + +template +OrderedDict& OrderedDict::operator=( + const OrderedDict& other) { + index_ = other.index_; + items_.clear(); + for (auto& item : other.items_) { + items_.push_back(item); + } + key_description_ = other.key_description_; + return *this; +} + +template +OrderedDict::OrderedDict( + std::initializer_list initializer_list) + : OrderedDict("Key") { + items_.reserve(initializer_list.size()); + for (auto& item : initializer_list) { + // Copy the key here and move it into the index. 
+ items_.emplace_back(item.key(), std::move(item.value())); + index_.emplace(std::move(item.key()), size() - 1); + } +} + +template +typename OrderedDict::Iterator OrderedDict::begin() { + return items_.begin(); +} + +template +typename OrderedDict::ConstIterator OrderedDict::begin() + const { + return items_.begin(); +} + +template +typename OrderedDict::Iterator OrderedDict::end() { + return items_.end(); +} + +template +typename OrderedDict::ConstIterator OrderedDict::end() + const { + return items_.end(); +} + +template +typename OrderedDict::Item& OrderedDict::front() { + TORCH_CHECK(!items_.empty(), "Called front() on an empty OrderedDict"); + return items_.front(); +} + +template +const typename OrderedDict::Item& OrderedDict::front() + const { + TORCH_CHECK(!items_.empty(), "Called front() on an empty OrderedDict"); + return items_.front(); +} + +template +typename OrderedDict::Item& OrderedDict::back() { + TORCH_CHECK(!items_.empty(), "Called back() on an empty OrderedDict"); + return items_.back(); +} + +template +const typename OrderedDict::Item& OrderedDict::back() + const { + TORCH_CHECK(!items_.empty(), "Called back() on an empty OrderedDict"); + return items_.back(); +} + +template +typename OrderedDict::Item& OrderedDict::operator[]( + size_t index) { + TORCH_CHECK(index < items_.size(), "Index ", index, " is out of bounds"); + return items_[index]; +} + +template +const typename OrderedDict::Item& OrderedDict:: +operator[](size_t index) const { + TORCH_CHECK(index < items_.size(), "Index ", index, " is out of bounds"); + return items_[index]; +} + +template +Value& OrderedDict::operator[](const Key& key) { + if (auto* value = find(key)) { + return *value; + } + TORCH_CHECK(false, key_description_, " '", key, "' is not defined"); +} + +template +const Value& OrderedDict::operator[](const Key& key) const { + if (auto* value = find(key)) { + return *value; + } + TORCH_CHECK(false, key_description_, " '", key, "' is not defined"); +} + +template 
+template +Value& OrderedDict::insert(K&& key, V&& value) { + TORCH_CHECK( + index_.count(key) == 0, key_description_, " '", key, "' already defined"); + // Copy `key` here and move it into the index. + items_.emplace_back(key, std::forward(value)); + index_.emplace(std::forward(key), size() - 1); + return items_.back().value(); +} + +template +Value& OrderedDict::insert(Key key, Value&& value) { + return insert(std::move(key), std::move(value)); +} + +template +void OrderedDict::update(OrderedDict&& other) { + reserve(size() + other.size()); + for (auto&& item : std::move(other)) { + // We want to call `insert()` to prevent duplicate keys. + insert(std::move(item.key()), std::move(item.value())); + } +} + +template +void OrderedDict::update(const OrderedDict& other) { + reserve(size() + other.size()); + for (auto& item : other) { + // We want to call `insert()` to prevent duplicate keys. + insert(item.key(), item.value()); + } +} + +template +Value* OrderedDict::find(const Key& key) noexcept { + auto iterator = index_.find(key); + if (iterator == index_.end()) { + return nullptr; + } + return &items_[iterator->second].value(); +} + +template +const Value* OrderedDict::find(const Key& key) const noexcept { + auto iterator = index_.find(key); + if (iterator == index_.end()) { + return nullptr; + } + return &items_[iterator->second].value(); +} + +template +void OrderedDict::erase(const Key& key) { + auto it = index_.find(key); + TORCH_CHECK(it != index_.end(), "Key '", key, "' doesn't exist"); + + auto index = it->second; + index_.erase(it); + items_.erase(items_.begin() + index); + + for (auto& pair : index_) + if (pair.second > index) + --pair.second; +} + +template +bool OrderedDict::contains(const Key& key) const noexcept { + return find(key) != nullptr; +} + +template +void OrderedDict::clear() { + index_.clear(); + items_.clear(); +} + +template +size_t OrderedDict::size() const noexcept { + return items_.size(); +} + +template +bool OrderedDict::is_empty() 
const noexcept { + return items_.empty(); +} + +template +const std::string& OrderedDict::key_description() const noexcept { + return key_description_; +} + +template +const std::vector::Item>& OrderedDict< + Key, + Value>::items() const noexcept { + return items_; +} + +template +::std::vector OrderedDict::keys() const { + std::vector keys; + keys.reserve(size()); + for (const auto& item : items_) { + keys.push_back(item.key()); + } + return keys; +} + +template +::std::vector OrderedDict::values() const { + std::vector values; + values.reserve(size()); + for (const auto& item : items_) { + values.push_back(item.value()); + } + return values; +} + +template +::std::vector> OrderedDict::pairs() const { + std::vector> values; + values.reserve(size()); + for (const auto& item : items_) { + values.push_back(item.pair()); + } + return values; +} + +template +void OrderedDict::reserve(size_t requested_capacity) { + index_.reserve(requested_capacity); + items_.reserve(requested_capacity); +} + +template +bool operator==( + const torch::OrderedDict& a, + const torch::OrderedDict& b) { + using Item = typename torch::OrderedDict::Item; + if (a.index_ != b.index_) + return false; + if (a.items_.size() != b.items_.size()) + return false; + // NOTE: There's no point in comparing keys for items_, as we already know + // that index is equal. + return std::equal( + a.items_.begin(), + a.items_.end(), + b.items_.begin(), + [](const Item& a, const Item& b) { return a.value() == b.value(); }); +} + +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python.h new file mode 100644 index 0000000000000000000000000000000000000000..5927102f65f0215ec1adabaf0314ea04e87f8884 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python.h @@ -0,0 +1,264 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::python { +namespace detail { +inline Device py_object_to_device(py::object object) { + PyObject* obj = object.ptr(); + if (THPDevice_Check(obj)) { + return reinterpret_cast(obj)->device; + } + TORCH_CHECK_TYPE(false, "Expected device"); +} + +inline Dtype py_object_to_dtype(py::object object) { + PyObject* obj = object.ptr(); + if (THPDtype_Check(obj)) { + return reinterpret_cast(obj)->scalar_type; + } + TORCH_CHECK_TYPE(false, "Expected dtype"); +} + +template +using PyModuleClass = + py::class_>; + +/// Dynamically creates a subclass of `torch.nn.cpp.ModuleWrapper` that is also +/// a subclass of `torch.nn.Module`, and passes it the user-provided C++ module +/// to which it delegates all calls. +template +void bind_cpp_module_wrapper( + const py::module& module, + PyModuleClass cpp_class, + const char* name) { + // Grab the `torch.nn.cpp.ModuleWrapper` class, which we'll subclass + // with a dynamically created class below. + py::object cpp_module = + py::module::import("torch.nn.cpp").attr("ModuleWrapper"); + + // Grab the `type` class which we'll use as a metaclass to create a new class + // dynamically. 
+ py::object type_metaclass = + py::reinterpret_borrow((PyObject*)&PyType_Type); + + // The `ModuleWrapper` constructor copies all functions to its own `__dict__` + // in its constructor, but we do need to give our dynamic class a constructor. + // Inside, we construct an instance of the original C++ module we're binding + // (the `torch::nn::Module` subclass), and then forward it to the + // `ModuleWrapper` constructor. + py::dict attributes; + + // `type()` always needs a `str`, but pybind11's `str()` method always creates + // a `unicode` object. + py::object name_str = py::str(name); + + // Dynamically create the subclass of `ModuleWrapper`, which is a subclass of + // `torch.nn.Module`, and will delegate all calls to the C++ module we're + // binding. + py::object wrapper_class = + type_metaclass(name_str, py::make_tuple(cpp_module), attributes); + + // The constructor of the dynamic class calls `ModuleWrapper.__init__()`, + // which replaces its methods with those of the C++ module. + wrapper_class.attr("__init__") = py::cpp_function( + [cpp_module, cpp_class]( + const py::object& self, + const py::args& args, + const py::kwargs& kwargs) { + cpp_module.attr("__init__")(self, cpp_class(*args, **kwargs)); + }, + py::is_method(wrapper_class)); + + // Calling `my_module.my_class` now means that `my_class` is a subclass of + // `ModuleWrapper`, and whose methods call into the C++ module we're binding. + module.attr(name) = wrapper_class; +} +} // namespace detail + +/// Adds method bindings for a pybind11 `class_` that binds an `nn::Module` +/// subclass. +/// +/// Say you have a pybind11 class object created with `py::class_(m, +/// "Net")`. This function will add all the necessary `.def()` calls to bind the +/// `nn::Module` base class' methods, such as `train()`, `eval()` etc. into +/// Python. +/// +/// Users should prefer to use `bind_module` if possible. 
+template +py::class_ add_module_bindings( + py::class_ module) { + // clang-format off + return module + .def("train", + [](ModuleType& module, bool mode) { module.train(mode); }, + py::arg("mode") = true) + .def("eval", [](ModuleType& module) { module.eval(); }) + .def("clone", [](ModuleType& module) { return module.clone(); }) + .def_property_readonly( + "training", [](ModuleType& module) { return module.is_training(); }) + .def("zero_grad", [](ModuleType& module) { module.zero_grad(); }) + .def_property_readonly( "_parameters", [](ModuleType& module) { + return module.named_parameters(/*recurse=*/false); + }) + .def("parameters", [](ModuleType& module, bool recurse) { + return module.parameters(recurse); + }, + py::arg("recurse") = true) + .def("named_parameters", [](ModuleType& module, bool recurse) { + return module.named_parameters(recurse); + }, + py::arg("recurse") = true) + .def_property_readonly("_buffers", [](ModuleType& module) { + return module.named_buffers(/*recurse=*/false); + }) + .def("buffers", [](ModuleType& module, bool recurse) { + return module.buffers(recurse); }, + py::arg("recurse") = true) + .def("named_buffers", [](ModuleType& module, bool recurse) { + return module.named_buffers(recurse); + }, + py::arg("recurse") = true) + .def_property_readonly( + "_modules", [](ModuleType& module) { return module.named_children(); }) + .def("modules", [](ModuleType& module) { return module.modules(); }) + .def("named_modules", + [](ModuleType& module, const py::object& /* unused */, std::string prefix, bool remove_duplicate /* unused */) { + return module.named_modules(std::move(prefix)); + }, + py::arg("memo") = py::none(), + py::arg("prefix") = std::string(), + py::arg("remove_duplicate") = true) + .def("children", [](ModuleType& module) { return module.children(); }) + .def("named_children", + [](ModuleType& module) { return module.named_children(); }) + .def("to", [](ModuleType& module, py::object object, bool non_blocking) { + if 
(THPDevice_Check(object.ptr())) { + module.to( + reinterpret_cast(object.ptr())->device, + non_blocking); + } else { + module.to(detail::py_object_to_dtype(object), non_blocking); + } + }, + py::arg("dtype_or_device"), + py::arg("non_blocking") = false) + .def("to", + [](ModuleType& module, + const py::object& device, + const py::object& dtype, + bool non_blocking) { + if (device.is_none()) { + module.to(detail::py_object_to_dtype(dtype), non_blocking); + } else if (dtype.is_none()) { + module.to(detail::py_object_to_device(device), non_blocking); + } else { + module.to( + detail::py_object_to_device(device), + detail::py_object_to_dtype(dtype), + non_blocking); + } + }, + py::arg("device"), + py::arg("dtype"), + py::arg("non_blocking") = false) + .def("cuda", [](ModuleType& module) { module.to(kCUDA); }) + .def("cpu", [](ModuleType& module) { module.to(kCPU); }) + .def("float", [](ModuleType& module) { module.to(kFloat32); }) + .def("double", [](ModuleType& module) { module.to(kFloat64); }) + .def("half", [](ModuleType& module) { module.to(kFloat16); }) + .def("__str__", [](ModuleType& module) { return module.name(); }) + .def("__repr__", [](ModuleType& module) { return module.name(); }); + // clang-format on +} + +/// Creates a pybind11 class object for an `nn::Module` subclass type and adds +/// default bindings. +/// +/// After adding the default bindings, the class object is returned, such that +/// you can add more bindings. +/// +/// Example usage: +/// \rst +/// .. 
code-block:: cpp +/// +/// struct Net : torch::nn::Module { +/// Net(int in, int out) { } +/// torch::Tensor forward(torch::Tensor x) { return x; } +/// }; +/// +/// PYBIND11_MODULE(my_module, m) { +/// torch::python::bind_module(m, "Net") +/// .def(py::init()) +/// .def("forward", &Net::forward); +/// } +/// \endrst +template +std::enable_if_t< + !torch::detail::has_forward::value || force_enable, + detail::PyModuleClass> +bind_module(py::module module, const char* name) { + py::module cpp = module.def_submodule("cpp"); + auto cpp_class = + add_module_bindings(detail::PyModuleClass(cpp, name)); + detail::bind_cpp_module_wrapper(module, cpp_class, name); + return cpp_class; +} + +/// Creates a pybind11 class object for an `nn::Module` subclass type and adds +/// default bindings. +/// +/// After adding the default bindings, the class object is returned, such that +/// you can add more bindings. +/// +/// If the class has a `forward()` method, it is automatically exposed as +/// `forward()` and `__call__` in Python. +/// +/// Example usage: +/// \rst +/// .. code-block:: cpp +/// +/// struct Net : torch::nn::Module { +/// Net(int in, int out) { } +/// torch::Tensor forward(torch::Tensor x) { return x; } +/// }; +/// +/// PYBIND11_MODULE(my_module, m) { +/// torch::python::bind_module(m, "Net") +/// .def(py::init()) +/// .def("forward", &Net::forward); +/// } +/// \endrst +template < + typename ModuleType, + typename = std::enable_if_t::value>> +detail::PyModuleClass bind_module( + py::module module, + const char* name) { + return bind_module(module, name) + .def("forward", &ModuleType::forward) + .def("__call__", &ModuleType::forward); +} +} // namespace torch::python + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python/init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python/init.h new file mode 100644 index 0000000000000000000000000000000000000000..01f0a2bd4c7d6679c07d0047e5ce1f441a553e88 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/python/init.h @@ -0,0 +1,13 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::python { +/// Initializes Python bindings for the C++ frontend. +void init_bindings(PyObject* module); +} // namespace torch::python + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize.h new file mode 100644 index 0000000000000000000000000000000000000000..34d4afcdcfca92cb81914efdc7e6f3ab3e50e88e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize.h @@ -0,0 +1,149 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch { + +/// Serializes the given `value`. +/// There must be an overload of `operator<<` between `serialize::OutputArchive` +/// and `Value` for this method to be well-formed. 
Currently, such an overload +/// is provided for (subclasses of): +/// +/// - `torch::nn::Module`, +/// - `torch::optim::Optimizer` +/// - `torch::Tensor` +/// +/// To perform the serialization, a `serialize::OutputArchive` is constructed, +/// and all arguments after the `value` are forwarded to its `save_to` method. +/// For example, you can pass a filename, or an `ostream`. +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::Linear model(3, 4); +/// torch::save(model, "model.pt"); +/// +/// torch::optim::SGD sgd(model->parameters(), 0.9); // 0.9 is learning rate +/// std::ostringstream stream; +/// // Note that the same stream cannot be used in multiple torch::save(...) +/// // invocations, otherwise the header will be corrupted. +/// torch::save(sgd, stream); +/// +/// auto tensor = torch::ones({3, 4}); +/// torch::save(tensor, "my_tensor.pt"); +/// \endrst +template +void save(const Value& value, SaveToArgs&&... args) { + serialize::OutputArchive archive(std::make_shared()); + archive << value; + archive.save_to(std::forward(args)...); +} + +/// Serializes the given `tensor_vec` of type `std::vector`. +/// +/// To perform the serialization, a `serialize::OutputArchive` is constructed, +/// and all arguments after the `tensor_vec` are forwarded to its `save_to` +/// method. For example, you can pass a filename, or an `ostream`. +/// +/// \rst +/// .. code-block:: cpp +/// +/// std::vector tensor_vec = { torch::randn({1, 2}), +/// torch::randn({3, 4}) }; torch::save(tensor_vec, "my_tensor_vec.pt"); +/// +/// std::vector tensor_vec = { torch::randn({5, 6}), +/// torch::randn({7, 8}) }; std::ostringstream stream; +/// // Note that the same stream cannot be used in multiple torch::save(...) +/// // invocations, otherwise the header will be corrupted. +/// torch::save(tensor_vec, stream); +/// \endrst +template +void save(const std::vector& tensor_vec, SaveToArgs&&... 
args) { + serialize::OutputArchive archive(std::make_shared()); + for (const auto i : c10::irange(tensor_vec.size())) { + auto& value = tensor_vec[i]; + archive.write(std::to_string(i), value); + } + archive.save_to(std::forward(args)...); +} + +TORCH_API std::vector pickle_save(const torch::IValue& ivalue); +TORCH_API torch::IValue pickle_load(const std::vector& data); + +/// Deserializes the given `value`. +/// There must be an overload of `operator>>` between `serialize::InputArchive` +/// and `Value` for this method to be well-formed. Currently, such an overload +/// is provided for (subclasses of): +/// +/// - `torch::nn::Module`, +/// - `torch::optim::Optimizer` +/// - `torch::Tensor` +/// +/// To perform the serialization, a `serialize::InputArchive` is constructed, +/// and all arguments after the `value` are forwarded to its `load_from` method. +/// For example, you can pass a filename, or an `istream`. +/// +/// \rst +/// .. code-block:: cpp +/// +/// torch::nn::Linear model(3, 4); +/// torch::load(model, "model.pt"); +/// +/// torch::optim::SGD sgd(model->parameters(), 0.9); // 0.9 is learning rate +/// std::istringstream stream("..."); +/// torch::load(sgd, stream); +/// +/// auto tensor = torch::ones({3, 4}); +/// torch::load(tensor, "my_tensor.pt"); +/// \endrst +template +void load(Value& value, LoadFromArgs&&... args) { + serialize::InputArchive archive; + archive.load_from(std::forward(args)...); + archive >> value; +} + +/// Deserializes the given `tensor_vec` of type `std::vector`. +/// +/// To perform the serialization, a `serialize::InputArchive` is constructed, +/// and all arguments after the `value` are forwarded to its `load_from` method. +/// For example, you can pass a filename, or an `istream`. +/// +/// \rst +/// .. 
code-block:: cpp +/// +/// std::vector tensor_vec; +/// torch::load(tensor_vec, "my_tensor_vec.pt"); +/// +/// std::vector tensor_vec; +/// std::istringstream stream("..."); +/// torch::load(tensor_vec, stream); +/// \endrst +template +void load(std::vector& tensor_vec, LoadFromArgs&&... args) { + serialize::InputArchive archive; + archive.load_from(std::forward(args)...); + + // NOTE: The number of elements in the serialized `std::vector` + // is not known ahead of time, so we need a while-loop to increment the index, + // and use `archive.try_read(...)` to check whether we have reached the end of + // the serialized `std::vector`. + size_t index = 0; + torch::Tensor value; + while (archive.try_read(std::to_string(index), value)) { + tensor_vec.push_back(std::move(value)); + value = torch::Tensor(); + index++; + } +} +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/archive.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/archive.h new file mode 100644 index 0000000000000000000000000000000000000000..7ac3b174751d9c651996479afb2ff7f852833667 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/archive.h @@ -0,0 +1,9 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/input-archive.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/input-archive.h new file mode 100644 index 0000000000000000000000000000000000000000..64142d8818a04ea5525ff6f2ed4ebf139e810aa0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/input-archive.h @@ -0,0 +1,120 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace at { +class Tensor; +} // namespace at + +namespace torch { +using at::Tensor; +namespace jit { +struct Module; +} // namespace jit +} // namespace torch + +namespace torch::serialize { + +/// A recursive representation of tensors that can be deserialized from a file +/// or stream. In most cases, users should not have to interact with this class, +/// and should instead use `torch::load`. +class TORCH_API InputArchive final { + public: + /// Default-constructs the `InputArchive`. + InputArchive(); + + // Move is allowed. + InputArchive(InputArchive&&) = default; + InputArchive& operator=(InputArchive&&) = default; + + // Copy is disallowed. + InputArchive(InputArchive&) = delete; + InputArchive& operator=(InputArchive&) = delete; + + ~InputArchive() = default; + + /// Reads an `IValue` associated with a given `key`. + void read(const std::string& key, c10::IValue& ivalue); + + /// Reads an `IValue` associated with a given `key`. If there is no `IValue` + /// associated with the `key`, this returns false, otherwise it returns true. + bool try_read(const std::string& key, c10::IValue& ivalue); + + /// Reads a `tensor` associated with a given `key`. 
If there is no `tensor` + /// associated with the `key`, this returns false, otherwise it returns true. + /// If the tensor is expected to be a buffer (not differentiable), `is_buffer` + /// must be `true`. + bool try_read(const std::string& key, Tensor& tensor, bool is_buffer = false); + + /// Reads a `tensor` associated with a given `key`. + /// If the tensor is expected to be a buffer (not differentiable), `is_buffer` + /// must be `true`. + void read(const std::string& key, Tensor& tensor, bool is_buffer = false); + + /// Reads a `InputArchive` associated with a given `key`. If there is no + /// `InputArchive` associated with the `key`, this returns false, otherwise + /// it returns true. + bool try_read(const std::string& key, InputArchive& archive); + + /// Reads an `InputArchive` associated with a given `key`. + /// The archive can thereafter be used for further deserialization of the + /// nested data. + void read(const std::string& key, InputArchive& archive); + + /// Loads the `InputArchive` from a serialized representation stored in the + /// file at `filename`. Storage are remapped using device option. If device + /// is not specified, the module is loaded to the original device. + void load_from( + const std::string& filename, + std::optional device = std::nullopt); + + /// Loads the `InputArchive` from a serialized representation stored in the + /// given `stream`. Storage are remapped using device option. If device + /// is not specified, the module is loaded to the original device. + void load_from( + std::istream& stream, + std::optional device = std::nullopt); + + // Loads given the specified flat array. + void load_from( + const char* data, + size_t size, + std::optional device = std::nullopt); + + // Loads given the specified read and size functions. + void load_from( + const std::function& + read_func, + const std::function& size_func, + std::optional device = std::nullopt); + + // Returns the vector of keys in the input archive. 
+ std::vector keys(); + + /// Forwards all arguments to `read()`. + /// Useful for generic code that can be reused for both `InputArchive` and + /// `OutputArchive` (where `operator()` forwards to `write()`). + template + void operator()(Ts&&... ts) { + read(std::forward(ts)...); + } + + private: + jit::Module module_; + std::string hierarchy_prefix_; +}; +} // namespace torch::serialize + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/output-archive.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/output-archive.h new file mode 100644 index 0000000000000000000000000000000000000000..615892117abf1b7bfa505f3b0f3acce163632dae --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/output-archive.h @@ -0,0 +1,85 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace at { +class Tensor; +} // namespace at + +namespace torch { +using at::Tensor; +namespace jit { +struct Module; +} // namespace jit +} // namespace torch + +namespace torch::serialize { +class TORCH_API OutputArchive final { + public: + explicit OutputArchive(std::shared_ptr cu); + explicit OutputArchive() + : cu_(std::make_shared()), + module_("__torch__.Module", cu_) {} + + // Move is allowed. + OutputArchive(OutputArchive&&) = default; + OutputArchive& operator=(OutputArchive&&) = default; + + // Copy is disallowed. 
+ OutputArchive(OutputArchive&) = delete; + OutputArchive& operator=(OutputArchive&) = delete; + + std::shared_ptr compilation_unit() const { + return cu_; + } + + /// Writes an `IValue` to the `OutputArchive`. + void write(const std::string& key, const c10::IValue& ivalue); + + /// Writes a `(key, tensor)` pair to the `OutputArchive`, and marks it as + /// being or not being a buffer (non-differentiable tensor). + void write( + const std::string& key, + const Tensor& tensor, + bool is_buffer = false); + + /// Writes a nested `OutputArchive` under the given `key` to this + /// `OutputArchive`. + void write(const std::string& key, OutputArchive& nested_archive); + + /// Saves the `OutputArchive` into a serialized representation in a file at + /// `filename`. + void save_to(const std::string& filename); + + /// Saves the `OutputArchive` into a serialized representation into the given + /// `stream`. + void save_to(std::ostream& stream); + + /// Saves the `OutputArchive` into a serialized representation using the + /// given writer function. + void save_to(const std::function& func); + + /// Forwards all arguments to `write()`. + /// Useful for generic code that can be reused for both `OutputArchive` and + /// `InputArchive` (where `operator()` forwards to `read()`). + template + void operator()(Ts&&... ts) { + write(std::forward(ts)...); + } + + private: + std::shared_ptr cu_; + jit::Module module_; +}; +} // namespace torch::serialize + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..f6166613b3dfdb445315bc0fec4cfe3308286e3e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/serialize/tensor.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch { +inline serialize::OutputArchive& operator<<( + serialize::OutputArchive& archive, + const Tensor& tensor) { + archive.write("0", tensor); + return archive; +} + +inline serialize::InputArchive& operator>>( + serialize::InputArchive& archive, + Tensor& tensor) { + archive.read("0", tensor); + return archive; +} +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/sparse.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/sparse.h new file mode 100644 index 0000000000000000000000000000000000000000..11ff08d38c5754df9697695a18a7fd3d0a48ca37 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/sparse.h @@ -0,0 +1,8 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/special.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/special.h new file mode 100644 index 0000000000000000000000000000000000000000..a001eb068265c17af76a544a2cd977bd03c69201 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/special.h @@ -0,0 +1,1408 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::special { + +/// Computes the natural logarithm of the absolute value of the gamma function +/// See https://pytorch.org/docs/main/special.html#torch.special.gammaln. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::gammaln(t); +/// ``` +inline Tensor gammaln(const Tensor& self) { + return torch::special_gammaln(self); +} + +inline Tensor& gammaln_out(Tensor& result, const Tensor& self) { + return torch::special_gammaln_out(result, self); +} + +/// Computes the regularized lower incomplete gamma function +/// See https://pytorch.org/docs/main/special.html#torch.special.gammainc. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// auto s = torch::randn(128, dtype=kDouble); +/// torch::special::gammainc(s, t); +/// ``` +inline Tensor gammainc(const Tensor& self, const Tensor& other) { + return torch::special_gammainc(self, other); +} + +inline Tensor& gammainc_out( + Tensor& result, + const Tensor& self, + const Tensor& other) { + return torch::special_gammainc_out(result, self, other); +} + +/// Computes the regularized upper incomplete gamma function +/// See https://pytorch.org/docs/main/special.html#torch.special.gammainc. 
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// auto s = torch::randn(128, dtype=kDouble); +/// torch::special::gammaincc(s, t); +/// ``` +inline Tensor gammaincc(const Tensor& self, const Tensor& other) { + return torch::special_gammaincc(self, other); +} + +inline Tensor& gammaincc_out( + Tensor& result, + const Tensor& self, + const Tensor& other) { + return torch::special_gammaincc_out(result, self, other); +} + +/// Computes the multivariate log-gamma function with dimension `p`, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.multigammaln. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::multigammaln(t, 1); +/// ``` +inline Tensor multigammaln(const Tensor& self, int64_t p) { + return torch::special_multigammaln(self, p); +} + +inline Tensor& multigammaln_out(Tensor& result, const Tensor& self, int64_t p) { + return torch::special_multigammaln_out(result, self, p); +} + +/// Computes the nth derivative of the digamma function on the input. +/// See https:://pytorch.org/docs/main/special.html#torch.special.polygamma. 
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::polygamma(2, t); +/// ``` +inline Tensor polygamma(int64_t n, const Tensor& self) { + return torch::special_polygamma(n, self); +} + +inline Tensor& polygamma_out(Tensor& result, int64_t n, const Tensor& self) { + return torch::special_polygamma_out(result, n, self); +} + +/// Computes the logarithmic derivative of the gamma function on input +/// See https://pytorch.org/docs/main/special.html#torch.special.psi +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::psi(t); +/// ``` +inline Tensor psi(const Tensor& self) { + return torch::special_psi(self); +} + +inline Tensor& psi_out(Tensor& result, const Tensor& self) { + return torch::special_psi_out(result, self); +} + +/// Computes the logarithmic derivative of the gamma function on input +/// See https://pytorch.org/docs/main/special.html#torch.special.digamma +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::digamma(t); +/// ``` +inline Tensor digamma(const Tensor& self) { + return torch::special_digamma(self); +} + +inline Tensor& digamma_out(Tensor& result, const Tensor& self) { + return torch::special_digamma_out(result, self); +} + +/// Computes entropy of input, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.entr. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::entr(t); +/// ``` +inline Tensor entr(const Tensor& self) { + return torch::special_entr(self); +} + +inline Tensor& entr_out(Tensor& result, const Tensor& self) { + return torch::special_entr_out(result, self); +} + +/// Computes the error function +/// See https://pytorch.org/docs/main/special.html#torch.special.erf. 
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::erf(t); +/// ``` +inline Tensor erf(const Tensor& self) { + return torch::special_erf(self); +} + +inline Tensor& erf_out(Tensor& result, const Tensor& self) { + return torch::special_erf_out(result, self); +} + +/// Computes the complementary error function +/// See https://pytorch.org/docs/main/special.html#torch.special.erfc. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::erfc(t); +/// ``` +inline Tensor erfc(const Tensor& self) { + return torch::special_erfc(self); +} + +inline Tensor& erfc_out(Tensor& result, const Tensor& self) { + return torch::special_erfc_out(result, self); +} + +/// Computes the scaled complementary error function +/// See https://pytorch.org/docs/main/special.html#torch.special.erfcx. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::erfcx(t); +/// ``` +inline Tensor erfcx(const Tensor& self) { + return torch::special_erfcx(self); +} + +inline Tensor& erfcx_out(Tensor& result, const Tensor& self) { + return torch::special_erfcx_out(result, self); +} + +/// Computes the inverse error function +/// See https://pytorch.org/docs/main/special.html#torch.special.erfinv. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::erfinv(t); +/// ``` +inline Tensor erfinv(const Tensor& self) { + return torch::special_erfinv(self); +} + +inline Tensor& erfinv_out(Tensor& result, const Tensor& self) { + return torch::special_erfinv_out(result, self); +} + +/// Computes the log of summed exponentials of each row of input in the given +/// dimension dim See +/// https://pytorch.org/docs/main/special.html#torch.special.logsumexp. 
+/// +/// Example: +/// ``` +/// auto t = torch::randn(3, 3); +/// torch::special::logsumexp(t, {1}, /*keepdim=*/false); +/// ``` +inline Tensor logsumexp(const Tensor& self, IntArrayRef dims, bool keepdim) { + return torch::special_logsumexp(self, dims, keepdim); +} + +inline Tensor& logsumexp_out( + Tensor& result, + const Tensor& self, + IntArrayRef dims, + bool keepdim) { + return torch::special_logsumexp_out(result, self, dims, keepdim); +} + +/// Computes the argument, x, for which the area under the Gaussian probability +/// density function (integrated from minus infinity to x) is equal to input, +/// elementwise. See +/// https://pytorch.org/docs/main/special.html#torch.special.ndtri +/// +/// Example: +/// ``` +/// auto t = torch::rand(128, dtype=kDouble); +/// torch::special::ndtri(t); +/// ``` +inline Tensor ndtri(const Tensor& self) { + return torch::special_ndtri(self); +} + +inline Tensor& ndtri_out(Tensor& result, const Tensor& self) { + return torch::special_ndtri_out(result, self); +} + +/// Computes the log of area under the standard Gaussian probability density +/// function, integrated from minus infinity to :attr:`input`, elementwise. See +/// https://pytorch.org/docs/main/special.html#torch.special.log_ndtr +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::log_ndtr(t); +/// ``` +inline Tensor log_ndtr(const Tensor& self) { + return torch::special_log_ndtr(self); +} + +inline Tensor& log_ndtr_out(Tensor& result, const Tensor& self) { + return torch::special_log_ndtr_out(result, self); +} + +/// Computes the logit of input, elementwise. +/// See https://pytorch.org/docs/main/special.html#torch.special.logit.
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::logit(t); +/// ``` +inline Tensor logit(const Tensor& self) { + return torch::special_logit(self); +} + +inline Tensor& logit_out(Tensor& result, const Tensor& self) { + return torch::special_logit_out(result, self); +} + +/// Computes the expit (also known as the logistic sigmoid function) of input, +/// elementwise See +/// https://pytorch.org/docs/main/special.html#torch.special.expit. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::expit(t); +/// ``` +inline Tensor expit(const Tensor& self) { + return torch::special_expit(self); +} + +inline Tensor& expit_out(Tensor& result, const Tensor& self) { + return torch::special_expit_out(result, self); +} + +/// Computes the base two exponential function of :attr:`input`, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.exp2. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::exp2(t); +/// ``` +inline Tensor exp2(const Tensor& self) { + return torch::special_exp2(self); +} + +inline Tensor& exp2_out(Tensor& result, const Tensor& self) { + return torch::special_exp2_out(result, self); +} + +/// Computes the exponential of the elements minus 1, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.expm1. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::expm1(t); +/// ``` +inline Tensor expm1(const Tensor& self) { + return torch::special_expm1(self); +} + +inline Tensor& expm1_out(Tensor& result, const Tensor& self) { + return torch::special_expm1_out(result, self); +} + +/// Computes x * log(y) for inputs, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.xlogy. 
+/// +/// Example: +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto y = torch::randn(128, dtype=kDouble); +/// torch::special::xlogy(x, y); +/// ``` +inline Tensor xlogy(const Tensor& self, const Tensor& other) { + return torch::special_xlogy(self, other); +} + +inline Tensor xlogy(const Scalar& self, const Tensor& other) { + return torch::special_xlogy(self, other); +} + +inline Tensor xlogy(const Tensor& self, const Scalar& other) { + return torch::special_xlogy(self, other); +} + +inline Tensor& xlogy_out( + Tensor& result, + const Tensor& self, + const Tensor& other) { + return torch::special_xlogy_out(result, self, other); +} + +inline Tensor& xlogy_out( + Tensor& result, + const Scalar& self, + const Tensor& other) { + return torch::special_xlogy_out(result, self, other); +} + +inline Tensor& xlogy_out( + Tensor& result, + const Tensor& self, + const Scalar& other) { + return torch::special_xlogy_out(result, self, other); +} + +/// Computes x * log1p(y) for inputs, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.xlog1py. 
+/// +/// Example: +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto y = torch::randn(128, dtype=kDouble); +/// torch::special::xlog1py(x, y); +/// ``` +inline Tensor xlog1py(const Tensor& self, const Tensor& other) { + return torch::special_xlog1py(self, other); +} + +inline Tensor xlog1py(const Scalar& self, const Tensor& other) { + return torch::special_xlog1py(self, other); +} + +inline Tensor xlog1py(const Tensor& self, const Scalar& other) { + return torch::special_xlog1py(self, other); +} + +inline Tensor& xlog1py_out( + Tensor& result, + const Tensor& self, + const Tensor& other) { + return torch::special_xlog1py_out(result, self, other); +} + +inline Tensor& xlog1py_out( + Tensor& result, + const Scalar& self, + const Tensor& other) { + return torch::special_xlog1py_out(result, self, other); +} + +inline Tensor& xlog1py_out( + Tensor& result, + const Tensor& self, + const Scalar& other) { + return torch::special_xlog1py_out(result, self, other); +} + +/// Computes Hurwitz Zeta function for inputs, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.zeta. 
+/// +/// Example: +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto y = torch::randn(128, dtype=kDouble); +/// torch::special::zeta(x, y); +/// ``` +inline Tensor zeta(const Tensor& self, const Tensor& other) { + return torch::special_zeta(self, other); +} + +inline Tensor zeta(const Scalar& self, const Tensor& other) { + return torch::special_zeta(self, other); +} + +inline Tensor zeta(const Tensor& self, const Scalar& other) { + return torch::special_zeta(self, other); +} + +inline Tensor& zeta_out( + Tensor& result, + const Tensor& self, + const Tensor& other) { + return torch::special_zeta_out(result, self, other); +} + +inline Tensor& zeta_out( + Tensor& result, + const Scalar& self, + const Tensor& other) { + return torch::special_zeta_out(result, self, other); +} + +inline Tensor& zeta_out( + Tensor& result, + const Tensor& self, + const Scalar& other) { + return torch::special_zeta_out(result, self, other); +} + +/// Computes the zeroth order modified Bessel function of the first kind of +/// input, elementwise See +/// https://pytorch.org/docs/main/special.html#torch.special.i0 +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::i0(t); +/// ``` +inline Tensor i0(const Tensor& self) { + return torch::special_i0(self); +} + +inline Tensor& i0_out(Tensor& result, const Tensor& self) { + return torch::special_i0_out(result, self); +} + +/// Computes the area under the standard Gaussian probability density function, +/// integrated from minus infinity to :attr:`input`, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.ndtr +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::ndtr(t); +/// ``` +inline Tensor ndtr(const Tensor& self) { + return torch::special_ndtr(self); +} + +inline Tensor& ndtr_out(Tensor& result, const Tensor& self) { + return torch::special_ndtr_out(result, self); +} + +/// Computes the exponentially scaled 
zeroth order modified Bessel function of +/// the first kind See +/// https://pytorch.org/docs/main/special.html#torch.special.i0e. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::i0e(t); +/// ``` +inline Tensor i0e(const Tensor& self) { + return torch::special_i0e(self); +} + +inline Tensor& i0e_out(Tensor& result, const Tensor& self) { + return torch::special_i0e_out(result, self); +} + +/// Computes the first order modified Bessel function of the first kind +/// See https://pytorch.org/docs/main/special.html#torch.special.i1. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::i1(t); +/// ``` +inline Tensor i1(const Tensor& self) { + return torch::special_i1(self); +} + +inline Tensor& i1_out(Tensor& result, const Tensor& self) { + return torch::special_i1_out(result, self); +} + +/// Computes the exponentially scaled first order modified Bessel function of +/// the first kind See +/// https://pytorch.org/docs/main/special.html#torch.special.i1e. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::i1e(t); +/// ``` +inline Tensor i1e(const Tensor& self) { + return torch::special_i1e(self); +} + +inline Tensor& i1e_out(Tensor& result, const Tensor& self) { + return torch::special_i1e_out(result, self); +} + +/// Computes the sinc of input, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.sinc. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::sinc(t); +/// ``` +inline Tensor sinc(const Tensor& self) { + return torch::special_sinc(self); +} + +inline Tensor& sinc_out(Tensor& result, const Tensor& self) { + return torch::special_sinc_out(result, self); +} + +/// Rounds the elements of the input +/// See https://pytorch.org/docs/main/special.html#torch.special.round. 
+/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::round(t); +/// ``` +inline Tensor round(const Tensor& self) { + return torch::special_round(self); +} + +inline Tensor& round_out(Tensor& result, const Tensor& self) { + return torch::special_round_out(result, self); +} + +/// Computes log(1 + x) of the input, elementwise +/// See https://pytorch.org/docs/main/special.html#torch.special.log1p. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, dtype=kDouble); +/// torch::special::log1p(t); +/// ``` +inline Tensor log1p(const Tensor& self) { + return torch::special_log1p(self); +} + +inline Tensor& log1p_out(Tensor& result, const Tensor& self) { + return torch::special_log1p_out(result, self); +} + +/// Computes log(softmax(x)) of the input along a given dimension +/// See https://pytorch.org/docs/main/special.html#torch.special.log_softmax. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, 128, dtype=kDouble); +/// torch::special::log_softmax(t, 0, std::nullopt); +/// ``` +inline Tensor log_softmax( + const Tensor& self, + int64_t dim, + std::optional<ScalarType> dtype) { + return torch::special_log_softmax(self, dim, dtype); +} + +/// Computes softmax of the input along a given dimension +/// See https://pytorch.org/docs/main/special.html#torch.special.softmax. +/// +/// Example: +/// ``` +/// auto t = torch::randn(128, 128, dtype=kDouble); +/// torch::special::softmax(t, 0, std::nullopt); +/// ``` +inline Tensor softmax( + const Tensor& self, + int64_t dim, + std::optional<ScalarType> dtype) { + return torch::special_softmax(self, dim, dtype); +} + +/// Airy function Ai. +/// +/// See https://pytorch.org/docs/main/special.html#torch.special.airy_ai.
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::airy_ai(x); +/// ``` +inline Tensor airy_ai(const Tensor& x) { + return torch::special_airy_ai(x); +} + +inline Tensor& airy_ai_out(Tensor& y, const Tensor& x) { + return torch::special_airy_ai_out(y, x); +} + +/// Bessel function of the first kind of order 0. +/// +/// See https://pytorch.org/docs/main/special.html#torch.special.bessel_j0. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::bessel_j0(x); +/// ``` +inline Tensor bessel_j0(const Tensor& self) { + return torch::special_bessel_j0(self); +} + +inline Tensor& bessel_j0_out(Tensor& result, const Tensor& self) { + return torch::special_bessel_j0_out(result, self); +} + +/// Bessel function of the first kind of order 1. +/// +/// See https://pytorch.org/docs/main/special.html#torch.special.bessel_j1. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::bessel_j1(x); +/// ``` +inline Tensor bessel_j1(const Tensor& self) { + return torch::special_bessel_j1(self); +} + +inline Tensor& bessel_j1_out(Tensor& result, const Tensor& self) { + return torch::special_bessel_j1_out(result, self); +} + +/// Bessel function of the second kind of order 0. +/// +/// See https://pytorch.org/docs/main/special.html#torch.special.bessel_y0. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::bessel_y0(x); +/// ``` +inline Tensor bessel_y0(const Tensor& self) { + return torch::special_bessel_y0(self); +} + +inline Tensor& bessel_y0_out(Tensor& result, const Tensor& self) { + return torch::special_bessel_y0_out(result, self); +} + +/// Bessel function of the second kind of order 1. +/// +/// See https://pytorch.org/docs/main/special.html#torch.special.bessel_y1. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::bessel_y1(x); +/// ``` +inline Tensor bessel_y1(const Tensor& self) { + return torch::special_bessel_y1(self); +} + +inline Tensor& bessel_y1_out(Tensor& result, const Tensor& self) { + return torch::special_bessel_y1_out(result, self); +} + +/// Chebyshev polynomial of the first kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.chebyshev_polynomial_t. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::chebyshev_polynomial_t(x, n); +/// ``` +inline Tensor chebyshev_polynomial_t(const Tensor& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_t(x, n); +} + +inline Tensor chebyshev_polynomial_t(const Scalar& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_t(x, n); +} + +inline Tensor chebyshev_polynomial_t(const Tensor& x, const Scalar& n) { + return torch::special_chebyshev_polynomial_t(x, n); +} + +inline Tensor& chebyshev_polynomial_t_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_t_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_t_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_t_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_t_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_chebyshev_polynomial_t_out(output, x, n); +} + +/// Chebyshev polynomial of the second kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.chebyshev_polynomial_u. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::chebyshev_polynomial_u(x, n); +/// ``` +inline Tensor chebyshev_polynomial_u(const Tensor& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_u(x, n); +} + +inline Tensor chebyshev_polynomial_u(const Scalar& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_u(x, n); +} + +inline Tensor chebyshev_polynomial_u(const Tensor& x, const Scalar& n) { + return torch::special_chebyshev_polynomial_u(x, n); +} + +inline Tensor& chebyshev_polynomial_u_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_u_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_u_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_u_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_u_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_chebyshev_polynomial_u_out(output, x, n); +} + +/// Chebyshev polynomial of the third kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.chebyshev_polynomial_v. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::chebyshev_polynomial_v(x, n); +/// ``` +inline Tensor chebyshev_polynomial_v(const Tensor& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_v(x, n); +} + +inline Tensor chebyshev_polynomial_v(const Scalar& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_v(x, n); +} + +inline Tensor chebyshev_polynomial_v(const Tensor& x, const Scalar& n) { + return torch::special_chebyshev_polynomial_v(x, n); +} + +inline Tensor& chebyshev_polynomial_v_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_v_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_v_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_v_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_v_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_chebyshev_polynomial_v_out(output, x, n); +} + +/// Chebyshev polynomial of the fourth kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.chebyshev_polynomial_w. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::chebyshev_polynomial_w(x, n); +/// ``` +inline Tensor chebyshev_polynomial_w(const Tensor& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_w(x, n); +} + +inline Tensor chebyshev_polynomial_w(const Scalar& x, const Tensor& n) { + return torch::special_chebyshev_polynomial_w(x, n); +} + +inline Tensor chebyshev_polynomial_w(const Tensor& x, const Scalar& n) { + return torch::special_chebyshev_polynomial_w(x, n); +} + +inline Tensor& chebyshev_polynomial_w_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_w_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_w_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_chebyshev_polynomial_w_out(output, x, n); +} + +inline Tensor& chebyshev_polynomial_w_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_chebyshev_polynomial_w_out(output, x, n); +} + +/// Physicist’s Hermite polynomial. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.hermite_polynomial_h. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::hermite_polynomial_h(x, n); +/// ``` +inline Tensor hermite_polynomial_h(const Tensor& x, const Tensor& n) { + return torch::special_hermite_polynomial_h(x, n); +} + +inline Tensor hermite_polynomial_h(const Scalar& x, const Tensor& n) { + return torch::special_hermite_polynomial_h(x, n); +} + +inline Tensor hermite_polynomial_h(const Tensor& x, const Scalar& n) { + return torch::special_hermite_polynomial_h(x, n); +} + +inline Tensor& hermite_polynomial_h_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_hermite_polynomial_h_out(output, x, n); +} + +inline Tensor& hermite_polynomial_h_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_hermite_polynomial_h_out(output, x, n); +} + +inline Tensor& hermite_polynomial_h_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_hermite_polynomial_h_out(output, x, n); +} + +/// Probabilist’s Hermite polynomial. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.hermite_polynomial_he. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::hermite_polynomial_he(x, n); +/// ``` +inline Tensor hermite_polynomial_he(const Tensor& x, const Tensor& n) { + return torch::special_hermite_polynomial_he(x, n); +} + +inline Tensor hermite_polynomial_he(const Scalar& x, const Tensor& n) { + return torch::special_hermite_polynomial_he(x, n); +} + +inline Tensor hermite_polynomial_he(const Tensor& x, const Scalar& n) { + return torch::special_hermite_polynomial_he(x, n); +} + +inline Tensor& hermite_polynomial_he_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_hermite_polynomial_he_out(output, x, n); +} + +inline Tensor& hermite_polynomial_he_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_hermite_polynomial_he_out(output, x, n); +} + +inline Tensor& hermite_polynomial_he_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_hermite_polynomial_he_out(output, x, n); +} + +/// Laguerre polynomial. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.laguerre_polynomial_l. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::laguerre_polynomial_l(x, n); +/// ``` +inline Tensor laguerre_polynomial_l(const Tensor& x, const Tensor& n) { + return torch::special_laguerre_polynomial_l(x, n); +} + +inline Tensor laguerre_polynomial_l(const Scalar& x, const Tensor& n) { + return torch::special_laguerre_polynomial_l(x, n); +} + +inline Tensor laguerre_polynomial_l(const Tensor& x, const Scalar& n) { + return torch::special_laguerre_polynomial_l(x, n); +} + +inline Tensor& laguerre_polynomial_l_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_laguerre_polynomial_l_out(output, x, n); +} + +inline Tensor& laguerre_polynomial_l_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_laguerre_polynomial_l_out(output, x, n); +} + +inline Tensor& laguerre_polynomial_l_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_laguerre_polynomial_l_out(output, x, n); +} + +/// Legendre polynomial. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.legendre_polynomial_p. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::legendre_polynomial_p(x, n); +/// ``` +inline Tensor legendre_polynomial_p(const Tensor& x, const Tensor& n) { + return torch::special_legendre_polynomial_p(x, n); +} + +inline Tensor legendre_polynomial_p(const Scalar& x, const Tensor& n) { + return torch::special_legendre_polynomial_p(x, n); +} + +inline Tensor legendre_polynomial_p(const Tensor& x, const Scalar& n) { + return torch::special_legendre_polynomial_p(x, n); +} + +inline Tensor& legendre_polynomial_p_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_legendre_polynomial_p_out(output, x, n); +} + +inline Tensor& legendre_polynomial_p_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_legendre_polynomial_p_out(output, x, n); +} + +inline Tensor& legendre_polynomial_p_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_legendre_polynomial_p_out(output, x, n); +} + +/// Modified Bessel function of the first kind of order 0. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.modified_bessel_i0. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::modified_bessel_i0(x); +/// ``` +inline Tensor modified_bessel_i0(const Tensor& self) { + return torch::special_modified_bessel_i0(self); +} + +inline Tensor& modified_bessel_i0_out(Tensor& result, const Tensor& self) { + return torch::special_modified_bessel_i0_out(result, self); +} + +/// Modified Bessel function of the first kind of order 1. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.modified_bessel_i1. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::modified_bessel_i1(x); +/// ``` +inline Tensor modified_bessel_i1(const Tensor& self) { + return torch::special_modified_bessel_i1(self); +} + +inline Tensor& modified_bessel_i1_out(Tensor& result, const Tensor& self) { + return torch::special_modified_bessel_i1_out(result, self); +} + +/// Modified Bessel function of the second kind of order 0. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.modified_bessel_k0. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::modified_bessel_k0(x); +/// ``` +inline Tensor modified_bessel_k0(const Tensor& self) { + return torch::special_modified_bessel_k0(self); +} + +inline Tensor& modified_bessel_k0_out(Tensor& result, const Tensor& self) { + return torch::special_modified_bessel_k0_out(result, self); +} + +/// Modified Bessel function of the second kind of order 1. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.modified_bessel_k1. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::modified_bessel_k1(x); +/// ``` +inline Tensor modified_bessel_k1(const Tensor& self) { + return torch::special_modified_bessel_k1(self); +} + +inline Tensor& modified_bessel_k1_out(Tensor& result, const Tensor& self) { + return torch::special_modified_bessel_k1_out(result, self); +} + +/// Scaled modified Bessel function of the second kind of order 0. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.scaled_modified_bessel_k0. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::scaled_modified_bessel_k0(x); +/// ``` +inline Tensor scaled_modified_bessel_k0(const Tensor& x) { + return torch::special_scaled_modified_bessel_k0(x); +} + +inline Tensor& scaled_modified_bessel_k0_out(Tensor& y, const Tensor& x) { + return torch::special_scaled_modified_bessel_k0_out(y, x); +} + +/// Scaled modified Bessel function of the second kind of order 1. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.scaled_modified_bessel_k1. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::scaled_modified_bessel_k1(x); +/// ``` +inline Tensor scaled_modified_bessel_k1(const Tensor& x) { + return torch::special_scaled_modified_bessel_k1(x); +} + +inline Tensor& scaled_modified_bessel_k1_out(Tensor& y, const Tensor& x) { + return torch::special_scaled_modified_bessel_k1_out(y, x); +} + +/// Shifted Chebyshev polynomial of the first kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.shifted_chebyshev_polynomial_t. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::shifted_chebyshev_polynomial_t(x, n); +/// ``` +inline Tensor shifted_chebyshev_polynomial_t(const Tensor& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_t(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_t(const Scalar& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_t(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_t(const Tensor& x, const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_t(x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_t_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_t_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_t_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_t_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_t_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_t_out(output, x, n); +} + +/// Shifted Chebyshev polynomial of the second kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.shifted_chebyshev_polynomial_u. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::shifted_chebyshev_polynomial_u(x, n); +/// ``` +inline Tensor shifted_chebyshev_polynomial_u(const Tensor& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_u(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_u(const Scalar& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_u(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_u(const Tensor& x, const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_u(x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_u_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_u_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_u_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_u_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_u_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_u_out(output, x, n); +} + +/// Shifted Chebyshev polynomial of the third kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.shifted_chebyshev_polynomial_v. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::shifted_chebyshev_polynomial_v(x, n); +/// ``` +inline Tensor shifted_chebyshev_polynomial_v(const Tensor& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_v(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_v(const Scalar& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_v(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_v(const Tensor& x, const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_v(x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_v_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_v_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_v_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_v_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_v_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_v_out(output, x, n); +} + +/// Shifted Chebyshev polynomial of the fourth kind. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.shifted_chebyshev_polynomial_w. 
+/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// auto n = torch::randn(128, dtype=kDouble); +/// +/// torch::special::shifted_chebyshev_polynomial_w(x, n); +/// ``` +inline Tensor shifted_chebyshev_polynomial_w(const Tensor& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_w(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_w(const Scalar& x, const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_w(x, n); +} + +inline Tensor shifted_chebyshev_polynomial_w(const Tensor& x, const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_w(x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_w_out( + Tensor& output, + const Tensor& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_w_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_w_out( + Tensor& output, + const Scalar& x, + const Tensor& n) { + return torch::special_shifted_chebyshev_polynomial_w_out(output, x, n); +} + +inline Tensor& shifted_chebyshev_polynomial_w_out( + Tensor& output, + const Tensor& x, + const Scalar& n) { + return torch::special_shifted_chebyshev_polynomial_w_out(output, x, n); +} + +/// Spherical Bessel function of the first kind of order 0. +/// +/// See +/// https://pytorch.org/docs/main/special.html#torch.special.spherical_bessel_j0. +/// +/// Example: +/// +/// ``` +/// auto x = torch::randn(128, dtype=kDouble); +/// +/// torch::special::spherical_bessel_j0(x); +/// ``` +inline Tensor spherical_bessel_j0(const Tensor& x) { + return torch::special_spherical_bessel_j0(x); +} + +inline Tensor& spherical_bessel_j0_out(Tensor& y, const Tensor& x) { + return torch::special_spherical_bessel_j0_out(y, x); +} +} // namespace torch::special + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/torch.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/torch.h new file mode 100644 index 0000000000000000000000000000000000000000..42f5958d9cbc325bbdded2cd1cf8a4f918cc1117 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/torch.h @@ -0,0 +1,13 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#ifdef TORCH_API_INCLUDE_EXTENSION_H +#include + +#endif // defined(TORCH_API_INCLUDE_EXTENSION_H) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/types.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/types.h new file mode 100644 index 0000000000000000000000000000000000000000..639b9dad7d865382e888c595900ea006311cbaa9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/types.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include +#include + +#include + +namespace torch { + +// NOTE [ Exposing declarations in `at::` to `torch::` ] +// +// The following line `using namespace at;` is responsible for exposing all +// declarations in `at::` namespace to `torch::` namespace. 
+// +// According to the rules laid out in +// https://en.cppreference.com/w/cpp/language/qualified_lookup, section +// "Namespace members": +// ``` +// Qualified lookup within the scope of a namespace N first considers all +// declarations that are located in N and all declarations that are located in +// the inline namespace members of N (and, transitively, in their inline +// namespace members). If there are no declarations in that set then it +// considers declarations in all namespaces named by using-directives found in N +// and in all transitive inline namespace members of N. +// ``` +// +// This means that if both `at::` and `torch::` namespaces have a function with +// the same signature (e.g. both `at::func()` and `torch::func()` exist), after +// `namespace torch { using namespace at; }`, when we call `torch::func()`, the +// `func()` function defined in `torch::` namespace will always be called, and +// the `func()` function defined in `at::` namespace is always hidden. +using namespace at; // NOLINT + +#if !defined(FBCODE_CAFFE2) && !defined(C10_NODEPRECATED) +using std::nullopt; // NOLINT +using std::optional; // NOLINT +#endif + +using Dtype = at::ScalarType; + +/// Fixed width dtypes. +constexpr auto kUInt8 = at::kByte; +constexpr auto kInt8 = at::kChar; +constexpr auto kInt16 = at::kShort; +constexpr auto kInt32 = at::kInt; +constexpr auto kInt64 = at::kLong; +constexpr auto kUInt16 = at::kUInt16; +constexpr auto kUInt32 = at::kUInt32; +constexpr auto kUInt64 = at::kUInt64; +constexpr auto kFloat16 = at::kHalf; +constexpr auto kFloat32 = at::kFloat; +constexpr auto kFloat64 = at::kDouble; + +/// Rust-style short dtypes. 
+constexpr auto kU8 = kUInt8; +constexpr auto kU16 = kUInt16; +constexpr auto kU32 = kUInt32; +constexpr auto kU64 = kUInt64; +constexpr auto kI8 = kInt8; +constexpr auto kI16 = kInt16; +constexpr auto kI32 = kInt32; +constexpr auto kI64 = kInt64; +constexpr auto kF16 = kFloat16; +constexpr auto kF32 = kFloat32; +constexpr auto kF64 = kFloat64; +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..bbff5f56e8a83dd9872d6138cbd9ab59a7b9550b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/utils.h @@ -0,0 +1,122 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +// NOLINTBEGIN(misc-unused-using-decls) +namespace torch { + +/// A RAII, thread-local guard that disabled gradient calculation. +/// +/// Disabling gradient calculation is useful for inference, when you are sure +/// that you will not call `at::Tensor::backward`. It will reduce memory +/// consumption for computations that would otherwise have `requires_grad() == +/// true`. +/// +/// In this mode, the result of every computation will have +/// `requires_grad() == false`, even when the inputs have `requires_grad() == +/// true`. +/// +/// This context manager is thread-local; it will not affect computation +/// in other threads. 
+/// +/// Example: +/// @code +/// auto x = torch::tensor({1.}, torch::requires_grad()); +/// { +/// torch::NoGradGuard no_grad; +/// auto y = x * 2; +/// std::cout << y.requires_grad() << std::endl; // prints `false` +/// } +/// { +/// auto doubler = [](torch::Tensor x) { +/// torch::NoGradGuard no_grad; +/// return x * 2; +/// }; +/// auto z = doubler(x); +/// std::cout << z.requires_grad() << std::endl; // prints `false` +/// } +/// @endcode +using NoGradGuard = at::NoGradGuard; + +/// A RAII, thread-local guard that sets gradient calculation to on or off. +/// +/// ``AutoGradMode`` will enable or disable grads based on its argument +/// `enabled`. +/// +/// This context manager is thread-local; it will not affect computation +/// in other threads. +/// +/// \param enabled: Flag whether to enable grad (``true``), or disable +/// (``false``). This can be used to conditionally enable +/// gradients. +/// +/// Example: +/// @code +/// auto x = torch::tensor({1.}, torch::requires_grad()); +/// { +/// torch::AutoGradMode enable_grad(true); +/// auto y = x * 2; +/// std::cout << y.requires_grad() << std::endl; // prints `true` +/// } +/// { +/// torch::AutoGradMode enable_grad(false); +/// auto y = x * 2; +/// std::cout << y.requires_grad() << std::endl; // prints `false` +/// } +/// @endcode +using AutoGradMode = at::AutoGradMode; + +/// Sets the global random seed for all newly created CPU and CUDA tensors. +using at::manual_seed; + +// Called during new thread initialization +using at::init_num_threads; + +// Returns the number of threads used in parallel region. +using at::get_num_threads; + +// Sets the number of threads to be used in parallel region. +using at::set_num_threads; + +// Returns the number of threads used for inter-op parallelism. +using at::get_num_interop_threads; + +// Sets the number of threads to be used for inter-op parallelism. 
+using at::set_num_interop_threads; + +// Returns true if both t1, t2 are undefined or both are defined and equal +inline bool equal_if_defined(const Tensor& t1, const Tensor& t2) { + return ( + (!t1.defined() && !t2.defined()) || + (t1.defined() && t2.defined() && torch::equal(t1, t2))); +} + +// RecordFunction API +using at::addGlobalCallback; +using at::addThreadLocalCallback; +using at::CallbackHandle; +using at::clearCallbacks; +using at::clearGlobalCallbacks; +using at::clearThreadLocalCallbacks; +using at::DisableRecordFunctionGuard; +using at::enableRecordFunction; +using at::hasCallbacks; +using at::hasGlobalCallbacks; +using at::hasThreadLocalCallbacks; +using at::isRecordFunctionEnabled; +using at::RecordFunction; +using at::RecordFunctionCallback; +using at::RecordFunctionGuard; +using at::removeCallback; + +} // namespace torch +// NOLINTEND(misc-unused-using-decls) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/version.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/version.h new file mode 100644 index 0000000000000000000000000000000000000000..d3112874aed4464561385dbe6e403150e7d8262a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/version.h @@ -0,0 +1,6 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/xpu.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/xpu.h new file mode 100644 index 0000000000000000000000000000000000000000..280cbb6e920d097c5216ca2a393c0cf89d945eaf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/api/include/torch/xpu.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::xpu { + +/// Returns the number of XPU devices available. +size_t TORCH_API device_count(); + +/// Returns true if at least one XPU device is available. +bool TORCH_API is_available(); + +/// Sets the seed for the current GPU. +void TORCH_API manual_seed(uint64_t seed); + +/// Sets the seed for all available GPUs. +void TORCH_API manual_seed_all(uint64_t seed); + +/// Waits for all kernels in all streams on a XPU device to complete. +void TORCH_API synchronize(int64_t device_index); + +} // namespace torch::xpu + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..8aa0d835c9d52fac0c36d6ed52038dc0b26a9357 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/autograd.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +using torch::autograd::variable_list; + +/// C++ API of Distributed Autograd that kicks off the distributed backward pass +/// using the provided roots. This currently implements the +/// :ref:`fast-mode-algorithm` which assumes all RPC messages sent in the same +/// distributed autograd context across workers would be part of the autograd +/// graph during the backward pass. +/// +/// We use the provided roots to discover the autograd graph and compute +/// appropriate dependencies. This method blocks until the entire +/// autograd computation is done. +/// This function accumulates gradients in the leaves - you might need to zero +/// them before calling it. +/// +/// \param context_id The autograd context id for which we should retrieve the +/// gradients. +/// \param roots Tensors which represent the roots of the autograd computation. +/// All the tensors should be scalars. +/// \param retain_graph If `false`, the graph used to compute the grad will be +/// freed. Note that in nearly all cases setting this +/// option to `true` is not needed and often can be worked +/// around in a much more efficient way. Usually, you need +/// to set this to `true` to run backward multiple times. 
+TORCH_API void backward( + int64_t context_id, + const variable_list& roots, + bool retain_graph = false); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h new file mode 100644 index 0000000000000000000000000000000000000000..c28137cb4fbf2f19118ba4b546d6c612aefaf35c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/container.h @@ -0,0 +1,167 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::distributed::autograd { + +// Singleton class per worker which is responsible for storing the distributed +// autograd context for each autograd pass and also cleans up data for an +// autograd pass once its done. +// +// Each autograd pass is assigned a unique autograd_context_id and all data for +// that pass (DistAutogradContext) is stored in this container indexed by the +// autograd_context_id. The autograd_context_id itself is a 64 bit globally +// unique id. The first 16 bits is the worker_id and the next 48 bits is an +// auto-incrementing id for each worker. +// +// This container is also responsible for maintaining a globally unique message +// id, which is used to associate send/recv autograd function pairs. The format +// is similar to the autograd_context_id where we have a 64 bit integer with +// first 16 bits being the worker id and next 48 bits are auto-incrementing. 
+class TORCH_API DistAutogradContainer { + public: + explicit DistAutogradContainer(uint32_t num_shards); + + // One time initialization of the container. + static DistAutogradContainer& init(int64_t worker_id); + + // Retrieve the singleton instance of the container, ensures we have + // initialized the container. + static DistAutogradContainer& getInstance(); + + // Create a new context for a distributed autograd pass. + const ContextPtr newContext(); + + // Clean up resources for a given context_id once the autograd pass is done. + // Sends RPC to other workers this worker knows about, telling them to clean + // up their context as well. Throws an exception if the context_id does not + // exist. + void releaseContext(int64_t context_id); + + // Releases an autograd context if it is present on this node. Also sends RPC + // to other workers this worker knows about, telling them to clean up their + // context. Does nothing if it is not present. + void releaseContextIfPresent(int64_t context_id); + + // Checks if the passed in context_id is valid. + void isValidContext(int64_t context_id); + + // Retrieve the autograd context for a given context_id. + ContextPtr retrieveContext(int64_t context_id); + + // Retrieves the currently active autograd context for the current thread. + ContextPtr currentContext(); + + // Checks whether or not the current thread has a valid autograd context. + bool hasValidContext() const; + + // Generate a new autograd_message_id for send/recv autograd functions. + int64_t newAutogradMessageId(); + + // Creates a new autograd context with the provided context_id. If a context + // already exists with the provided context_id, we just return it. + // This does not set the current context for the current thread. + ContextPtr getOrCreateContext(int64_t context_id); + + // Retrieves the maximum possible autograd_context_id/autograd_message_id that + // can be generated by this worker. 
+ int64_t getMaxId(); + + // Retrieves the worker ID for this node + rpc::worker_id_t getWorkerId() const; + + // Can set current context id if there is no valid context yet + static void setCurrentContextId(int64_t contextId); + + // Forcibly sets the thread local current context id. Should only be used in + // cases where you know what you're doing and need to override the thread + // local. Otherwise, use setCurrentContextId instead. + static void forceCurrentContextId(int64_t contextId); + + // Clear current context id + void clearCurrentContext(); + + // Returns the number of autograd contexts in the container. + size_t numAutogradContexts() const; + + // Returns the current thread local context id for this thread. + static int64_t currentContextId(); + + DistAutogradContainer() = delete; + ~DistAutogradContainer() = default; + DistAutogradContainer(const DistAutogradContainer&) = delete; + DistAutogradContainer& operator=(const DistAutogradContainer&) = delete; + DistAutogradContainer(DistAutogradContainer&&) = delete; + DistAutogradContainer& operator=(DistAutogradContainer&&) = delete; + + private: + // Number of shards for the map storing autograd contexts. We'd like this + // to be a power of 2 and we don't expect a value much higher than the + // number of cores would provide much benefit. + static constexpr uint32_t kNumDefaultShards = 128; + + // Use cache line size for alignment. + static constexpr int kCacheLineSize = 64; + + // Structure holding one shard of the sharded autograd context map with its + // associated lock. Align to cache line size to avoid contention between + // adjacent entries. + struct alignas(kCacheLineSize) ContextsShard { + // Lock for this shard. + mutable std::mutex lock; + + // Map storing autograd contexts for this shard. + std::unordered_map contexts; + }; + + static DistAutogradContainer& getInstanceInternal(); + + // Retrieve the shard for given context_id. 
+ ContextsShard& getShard(int64_t context_id); + + // Sends an RPC to the workers that have a context corresponding to passed in + // context_id. This function should be called with the lock. + void sendReleaseContextRpc( + const std::unordered_set& workerIds, + int64_t context_id); + + // Erase context_id from the autograd context map, and reset the thread local + // current context id if it corresponds to the passed in context id. This + // function should be called with the lock. + void eraseContextIdAndReset(ContextsShard& shard, int64_t context_id); + + // Compute the number of shards for the autograd_contexts_ map. + static uint32_t computeNumShards(); + + // Auto incrementing context id used to identify unique autograd passes. + // Initialized with the first 16 bits being the worker_id. + std::atomic next_context_id_; + + // Unique id to identify a worker in the distributed setting. + int16_t worker_id_; + + // Whether or not the container has been initialized appropriately. + bool initialized_; + + // Sharded autograd context map. + std::vector autograd_contexts_; + + // Number of shards for the sharded autograd_contexts_ map. + uint32_t num_shards_; + + // Autograd message id to identify unique send/recv autograd function pairs. + std::atomic next_autograd_message_id_; + + // Maximum allowed value for autograd_context_id or autograd_message_id. + int64_t max_id_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h new file mode 100644 index 0000000000000000000000000000000000000000..09de23c1154c2c496ba6af372f993e3a9477a310 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/context/context.h @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +class RecvRpcBackward; + +// DistAutogradContext which stores information for a single distributed +// autograd pass on a worker. +class TORCH_API DistAutogradContext { + public: + using GradCallback = std::function; + + explicit DistAutogradContext(int64_t contextId); + ~DistAutogradContext() = default; + + // Retrieves the autograd context id for this context. + int64_t contextId() const; + + // Records a 'send' autograd function for this context with the provided + // message id. + void addSendFunction( + const std::shared_ptr& func, + int64_t autograd_message_id); + + // Records a 'recv' autograd function for this context with the provided + // message id. + void addRecvFunction( + std::shared_ptr& func, + int64_t autograd_message_id); + + // Given an autograd_message_id, retrieve the appropriate send function. + std::shared_ptr retrieveSendFunction( + int64_t autograd_message_id); + + // Return all send functions for this context. + std::unordered_map> sendFunctions() + const; + + // Return all recv functions for this context. + std::unordered_map> recvFunctions() + const; + + // Adds a future message recording an outstanding RPC. 
+ void addOutstandingRpc(const c10::intrusive_ptr& jitFuture); + + // Returns all gradients. + const c10::Dict getGradients() const; + + // This function gives a mutable grad reference to the callback. + // If the callback returns true, it means the grad in the context + // needs to be updated. + void runGradCallbackForVariable( + const torch::autograd::Variable& variable, + const GradCallback& cb); + + DistAutogradContext(const DistAutogradContext&) = delete; + DistAutogradContext& operator=(const DistAutogradContext&) = delete; + DistAutogradContext(DistAutogradContext&&) = delete; + DistAutogradContext& operator=(DistAutogradContext&&) = delete; + + // records the workerID of a node that we sent an RPC to. + // workerIDs are added here when we attach a send function to this autograd + // context + void addKnownWorkerId(const rpc::worker_id_t workerId); + + // Retrieves a set containing the known workerIds for this context + // These are the different workers that this context has sent RPCs to. + std::unordered_set getKnownWorkerIds() const; + + private: + friend class BackwardPassCleanupGuard; + friend class DistEngine; + friend class RecvRpcBackward; + friend class DistAccumulateGradCaptureHook; + + // Record that we would like to accumulate the provided gradient on the given + // variable. + void accumulateGrad( + const torch::autograd::Variable& variable, + const torch::Tensor& grad, + size_t num_expected_refs); + + // Retrieve the GraphTask. + std::shared_ptr retrieveGraphTask(); + + // Set the appropriate graph task for the backward pass. Can be called only + // once. + void setGraphTask(std::shared_ptr graphTask); + + // Resets the graph task to ensure we can run another distributed backward + // pass for the same autograd context. + void resetGraphTask(); + + // Waits for all outstanding RPCs for this context to finish and clears all + // outstanding rpcs held in this context. This should be called only once. 
+ c10::intrusive_ptr clearAndWaitForOutstandingRpcsAsync(); + + void clearOutstandingRpcs(); + + // Record an event to mark the completion of gradient computation. These + // events will later help to properly synchronize gradients consumptions + // in getGradients(). We need these events because backward and + // optimizer.step are separate RPC calls, and will occur on different CUDA + // streams. Without synchronization, it is possible that gradients are + // consumed before they are ready. + void recordGradEvent(c10::Device device); + + const int64_t contextId_; + + // Set containing known worker IDs, used in cleaning up autograd context. + // Whenever a sendRpcBackward is attached to the autograd graph for this + // context, the destination is added here. + std::unordered_set knownWorkerIds_; + + // Map from autograd_message_id to appropriate 'send' autograd function. + std::unordered_map> + sendAutogradFunctions_; + + // Map from autograd_message_id to appropriate 'recv' autograd function. + std::unordered_map> + recvAutogradFunctions_; + + // Gradients accumulated in this context so far. The key is the variable on + // which the gradient needs to be accumulated and the value is the gradient + // that needs to be accumulated on that variable.. + c10::Dict accumulatedGrads_; + + // See comments for recordGradEvent(c10::Device device); + std::unordered_map gradReadyEvents_; + const c10::impl::VirtualGuardImpl impl_; + + // The autograd GraphTask for the backward pass on this node for this context. + std::shared_ptr graphTask_; + + // List of futures for RPCs initiated by this node to propagate gradients to + // other nodes. The distributed autograd engine on this node can return + // successfully only if all these futures are done and are successful. + std::vector> outStandingRpcs_; + + // Lock to protect concurrent modification of the context. 
+ mutable std::mutex lock_; +}; + +using ContextPtr = std::shared_ptr; + +// This class stores a shared_ptr to a DistAutogradContext instance in a +// thread local variable. The instance is given by the call site. The class +// doesn't know the current context. It's just a util class. +class TORCH_API ThreadLocalDistAutogradContext { + public: + // Store 'new_context' to the thread local variable maintained by this class. + explicit ThreadLocalDistAutogradContext(ContextPtr&& new_context); + ~ThreadLocalDistAutogradContext(); + + // Retrieve the stored DistAutogradContext instance. + static ContextPtr getContextPtr(); + + private: + ContextPtr prev_context_ptr_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h new file mode 100644 index 0000000000000000000000000000000000000000..aef84bb4113f439f667d3745a426266db14ccbaa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/engine/dist_engine.h @@ -0,0 +1,177 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Forward declaration. +class BackwardPassCleanupGuard; + +// This is a singleton class responsible for running distributed backward +// passes. This engine relies heavily on the vanilla autograd engine and tries +// to reuse it as much as possible. 
This class is mostly responsible for the +// distributed aspects of autograd and tries to hook into the autograd engine +// where convenient. + +// Unlike the vanilla autograd engine, the distributed autograd engine +// accumulates the gradients in the appropriate DistAutogradContext. This avoids +// multiple trainer nodes stomping on each others gradients. +class TORCH_API DistEngine { + public: + // Retrieve the singleton instance. + static DistEngine& getInstance(); + + // Given a list of root variables, start the distributed backwards pass from + // these variables and accumulate all the gradients in the current autograd + // context on each node. This method is used to kickoff distributed autograd + // on a single node. + void execute( + int64_t context_id, + const torch::autograd::variable_list& roots, + bool retainGraph); + + // Given a send function to execute in the autograd engine, ensures we compute + // dependencies once for this node and enqueues the send function for execute + // in the engine. + // This method is used to kick off the autograd computation on a node when it + // receives gradients from the corresponding 'recv' method on another node. + // The gradients are accumulated in the provided autograd context. + c10::intrusive_ptr executeSendFunctionAsync( + const ContextPtr& autogradContext, + const std::shared_ptr& sendFunction, + bool retainGraph); + + // Number of backward passes currently running for the Distributed Engine. + size_t numBackwardPasses() const; + + // Returns key-value pairs consisting of useful debugging information related + // to distributed autograd. + std::unordered_map getDebugInfo() const; + + DistEngine(const DistEngine&) = delete; + DistEngine& operator=(const DistEngine&) = delete; + DistEngine(DistEngine&&) = delete; + DistEngine& operator=(DistEngine&&) = delete; + + private: + // Make sure this is a singleton. 
+ DistEngine(); + ~DistEngine(); + + // Validates the input roots for the backward computations and retrieves the + // appropriate root edges and corresponding gradients. Populates root_edges + // with the appropriate gradient edges and grads with the gradients for each + // edge. + void validateRootsAndRetrieveEdges( + const torch::autograd::variable_list& roots, + torch::autograd::edge_list& rootEdges, + torch::autograd::variable_list& grads); + + // Given the autograd context, root edges and grads, we compute dependencies + // for the local node and fill out the provided GraphTask and GraphRoot with + // appropriate information for the local autograd engine. + // We also determine all leaf nodes(functions) in the graph and accumulate + // them in outputEdges. + void computeDependencies( + const ContextPtr& context, + const torch::autograd::edge_list& rootEdges, + const torch::autograd::variable_list& grads, + const std::shared_ptr& graphRoot, + torch::autograd::edge_list& outputEdges, + bool retainGraph); + + // Given a pre-populated GraphTask and a root node, compute the backward pass + // for the autograd graph until the graph task ready queue is empty. + // + // This method assumes that the appropriate GraphTask has already been + // initialized appropriately. It will construct a local ready queue to + // traverse the GraphTask instead of using the GraphTask embedded + // cpu_ready_queue, this is because dist engine might run the same GraphTask + // from different SendFunctions concurrently in different threads. The method + // will only mark the GraphTask as completed when it needs to, which means it + // might not mark as completed for every call as dist engine would like to + // keep the GraphTask alive when it not receives all gradients. + // + // When `incrementOutstandingTasks=false`, the function does not increment + // 'outstanding_tasks_' in the appropriate GraphTask. 
It is assumed we've + // already done this before hand for this task (to ensure we don't pre-mark + // this graph_task as completed). This is useful in the distributed autograd + // case where we need to increment 'outstanding_tasks_' first to indicate the + // local autograd engine the graph task is not completed until it receives the + // signals from other workers over the network. + // + // XXX: calling this function assumes that we will have NO GPU nodetasks be + // executed for the graph_task, the caller of this function need to ensure + // this otherwise there will be undefined behaviors. A correct way to fix this + // is to re-design the autograd engine so that GPU worker thread to behave the + // same as CPU caller thread, record the operation/thread for the device, and + // reuse it in backward. + // TODO: 1. Add assert in the dist engine to ensure no GPU NodeTasks during + // backward + // 2. properly setup the thread local ready queue to enable reentrant + // backwards + void execute_graph_task_until_ready_queue_empty( + torch::autograd::NodeTask&& node_task, + bool incrementOutstandingTasks = true); + + // Run the local autograd engine using the provided graphTask and graphRoot + // and accumulate the gradients part 'outputEdges' in the provided autograd + // context. + c10::intrusive_ptr runEngineAndAccumulateGradients( + const ContextPtr& autogradContext, + const std::shared_ptr& graphRoot, + const torch::autograd::edge_list& outputEdges, + bool incrementOutStandingTasks = true); + + // Run after the backward pass is done to appropriately cleanup structures. + void cleanupBackwardPass(const ContextPtr& autogradContext); + + // Global thread to execute CPU continuations. 
+ void globalCpuThread( + const std::shared_ptr& ready_queue); + + // Set of autograd context_ids, which we have already initialized for + // distributed autograd on this node (e.g.: already computed dependencies) + std::unordered_set initializedContextIds_; + + mutable std::mutex initializedContextIdsLock_; + + // Reference to local autograd engine. + torch::autograd::Engine& engine_; + + // Ready queue used by the CPU thread in distributed engine. + // See Note [GPU to CPU continuations] + std::shared_ptr global_cpu_ready_queue_; + + // See Note [GPU to CPU continuations] + std::thread global_cpu_thread_; + + friend class BackwardPassCleanupGuard; +}; + +// Guard to clean up resources once the backward pass is done. +class BackwardPassCleanupGuard { + public: + explicit BackwardPassCleanupGuard(ContextPtr autogradContext) + : autogradContext_(std::move(autogradContext)) {} + + ~BackwardPassCleanupGuard() { + DistEngine::getInstance().cleanupBackwardPass(autogradContext_); + } + + private: + ContextPtr autogradContext_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..ec9959de9fc137a3da052c0be05e566ede4c6bdb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/recvrpc_backward.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Forward declarations. +class DistAutogradContext; + +// As part of our distributed autograd implementation, whenever we receive an +// RPC from a node, we add a 'RecvRpcBackward' autograd function to the +// autograd graph. This is more or less a placeholder function that is used to +// pass gradients to the remote host during the backward pass. The inputs to the +// RPC function are the inputs to this autograd function. +class TORCH_API RecvRpcBackward : public torch::autograd::Node { + public: + explicit RecvRpcBackward( + const AutogradMetadata& autogradMetadata, + const std::shared_ptr& autogradContext, + rpc::worker_id_t fromWorkerId, + rpc::DeviceMap deviceMap); + + torch::autograd::variable_list apply( + torch::autograd::variable_list&& grads) override; + + private: + const AutogradMetadata autogradMetadata_; + + // Hold a weak reference to the autograd context to avoid circular + // dependencies with the context (since it holds a reference to + // RecvRpcBackward). + std::weak_ptr autogradContext_; + + // The worker id from which the RPC was received. During the backward pass, + // we need to propagate the gradients to this workerId. 
+ rpc::worker_id_t fromWorkerId_; + + // Device mapping for tensors sent over RPC. + const rpc::DeviceMap deviceMap_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h new file mode 100644 index 0000000000000000000000000000000000000000..e9d651a206361595c8c5f82d67545f29811d8b51 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/functions/sendrpc_backward.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::autograd { + +// As part of our distributed autograd implementation, whenever we send an RPC +// from one node to another, we add a 'SendRpcBackward' autograd function to the +// autograd graph. This is more or less a placeholder function that is used to +// kickoff the autograd engine on the current worker on the backward pass. The +// edges for this autograd function are the inputs to the RPC method. +// +// During the backward pass, this function is queued for execution in the +// autograd engine which eventually runs the rest of the autograd graph. +struct TORCH_API SendRpcBackward : public torch::autograd::Node { + public: + torch::autograd::variable_list apply( + torch::autograd::variable_list&& inputs) override; + + // SendRpcBackward is actually the root of an autograd graph on the local + // node. 
As a result, it doesn't receive any 'inputs', but rather the RPC + // framework passes gradients over to this function to kickoff local autograd + // computation. + void setGrads(const torch::autograd::variable_list& grads); + + // Retrieve the grads for the function. + const torch::autograd::variable_list& getGrads() const; + + private: + torch::autograd::variable_list grads_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..2dd0604b70cc367ced7f549ec00841affbe4b7f9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/python_autograd.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::autograd { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h new file mode 100644 index 0000000000000000000000000000000000000000..c6b35f8f5c9590e55195de8618c6acc6de15e719 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/autograd_metadata.h @@ -0,0 +1,26 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// This structure represents autograd metadata that we need to pass across +// different nodes when we call an RPC which needs autograd computation. +struct TORCH_API AutogradMetadata { + AutogradMetadata(int64_t autogradContextId, int64_t autogradMessageId); + + // autogradContextId_ is a globally unique integer that identifies a + // particular distributed autograd pass. + int64_t autogradContextId; + // autogradMessageId_ is a globally unique integer that identifies a pair + // of send/recv autograd functions. + int64_t autogradMessageId; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h new file mode 100644 index 0000000000000000000000000000000000000000..94948652d8f95a90343351b180433f10db90b353 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_req.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Used to request other workers to clean up their autograd context. +class TORCH_API CleanupAutogradContextReq : public rpc::RpcCommandBase { + public: + explicit CleanupAutogradContextReq(int64_t context_id); + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieve the context id we are cleaning up with this message. + int64_t getContextId(); + + private: + int64_t context_id_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..6b1733c5aa61bd1c39837e7b84bbb748a3c7bc77 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/cleanup_autograd_context_resp.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Empty response for CleanupAutogradContextReq. Send to acknowledge receipt of +// a CleanupAutogradContextReq. +class TORCH_API CleanupAutogradContextResp : public rpc::RpcCommandBase { + public: + CleanupAutogradContextResp() = default; + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h new file mode 100644 index 0000000000000000000000000000000000000000..e56f32af955b8653fecf6dd92c06eddc76ab1c2a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_req.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// Used to propagate gradients from one node to another during a distributed +// backwards pass. This RPC call is invoked when we hit a `recv` autograd +// function during backward pass execution. +class TORCH_API PropagateGradientsReq : public rpc::RpcCommandBase { + public: + PropagateGradientsReq( + const AutogradMetadata& autogradMetadata, + std::vector grads, + bool retainGraph = false); + + const AutogradMetadata& getAutogradMetadata(); + + const std::vector& getGrads(); + + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Whether or not to retain the autograd graph. + bool retainGraph(); + + private: + AutogradMetadata autogradMetadata_; + std::vector grads_; + bool retainGraph_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..4e2fd092c76b3a36479d61e4376dd8b4e5da5135 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/propagate_gradients_resp.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Response for the PropagateGradients call. Currently, this class is mostly +// just a placeholder and sends an empty message over the wire. The purpose of +// this RPC command is to indicate whether or not the PropagateGradientsReq call +// was successfully or not. +class TORCH_API PropagateGradientsResp : public rpc::RpcCommandBase { + public: + PropagateGradientsResp() = default; + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h new file mode 100644 index 0000000000000000000000000000000000000000..66df8c943e9d3d2a9af59fa135ab584cc95ae8f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_autograd.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Represents an RPC that includes autograd information. This class basically +// wraps another `RpcCommandBase` object which represents the actual RPC and has +// additional autograd information associated with that RPC. +class TORCH_API RpcWithAutograd final : public rpc::RpcCommandBase { + public: + // Used when we are sending an RPC over the wire. + RpcWithAutograd( + rpc::worker_id_t fromWorkerId, + rpc::MessageType messageType, + const AutogradMetadata& autogradMetadata, + c10::intrusive_ptr wrappedMessage, + rpc::DeviceMap deviceMap = {}); + + // Used when receiving an RPC over the wire. + RpcWithAutograd( + rpc::worker_id_t fromWorkerId, + rpc::MessageType messageType, + const AutogradMetadata& autogradMetadata, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + rpc::DeviceMap deviceMap = {}); + + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieves tensors as part of this RPC, which need to be considered for + // autograd computations. 
+ std::vector& tensors(); + + const AutogradMetadata& autogradMetadata() const; + + RpcCommandBase& wrappedRpc(); + + void setWrappedRpc(std::unique_ptr wrappedRpc); + + std::unique_ptr moveWrappedRpc() &&; + + // Message type of the wrapped RPC. + rpc::MessageType wrappedMessageType() const; + + // Retrieve the worker id from which the RPC originated. + rpc::worker_id_t fromWorkerId() const; + + // Retrieve the device map. + const rpc::DeviceMap& deviceMap(); + + private: + // WorkerId from which this RPC originated. This is necessary for knowing + // which worker we need to contact during the backward pass. + rpc::worker_id_t fromWorkerId_; + + // Message type for this call. + rpc::MessageType messageType_; + + AutogradMetadata autogradMetadata_; + + // Since wrappedMessage_ is destructively constructed from wrappedRpc_, + // they are valid exclusively. They are used for different purpose. + // wrappedRpc_ is used while constructing receive rpcWithAutograd; + // wrappedMessage_ is used while constructing send rpcWithAutograd; + + // When receive rpcWithAutograd is constructed fromMessage, it is valid; + // When send rpcWithAutograd is constructed before toMessage, it is nullptr; + std::unique_ptr wrappedRpc_; + + // Serialized message representing wrappedRpc_. Used mostly as a cache to + // avoid serializing the request twice. + // When receive rpcWithAutograd is constructed fromMessage, it is nullptr; + // When send rpcWithAutograd is constructed before toMessage, it is valid; + c10::intrusive_ptr wrappedMessage_; + + // message type of the wrappedMessage, this is stored separately since + // wrappedMessage_ is not always guaranteed to be populated. + rpc::MessageType wrappedMessageType_; + + // Tensors part of the wrappedRpc that need to be considered for autograd. + std::vector tensors_; + + // Device mapping for tensors that are sent across an RPC to another node. 
+ rpc::DeviceMap deviceMap_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h new file mode 100644 index 0000000000000000000000000000000000000000..8bfc2764e9e172cc63d8325cc363ffd35589106e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_req.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +class TORCH_API RpcWithProfilingReq : public rpc::RpcCommandBase { + public: + // For sending RPCs, invoked when client is creating this RPC command. + RpcWithProfilingReq( + rpc::MessageType messageType, + c10::intrusive_ptr wrappedMessage, + torch::autograd::profiler::ProfilerConfig&& profilerConfig, + rpc::ProfilingId profilingKeyId); + + // For receiving an RPC + // Used in fromMessage. + RpcWithProfilingReq( + rpc::MessageType messageType, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + torch::autograd::profiler::ProfilerConfig&& profilerConfig, + rpc::ProfilingId profilingKeyId); + + // Convert this RPC Command to a Message that can be sent over the wire. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + // Retrieve the profiling data that is associated with this command. 
+ torch::autograd::profiler::ProfilerConfig getProfilingConfig() const; + // Retrieve the globally unique profiling ID corresponding to this command. + const rpc::ProfilingId& getProfilingId() const; + // Retrieve the original RPC which this ProfilingRPC wraps. + RpcCommandBase& wrappedRpc(); + // Destructively move the wrapped RPC. + std::unique_ptr moveWrappedRpc() &&; + // Message type of the wrapped RPC + rpc::MessageType wrappedMessageType() const; + void setWrappedRpc(std::unique_ptr wrappedRpc); + + private: + // message type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::MessageType messageType_; + // wrapped message + c10::intrusive_ptr wrappedMessage_; + std::unique_ptr wrappedRpc_; + rpc::MessageType wrappedMessageType_; + std::vector tensors_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const torch::autograd::profiler::ProfilerConfig profilerConfig_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::ProfilingId profilingKeyId_; +}; +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..fb9db4ccb84e2f961a3829d2ec28dfec7fcb2136 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rpc_with_profiling_resp.h @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::autograd { +class TORCH_API RpcWithProfilingResp : public rpc::RpcCommandBase { + public: + // For sending RPCs over the wire + RpcWithProfilingResp( + rpc::MessageType messageType, + c10::intrusive_ptr wrappedMessage, + std::vector profiledEvents, + rpc::ProfilingId profilingId); + + // For receiving RPCs. Used in from message when converting a message received + // over the wire. + RpcWithProfilingResp( + rpc::MessageType messageType, + std::unique_ptr wrappedRpc, + rpc::MessageType wrappedMessageType, + std::vector tensors, + std::vector profiledEvents, + rpc::ProfilingId profilingId); + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + // Retrieve remote Events + std::vector getProfiledEvents() const; + // Retrieve the globally unique profiling ID corresponding to this command. + const rpc::ProfilingId& getProfilingId() const; + // Retrieve the original RPC which this ProfilingRPC wraps. + RpcCommandBase& wrappedRpc(); + // Destructively move the wrapped RPC. 
+ std::unique_ptr moveWrappedRpc() &&; + // Message type of the wrapped RPC + rpc::MessageType wrappedMessageType() const; + // Set the wrapped RPC for this RPC. + void setWrappedRpc(std::unique_ptr wrappedRpc); + + private: + // message type + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::MessageType messageType_; + // wrapped message + c10::intrusive_ptr wrappedMessage_; + std::unique_ptr wrappedRpc_; + rpc::MessageType wrappedMessageType_; + std::vector tensors_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector profiledEvents_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::ProfilingId profilingId_; +}; +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h new file mode 100644 index 0000000000000000000000000000000000000000..1cb78980aa6deeef29a2e918eaf87ed970b7a127 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_req.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::autograd { + +// Internal system RPC to invoke distributed backward pass on remote nodes when +// 'rref.backward()' is invoked. 
+class TORCH_API RRefBackwardReq : public rpc::RpcCommandBase { + public: + RRefBackwardReq( + const rpc::RRefId& rrefId, + int64_t autogradContextId, + bool retainGraph = false); + + const rpc::RRefId& getRRefId() const; + + int64_t getAutogradContextId() const; + + bool retainGraph() const; + + // Serialization and deserialization methods. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const rpc::RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int64_t autogradContextId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool retainGraph_; +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..c4bf412302ced62d07a5ca8a24675376f1ed2b68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/rpc_messages/rref_backward_resp.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::autograd { + +// Response for the RRefBackwardReq. 
+class TORCH_API RRefBackwardResp : public rpc::RpcCommandBase { + public: + RRefBackwardResp() = default; + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const rpc::Message& message); +}; + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..47903ceb5e9bf2491e8896cdc2fed920b03e9448 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/autograd/utils.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::autograd { + +// This method is used to attach the 'send' autograd function to the autograd +// graph when we use RPC. This method creates a new 'send' autograd function +// and attaches the provided tensors as next_edges to the 'send' function. In +// addition to this, it also registers the send function in the provided +// autograd context. Finally, the RPC message is updated with appropriate +// autograd information for the recipient. +TORCH_API void addSendRpcBackward( + const ContextPtr& autogradContext, + const AutogradMetadata& autogradMetadata, + std::vector& tensors); + +// This method is used to attach the 'recv' autograd function to the autograd +// graph when we use RPC. This method creates a new 'recv' autograd function +// and attaches the provided tensors as inputs to the 'recv' function. 
It +// creates a new autograd context if needed and registers the 'recv' function +// with this context. +// +// Returns a pointer to the autograd context created. +TORCH_API ContextPtr addRecvRpcBackward( + const AutogradMetadata& autogradMetadata, + std::vector& tensors, + rpc::worker_id_t fromWorkerId, + const rpc::DeviceMap& deviceMap); + +// This method is a wrapper utility used internally to wrap autograd info +// and attach autograd function for each type of rpc call if it has valid +// context and tensors require grads or forceGradRecording is true, in this +// case, return RpcWithAutograd message; otherwise return original rpc message. +// NB: forceGradRecording is useful when the request does not contain any tensor +// but the corresponding response does. +TORCH_API c10::intrusive_ptr getMessageWithAutograd( + const rpc::worker_id_t dstId, + c10::intrusive_ptr wrappedRpcMsg, + rpc::MessageType msgType, + bool forceGradRecording = false, + const rpc::DeviceMap& deviceMap = {}); + +// Send message after autograd checking +TORCH_API c10::intrusive_ptr sendMessageWithAutograd( + rpc::RpcAgent& agent, + const rpc::WorkerInfo& dst, + c10::intrusive_ptr wrappedRpcMsg, + bool forceGradRecording = false, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + bool forceDisableProfiling = false); + +} // namespace torch::distributed::autograd + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8a979143f9119d48da9dac1a89a943ddf4a85f5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backend.hpp @@ -0,0 +1,535 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include + +constexpr auto kBackendDefaultTimeout = + std::chrono::milliseconds(30 * 60 * 1000); + +namespace c10d { + +enum class ErrorType { + SUCCESS = 0, + TIMEOUT = 1, + // e.g., NCCL error, etc + COMM_ERROR = 2, + // TODO, do we need to distinguish between remote timeout or remote COMM + // errors? + REMOTE_ERROR = 3 +}; + +class TORCH_API Backend : public torch::CustomClassHolder { + public: + // Backend Options is a base struct that defines the basic options + // when constructing a Backend. Each Backend subclass should + // extend this struct and define its options if it wants to provide more + // config options (beyond basic ones defined here) to end user. 
+ struct TORCH_API Options : torch::CustomClassHolder { + explicit Options( + std::string backend, + std::chrono::milliseconds timeout = kBackendDefaultTimeout) + : timeout(timeout), backend(std::move(backend)) {} + ~Options() override = default; + + std::chrono::milliseconds timeout; + + // backend name + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string backend; + std::string group_name; + std::vector global_ranks_in_group; + }; + + explicit Backend(int rank, int size); + ~Backend() override = 0; + + int getRank() const { + return rank_; + } + + int getSize() const { + return size_; + } + + // Returns an unique opaque ID of this backend that can be used to correlate + // with its collectives. + int64_t getID() const { + return reinterpret_cast(this); + } + + virtual bool supportsSplitting() const { + return false; + } + + virtual bool supportsCoalescing() const { + return false; + } + + virtual bool supportsTimeEstimation() const { + return false; + } + + virtual bool supportsShrinking() const { + return false; + } + + // Shrink the backend by excluding specified ranks. Backends that support + // communicator shrinking should override this and return a new backend + // instance representing the shrunken group. Backends may use opts_override + // to supply backend-specific options for the new group. 
+ virtual c10::intrusive_ptr shrink( + const std::vector& /*ranks_to_exclude*/, + int /*shrink_flags*/ = 0, + const c10::intrusive_ptr& /*opts_override*/ = nullptr) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support shrink")); + } + + virtual void setTimeout(std::chrono::milliseconds timeout) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support setting timeout")); + } + + virtual void startCoalescing() { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not implement startCoalescing")); + } + + virtual c10::intrusive_ptr endCoalescing() { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not implement endCoalescing")); + } + + // Subclasses must override this method to return the backend name + virtual const std::string getBackendName() const { + TORCH_INTERNAL_ASSERT(false, "getBackendName is not implemented."); + } + + // Subclasses must override this method to return the backend name + virtual c10::intrusive_ptr getBackendOptions() { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not implement getBackendOptions.")); + } + + virtual c10::intrusive_ptr broadcast( + std::vector& /* tensors */, + const BroadcastOptions& /* opts */ = BroadcastOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support broadcast")); + } + + virtual c10::intrusive_ptr allreduce( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support allreduce")); + } + + virtual c10::intrusive_ptr allreduce_sparse( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allreduce sparse")); + } + + virtual c10::intrusive_ptr allreduce_coalesced( + std::vector& /* tensors 
*/, + const AllreduceCoalescedOptions& /* opts */ = + AllreduceCoalescedOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allreduce_coalesced")); + } + + virtual c10::intrusive_ptr reduce( + std::vector& /* tensors */, + const ReduceOptions& /* opts */ = ReduceOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support reduce")); + } + + virtual c10::intrusive_ptr allgather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support allgather")); + } + + // Gathers a single tensor inputBuffer into a single buffer outputBuffer that + // is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE. + // For implementers of ProcessGroup API and advanced users only. + // Note: this function will be deprecated in near future. + virtual c10::intrusive_ptr _allgather_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support _allgather_base")); + } + + // This function is deprecated and will be moved out of Backend to comms: + // * do not add dependencies on this function, + // * do not implement it in your Backend, implement _allgather_base + // instead. + virtual c10::intrusive_ptr allgather_coalesced( + std::vector>& /* outputTensorLists */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allgather_coalesced")); + } + + // This function is a coalesced version of `allgather_into_tensor` (currently + // still named as `_allgather_base`). 
Each tensor in the vector corresponds to + // an input/output of one `allgather_into_tensor` operation. + virtual c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support allgather_into_tensor_coalesced")); + } + + virtual c10::intrusive_ptr gather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const GatherOptions& /* opts */ = GatherOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support gather")); + } + + virtual c10::intrusive_ptr scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ScatterOptions& /* opts */ = ScatterOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support scatter")); + } + + virtual c10::intrusive_ptr reduce_scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support reduce_scatter")); + } + + virtual c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support _reduce_scatter_base")); + } + + // This function is a coalesced version of `reduce_scatter_tensor` (currently + // still named as `_reduce_scatter_base`). Each tensor in the vector + // corresponds to an input/output of one `reduce_scatter_tensor` operation. 
+ virtual c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const ReduceScatterOptions& /* opts */ = ReduceScatterOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", + getBackendName(), + " does not support reduce_scatter_tensor_coalesced")); + } + + virtual c10::intrusive_ptr alltoall_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + std::vector& /* outputSplitSizes */, + std::vector& /* inputSplitSizes */, + const AllToAllOptions& /* opts */ = AllToAllOptions()) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support alltoall_base")); + } + + virtual c10::intrusive_ptr alltoall( + std::vector& /* outputTensors */, + std::vector& /* inputTensors */, + const AllToAllOptions& opts = AllToAllOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support alltoall")); + } + + virtual void monitoredBarrier( + const BarrierOptions& /* unused */, + bool /* unused */ = false) { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not support monitoredBarrier, only GLOO supports monitored barrier.")); + } + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + virtual void setSequenceNumberForGroup() { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not yet support sequence numbers.")); + } + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. 
+ virtual uint64_t getSequenceNumberForGroup() { + auto backendName = getBackendName(); + TORCH_CHECK( + false, + c10::str( + "Backend ", + backendName, + " does not yet support sequence numbers.")); + } + + virtual c10::intrusive_ptr send( + std::vector& /* tensors */, + int /* dstRank */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support send")); + } + + virtual c10::intrusive_ptr recv( + std::vector& /* tensors */, + int /* srcRank */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support recv")); + } + + virtual c10::intrusive_ptr recvAnysource( + std::vector& /* tensors */, + int /* tag */) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support recvAnysource")); + } + + virtual c10::intrusive_ptr barrier( + const BarrierOptions& /* opts */ = BarrierOptions()) { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support barrier")); + } + + virtual void registerOnCompletionHook( + std::function)>&& hook) { + TORCH_CHECK( + false, + "Only ProcessGrouppNCCL supports onCompletion hook, but got ", + getBackendName(), + " backend."); + } + + virtual void waitForPendingWorks() { + TORCH_CHECK( + false, + "Only ProcessGrouppNCCL supports waitForPendingWorks, but got ", + getBackendName(), + " backend."); + } + + virtual void enableCollectivesTiming() { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), + " is missing implementation of enableCollectivesTiming."); + } + + virtual c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), + " is missing implementation of split."); + } + + virtual c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) { + TORCH_CHECK( + false, + "Backend ", + getBackendName(), 
+ " is missing implementation of merge."); + } + + bool hasHooks() const { + return onCompletionHook_ != nullptr; + } + + // Do not call this directly, use ProcessGroup::setGroupName instead. + virtual void setGroupUid(const std::string& pg_uid) { + pg_uid_ = pg_uid; + } + + const std::string& getGroupUid() const { + return pg_uid_; + } + + void setGroupDesc(const std::string& desc) { + pg_desc_ = desc; + } + + const std::string& getGroupDesc() const { + return pg_desc_; + } + + // See similar functions in ProcessGroup.hpp for context. + std::optional getBoundDeviceId() const { + return bound_device_id_; + } + + // Perform an eager connect to the specified device if the backend supports + // it. + virtual void eagerConnectSingleDevice(at::Device device) { + // no-op in the default case; this is an optimization some + // backends may perform + } + + void setBoundDeviceId(std::optional device) { + if (device) { + TORCH_CHECK(device->has_index(), "setBoundDeviceId must have an index"); + } + bound_device_id_ = device; + } + + virtual ErrorType getError() { + TORCH_CHECK( + false, + c10::str("Backend ", getBackendName(), " does not support getError")); + } + + virtual std::shared_ptr getMemAllocator() { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support getMemAllocator")); + } + + // Allocate tensor (aten::empty) from backend's communication-optimized memory + // pool + virtual at::Tensor allocateTensor(long size, at::TensorOptions options = {}) { + TORCH_CHECK( + false, + c10::str( + "Backend ", getBackendName(), " does not support allocateTensor")); + } + + // Returns true if backend supports tensor allocation + virtual bool supportsTensorAlloc(c10::DeviceIndex deviceIdx) { + // Change to true in concrete backend if supported + return false; + } + + // Aborts all pending operations and connections in the backend if the backend + // supports it. + virtual void abort() {} + + // Shutdown the backend if the backend supports it. 
This should be used for + // normal shutdown. + virtual void shutdown() {} + + protected: + // Implementations of this interface need to call this to setup + // appropriate logging etc. + void init(); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int rank_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int size_; + // Debug level setting. It is parsed once when ProcessGroup is constructed and + // remains the same across use of this process group. + DebugLevel dist_debug_level_; + std::string pg_uid_; + std::string pg_desc_; + + std::function)> onCompletionHook_; + + std::optional bound_device_id_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp new file mode 100644 index 0000000000000000000000000000000000000000..329dc1f97857e93886096a508bcde1b20c75146d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Backoff.hpp @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { + +class TORCH_API Backoff { + public: + virtual ~Backoff() = default; + + virtual std::chrono::milliseconds nextBackoff() = 0; + virtual void reset() = 0; + + void sleepBackoff() { + std::this_thread::sleep_for(nextBackoff()); + } +}; + +class TORCH_API ExponentialBackoffWithJitter : public Backoff { + public: + ExponentialBackoffWithJitter(); + + std::chrono::milliseconds nextBackoff() override; + void reset() override; + + public: + 
std::chrono::milliseconds initialInterval{500}; + double randomizationFactor{0.5}; + double multiplier{1.5}; + std::chrono::milliseconds maxInterval{60000}; + + private: + std::mt19937 gen_; + std::chrono::milliseconds currentInterval_{0}; +}; + +class TORCH_API FixedBackoff : public Backoff { + public: + FixedBackoff(std::chrono::milliseconds interval); + + std::chrono::milliseconds nextBackoff() override; + void reset() override; + + private: + std::chrono::milliseconds interval_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..453d2f4e64f83cf9466ce951e8be8a4f666a1393 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FakeProcessGroup.hpp @@ -0,0 +1,258 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d { + +class FakeWork : public Work { + public: + int seq_id = -1; + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return true; + } + + c10::intrusive_ptr getFuture() override { + auto fut = c10::make_intrusive(c10::NoneType::get()); + fut->markCompleted(); + return fut; + } +}; + +class FakeProcessGroup : public Backend { + public: + struct Options : Backend::Options { + explicit Options() : Backend::Options("fake") {} + + int fake_option = 0; + bool error_on_collective = false; + }; + + // Static factory method for official APIs + static c10::intrusive_ptr _create_internal( + int rank, + int size, + 
c10::intrusive_ptr options = c10::make_intrusive()) { + return c10::make_intrusive( + rank, size, std::move(options)); + } + + const std::string getBackendName() const override { + return "fake"; + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + c10::intrusive_ptr broadcast( + std::vector& /* tensors */, + const BroadcastOptions& /* opts */ = BroadcastOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce_sparse( + std::vector& /* tensors */, + const AllreduceOptions& /* opts */ = AllreduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allreduce_coalesced( + std::vector& /* tensors */, + const AllreduceCoalescedOptions& /* opts */ = + AllreduceCoalescedOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce( + std::vector& /* tensors */, + const ReduceOptions& /* opts */ = ReduceOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + // NOTE [allgather on FakeProcessGroup] + // Assume each rank have the same input tensor so we just copy to the results + // since it's not a real allgather, we simply make this copying logic to let + // some simple validation works (i.e. calling allgather to see if each rank + // have the same tensor or not). + // + // NOTE: in general it's not good form to try to make FakeProcessGroup work + // with real data, but the reasoning here is that we want FakeProcessGroup to + // work with DeviceMesh's init code that have the data validation, which + // makes it worth the tradeoff. 
+ c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + for (auto& tensor : outputTensors[0]) { + tensor.copy_(inputTensors[0]); + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + auto chunks = outputBuffer.chunk(size_); + for (auto& tensor : chunks) { + tensor.copy_(inputBuffer); + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr allgather_coalesced( + std::vector>& /* outputTensorLists */, + std::vector& /* inputTensors */, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& /* opts */ = AllgatherOptions()) override { + checkCollectiveError(); + for (size_t i = 0; i < outputs.size(); ++i) { + auto chunks = outputs[i].chunk(size_); + for (auto& chunk : chunks) { + chunk.copy_(inputs[i]); + } + } + return c10::make_intrusive(); + } + + c10::intrusive_ptr gather( + std::vector>& /* outputTensors */, + std::vector& /* inputTensors */, + const GatherOptions& /* opts */ = GatherOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ScatterOptions& /* opts */ = ScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce_scatter( + std::vector& /* outputTensors */, + std::vector>& /* inputTensors */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + 
c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& /* outputs */, + std::vector& /* inputs */, + const ReduceScatterOptions& /* opts */ = + ReduceScatterOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr alltoall_base( + at::Tensor& /* outputBuffer */, + at::Tensor& /* inputBuffer */, + std::vector& /* outputSplitSizes */, + std::vector& /* inputSplitSizes */, + const AllToAllOptions& /* opts */ = AllToAllOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr alltoall( + std::vector& /* outputTensors */, + std::vector& /* inputTensors */, + const AllToAllOptions& opts = AllToAllOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr send( + std::vector& /* tensors */, + int /* dstRank */, + int /* tag */) override { + return c10::make_intrusive(); + } + + c10::intrusive_ptr recv( + std::vector& /* tensors */, + int /* srcRank */, + int /* tag */) override { + return c10::make_intrusive(); + } + + c10::intrusive_ptr recvAnysource( + std::vector& /* tensors */, + int /* tag */) override { + return c10::make_intrusive(); + } + + void startCoalescing() override { + // No-op + } + + c10::intrusive_ptr endCoalescing(OpType /* optype */) { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr endCoalescing() override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + c10::intrusive_ptr barrier( + const BarrierOptions& /* opts */ = BarrierOptions()) override { + checkCollectiveError(); + return c10::make_intrusive(); + } + + // Private constructor used by official APIs + FakeProcessGroup(int rank, int 
size, c10::intrusive_ptr options) + : Backend(rank, size), options_(std::move(options)) {} + c10::intrusive_ptr options_; + + private: + void checkCollectiveError() { + TORCH_CHECK( + !options_ || !options_->error_on_collective, + "FakeProcessGroup collective operation error (error_on_collective=true)"); + } +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7725d9856572a906d688992dd6eaa284d019887d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FileStore.hpp @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include + +namespace c10d { + +class TORCH_API FileStore : public Store { + public: + explicit FileStore(std::string path, int numWorkers); + + c10::intrusive_ptr clone() override; + + ~FileStore() override; + + void set(const std::string& key, const std::vector& value) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + int64_t getNumKeys() override; + + bool deleteKey(const std::string& key) override; + + bool check(const std::vector& keys) override; + + void wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + // Returns the path 
used by the FileStore. + const std::string& getPath() const noexcept { + return path_; + } + + std::vector listKeys() override; + + protected: + int64_t addHelper(const std::string& key, int64_t i); + + std::string path_; + off_t pos_{0}; + + int numWorkers_; + const std::string cleanupKey_; + const std::string refCountKey_; + const std::string regularPrefix_; + const std::string deletePrefix_; + + std::unordered_map> cache_; + + std::mutex activeFileOpLock_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp new file mode 100644 index 0000000000000000000000000000000000000000..87a2cce06f6c36d365988cd664464a6827b2f068 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorder.hpp @@ -0,0 +1,333 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +#include +#include + +#include +#include +#include +#include +#include + +namespace c10d { + +#define DEFINE_CONSTANT(name, value) \ + static c10::IValue name = value; \ + static std::string name##_str = value; +// Update whenever changing contents or formatting of the dump +// (minor when adding fields, major when changing existing fields) +// Also update both JSON and Pickle dumps to make use of the newly defined +// field(s). 
+DEFINE_CONSTANT(version_val, "2.10") +DEFINE_CONSTANT(entries_key, "entries") +DEFINE_CONSTANT(nccl_comm_key, "nccl_comm_state") +DEFINE_CONSTANT(comm_lib_version_key, "comm_lib_version") +DEFINE_CONSTANT(version_key, "version") +DEFINE_CONSTANT(pg_config_key, "pg_config") +DEFINE_CONSTANT(pg_status_key, "pg_status") +DEFINE_CONSTANT(record_id_key, "record_id") +DEFINE_CONSTANT(pg_id_key, "pg_id") +DEFINE_CONSTANT(pg_name_key, "process_group") +DEFINE_CONSTANT(collective_seq_id_key, "collective_seq_id") +DEFINE_CONSTANT(p2p_seq_id_key, "p2p_seq_id") +DEFINE_CONSTANT(is_p2p_key, "is_p2p") +DEFINE_CONSTANT(op_id_key, "op_id") +DEFINE_CONSTANT(profiling_name_key, "profiling_name") +DEFINE_CONSTANT(input_sizes_key, "input_sizes") +DEFINE_CONSTANT(input_dtypes_key, "input_dtypes") +DEFINE_CONSTANT(output_sizes_key, "output_sizes") +DEFINE_CONSTANT(output_dtypes_key, "output_dtypes") +DEFINE_CONSTANT(time_created_key, "time_created_ns") +DEFINE_CONSTANT(duration_key, "duration_ms") +DEFINE_CONSTANT(timeout_key, "timeout_ms") +DEFINE_CONSTANT(frames_key, "frames") +DEFINE_CONSTANT(state_key, "state") +DEFINE_CONSTANT(line_key, "line") +DEFINE_CONSTANT(name_key, "name") +DEFINE_CONSTANT(filename_key, "filename") +DEFINE_CONSTANT(retired_key, "retired") +DEFINE_CONSTANT(time_discovered_started_key, "time_discovered_started_ns") +DEFINE_CONSTANT(time_discovered_completed_key, "time_discovered_completed_ns") +DEFINE_CONSTANT(completed_state, "completed") +DEFINE_CONSTANT(scheduled_state, "scheduled") +DEFINE_CONSTANT(started_state, "started") +DEFINE_CONSTANT(thread_id_key, "thread_id") +DEFINE_CONSTANT(thread_name_key, "thread_name") +#undef DEFINE_CONSTANT + +// Write NCCL debug info to local disk or any storage users define. +// There are some constrains we set for the debug info writer: +// 1. The writer should only be registered once. +// 2. Once registered, users cannot change it including un-register. +// 3. 
It is recommended to register the customized writer in the trainer setup, +// If users don't register before calling launchAsyncDebugDump, then users +// lose the chance to register (and the default writer will be +// auto-registered). +class TORCH_API DebugInfoWriter { + public: + virtual ~DebugInfoWriter() = default; + virtual void write(const std::string& trace); + static DebugInfoWriter& getWriter(int rank); + static void registerWriter(std::unique_ptr writer); + virtual std::string getWriterTarget() { + return filename_; + } + + protected: + DebugInfoWriter( + const std::string& namePrefix, + int rank, + bool enableDynamicFilename = false) { + filename_ = c10::str(namePrefix, rank); + enable_dynamic_filename_ = enableDynamicFilename; + rank_ = rank; + } + std::string filename_; + int rank_; + bool enable_dynamic_filename_; + + private: + static std::unique_ptr writer_; + static std::atomic hasWriterRegistered_; +}; + +template +struct FlightRecorder { + static FlightRecorder* get() { + // intentionally leak on exit + // because this will hold python state that may get destructed + static FlightRecorder* instance = + new FlightRecorder(); + return instance; + } + FlightRecorder() { + max_entries_ = + getCvarInt({"TORCH_FR_BUFFER_SIZE", "TORCH_NCCL_TRACE_BUFFER_SIZE"}, 0); + capture_cpp_stack_ = getCvarBool( + {"TORCH_FR_CPP_STACK", "TORCH_NCCL_TRACE_CPP_STACK"}, false); + enabled_ = max_entries_ > 0; + reset_epoch_start_idx_[0] = 0; + } + struct Entry { + size_t id_; // incremented id in the trace buffer + // used to figure out where in the circular entries + // buffer this entry will be located to + // update state information + size_t reset_epoch_; // epoch when this entry was created + size_t pg_id_; + std::tuple pg_name_; // + + // collective_seq_id and p2p_seq_id refer to actual kernel launches (e.g. 1 + // per coalesced group). + // collective_seq_id only increments for true collective operations (over + // all ranks in the group). 
p2p_seq_id only increments over non-collective + // operations in the group. op_id refers to logical operations (e.g. one per + // op inside coalesced group) + size_t collective_seq_id_; + size_t p2p_seq_id_; + size_t op_id_; + std::string profiling_name_; + + std::shared_ptr traceback_; + // we borrow pointers to start_ and end_ so we can query the state + // on reporting. However, once the event is completed, the call + // to `complete` will clear these. + EventType *start_, *end_; + + // timestamp when the entry was created, likely close to the time the work + // was 'enqueued'- not necessarily started + c10::time_t time_created_; + + // configured timeout for this entry + c10::time_t timeout_ms_; + + // Is this a P2P event? + bool isP2P_; + + std::optional duration_; + + // timestamp when our CPU threads discovered that the kernel started. + // will always be _after_ it actually started, and can be very late + // if the watchdog thread got stuck on CUDA APIs. + std::optional time_discovered_started_; + + // timestamp when our CPU threads discovered that the kernel completed. + // will always be _after_ it actually completed, and can be the same time + // as the discovery of the start if the watchdog thread is stuck on CUDA + // APIs + std::optional time_discovered_completed_; + + // size information for input/output tensors + c10::SmallVector input_dims_; + std::vector input_dtypes_; + c10::SmallVector output_dims_; + std::vector output_dtypes_; + c10::SmallVector sizes_; // flattened from inputs, outputs + std::thread::id thread_id_; + std::string thread_name_; + bool retired_ = false; // is this work entry no longer in the workMetaList_? + // a retired but not completed event has timed out + + // Returns the traceback of current entry, in string form. + // Note: `getTraceback` invokes `torch::symbolize`, which may need to + // acquire the GIL. 
If you don't want to block the current thread or take + // the risk of a GIL deadlock, you can use an asynchronous calling mechanism + // like std::async. + TORCH_API std::string getTraceback(); + }; + + bool enabled_ = false; + bool capture_cpp_stack_ = false; + std::mutex mutex_; + std::vector entries_; + size_t max_entries_ = 0; + size_t next_ = 0; + size_t id_ = 0; + size_t reset_epoch_ = 0; + std::unordered_map + reset_epoch_start_idx_; // maps reset_epoch to the idx where it starts + std::map> all_pg_status_; + std::map, std::vector> + pg_name_to_ranks_; + std::string comm_lib_version_; + + struct TraceIdentifier { + std::optional id; + std::optional reset_epoch; + }; + + TraceIdentifier recordWithResetEnabled( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P); + + std::optional record( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P); + + TORCH_API void record_pg_ranks( + const std::tuple& pg_name, + std::vector ranks); + + void record_accelerator_version(const std::string comm_lib_version); + + void update_state(Entry& r); + + std::vector dump_entries(); + + // Returns the index in entries_ for the given id and reset_epoch. + // Caller must hold mutex_lock before calling this method. + size_t getIdxFromId(size_t id, size_t reset_epoch) const; + + // Returns the entry with the given id and reset_epoch, if it exists. + // Otherwise, returns std::nullopt. 
+ TORCH_API std::optional getEntry( + std::optional id, + std::optional reset_epoch); + + TORCH_API std::optional getEntry(std::optional id); + + /* + Mark an Event as completed and free its events. + This is called by the watchdog thread, and is asynchronous from the + perspective of the main thread. + compute_duration defaults to true since retire_id is only called in the + watchdog thread, which is currently a place we call cuda APIs which may hang, + but care should be taken to avoid computing duration in any function that must + never hang. (timing must also be enabled for compute_duration - see + TORCH_NCCL_ENABLE_TIMING). + */ + TORCH_API void retire_id( + std::optional id, + std::optional reset_epoch, + bool compute_duration = true); + + TORCH_API void retire_id( + std::optional id, + bool compute_duration = true); + + TORCH_API void reset_all(); + + const c10::List getCollectiveTrace( + bool includeStacktraces, + bool onlyActive); + + // dump pg_entries + const c10::Dict getPgConfig(); + + const std::map> + getPgConfigJson(); + + // dump pg_status + const c10::Dict getPgStatus(); + + const std::map> + getPgStatusJson(); + + std::string dump_json( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool onlyActive); + + std::string dump( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); +}; + +// Whether to include stack trace in the Flight Recorder trace (default true) +static std::vector TORCH_INCLUDE_STACK_TRACE = { + "TORCH_INCLUDE_STACK_TRACE"}; + +// Whether to include only active collectives in the Flight Recorder trace +// (default false) +static std::vector TORCH_INCLUDE_ONLY_ACTIVE = { + "TORCH_INCLUDE_ONLY_ACTIVE"}; + +// Dumps the fr traces and additional information about the Process +// Group. 
+TORCH_API std::string dump_fr_trace( + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); + +// Dumps the fr traces and additional information about the Process +// Group in JSON formatted string. +// We don't include stack traces in JSON format as it is far too much data. +TORCH_API std::string dump_fr_trace_json( + bool includeCollectives, + bool onlyActive); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0567b3076c3e534363aa0ad13c0a763c73ff5065 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/FlightRecorderDetail.hpp @@ -0,0 +1,641 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include +#include + +#include + +namespace c10d { + +template +float getDurationFromEvent(EventType& start, EventType& end); + +// Returns the traceback of current entry, in string form. +// Note: `getTraceback` invokes `torch::symbolize`, which may need to acquire +// the GIL. If you don't want to block the current thread or take the risk of a +// GIL deadlock, you can use an asynchronous calling mechanism like std::async. +template +std::string FlightRecorder::Entry::getTraceback() { + torch::CapturedTraceback* traceback = traceback_.get(); + torch::SymbolizedTracebacks s_tbs = torch::symbolize({traceback}); + // We use 0 because we only have one traceback here. 
+ const auto& s_tb = s_tbs.tracebacks.at(0); + std::stringstream oss; + for (auto idx : c10::irange(s_tb.size())) { + auto frame_id = s_tb[idx]; + const auto& frame = s_tbs.all_frames.at(frame_id); + oss << '#' << idx << ' ' << frame.funcname << " from " << frame.filename + << ':' << frame.lineno << '\n'; + } + /* Resulted format is like: + #0 all_reduce from pytorch/torch/distributed/distributed_c10d.py:2696 + #1 wrapper from pytorch/torch/distributed/c10d_logger.py:83 + #2 bar from /home/user/repro.py:15 + #3 foo from /home/user/repro.py:24 + #4 main from /home/user/repro.py:34 + #5 from /home/user/repro.py:40 + */ + return oss.str(); +} + +template +std::optional FlightRecorder::record( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P) { + auto result = recordWithResetEnabled( + pg_id, + pg_name, + collective_seq_id, + p2p_seq_id, + op_id, + std::move(profiling_name), + inputs, + outputs, + start, + end, + timeout_ms, + std::move(pg_status), + isP2P); + return result.id; +} + +template +typename FlightRecorder::TraceIdentifier FlightRecorder:: + recordWithResetEnabled( + size_t pg_id, + const std::tuple& pg_name, + size_t collective_seq_id, + size_t p2p_seq_id, + size_t op_id, + std::string profiling_name, + const std::vector& inputs, + const std::vector& outputs, + EventType* start, + EventType* end, + std::chrono::milliseconds timeout_ms, + std::shared_ptr pg_status, + bool isP2P) { + if (!enabled_) { + return TraceIdentifier{std::nullopt, std::nullopt}; + } + if (all_pg_status_.find(pg_id) == all_pg_status_.end()) { + // Current pg_status is not in FR. 
+ all_pg_status_[pg_id] = std::move(pg_status); + } + auto traceback = + torch::CapturedTraceback::gather(true, true, capture_cpp_stack_); + std::lock_guard guard(mutex_); + + TORCH_CHECK( + reset_epoch_start_idx_.find(reset_epoch_) != + reset_epoch_start_idx_.end()); + + auto te = Entry{ + id_, + reset_epoch_, + pg_id, + pg_name, + collective_seq_id, + p2p_seq_id, + op_id, + std::move(profiling_name), + std::move(traceback), + start, + end, + c10::getTime(), + timeout_ms.count(), + isP2P, + std::nullopt, + std::nullopt, + std::nullopt, + {}, + {}, + {}, + {}, + {}, + std::this_thread::get_id(), + c10::getThreadName(), + false}; + + for (const auto& input : inputs) { + c10::IntArrayRef sizes = input.sizes(); + te.input_dtypes_.push_back(input.dtype().toScalarType()); + te.input_dims_.push_back(static_cast(sizes.size())); + te.sizes_.insert(te.sizes_.end(), sizes.begin(), sizes.end()); + } + + for (const auto& output : outputs) { + c10::IntArrayRef sizes = output.sizes(); + te.output_dtypes_.push_back(output.dtype().toScalarType()); + te.output_dims_.push_back(static_cast(sizes.size())); + te.sizes_.insert(te.sizes_.end(), sizes.begin(), sizes.end()); + } + + const auto next = next_++; + + if (entries_.size() < max_entries_) { + entries_.emplace_back(std::move(te)); + } else { + entries_[next] = std::move(te); + } + + if (next_ == max_entries_) { + next_ = 0; + } + + const auto id = id_++; + return TraceIdentifier{id, reset_epoch_}; +} + +template +void FlightRecorder::record_pg_ranks( + const std::tuple& pg_name, + std::vector ranks) { + if (!enabled_) { + return; + } + std::lock_guard guard(mutex_); + pg_name_to_ranks_[pg_name] = std::move(ranks); +} + +template +void FlightRecorder::record_accelerator_version( + const std::string comm_lib_version) { + if (!enabled_) { + return; + } + std::lock_guard guard(mutex_); + comm_lib_version_ = std::move(comm_lib_version); +} + +template +void FlightRecorder::update_state(Entry& r) { + try { + if (r.start_ != nullptr) { + 
bool started = r.start_->query(); + if (started && !r.time_discovered_started_) { + r.time_discovered_started_ = c10::getTime(); + } + } + if (r.end_ != nullptr) { + bool completed = r.end_->query(); + if (completed && !r.time_discovered_completed_) { + r.time_discovered_completed_ = c10::getTime(); + } + } + } catch (std::exception& e) { + LOG(ERROR) << "Failed to update state for entry " << r.id_ << ": " + << r.profiling_name_ << " with error: " << e.what(); + } +} + +template +std::vector::Entry> FlightRecorder< + EventType>::dump_entries() { + std::vector result; + { + std::lock_guard guard(mutex_); + // Filter entries during insertion - only keep entries from current epoch + auto filter = [this](const Entry& e) { + return e.reset_epoch_ == reset_epoch_; + }; + std::copy_if( + entries_.begin() + static_cast(next_), + entries_.end(), + std::back_inserter(result), + filter); + std::copy_if( + entries_.begin(), + entries_.begin() + static_cast(next_), + std::back_inserter(result), + filter); + } + // query any remaining events + for (auto& r : result) { + update_state(r); + r.start_ = r.end_ = nullptr; + } + return result; +} + +template +// Returns the index in entries_ for the given id and reset_epoch. +// Caller must hold mutex_lock before calling this method. +size_t FlightRecorder::getIdxFromId(size_t id, size_t reset_epoch) + const { + // Look up the starting idx for the given reset epoch + auto it = reset_epoch_start_idx_.find(reset_epoch); + TORCH_CHECK(it != reset_epoch_start_idx_.end()); + // Calculate idx based on where the epoch started + return (it->second + id) % max_entries_; +} + +template +// Returns the entry with the given id and reset_epoch, if it exists. Otherwise, +// returns std::nullopt. 
+std::optional::Entry> FlightRecorder< + EventType>:: + getEntry(std::optional id, std::optional reset_epoch) { + if (!enabled_ || !id || !reset_epoch) { + return std::nullopt; + } + + std::unique_lock guard(mutex_); + Entry entry = entries_.at(getIdxFromId(*id, *reset_epoch)); + if (entry.id_ == *id && entry.reset_epoch_ == *reset_epoch) { + return entry; + } + return std::nullopt; +} + +template +std::optional::Entry> FlightRecorder< + EventType>::getEntry(std::optional id) { + return getEntry(id, 0); +} + +template +void FlightRecorder::retire_id( + std::optional id, + std::optional reset_epoch, + bool compute_duration) { + if (!enabled_ || !id || !reset_epoch) { + return; + } + + bool can_compute_duration = false; + EventType* startEvent = nullptr; + EventType* endEvent = nullptr; + std::optional duration = std::nullopt; + + std::unique_lock guard(mutex_); + + Entry* entry = &entries_.at(getIdxFromId(*id, *reset_epoch)); + if (entry->id_ == *id && entry->reset_epoch_ == *reset_epoch) { + update_state(*entry); + + if (compute_duration) { + can_compute_duration = entry->time_discovered_completed_.has_value() && + entry->start_ && entry->end_; + startEvent = entry->start_; + endEvent = entry->end_; + } + entry->retired_ = true; + entry->start_ = entry->end_ = nullptr; + } + + if (can_compute_duration) { + // Compute duration without without holding the lock, because + // cudaEventDuration() can hang, and we need to acquire the lock before we + // can dump(), which we never want to block. 
+ guard.unlock(); + duration = getDurationFromEvent(*startEvent, *endEvent); + guard.lock(); + + // Refresh the entry pointer, see if the entry has been overwritten + entry = &entries_.at(getIdxFromId(*id, *reset_epoch)); + if (!(entry->id_ == *id && entry->reset_epoch_ == *reset_epoch)) { + LOG(INFO) << "retire_id abandoned for id " << *id + << ", event was overwritten while waiting to compute duration."; + return; + } + if (duration.has_value()) { + entry->duration_ = duration; + } + } +} + +template +void FlightRecorder::retire_id( + std::optional id, + bool compute_duration) { + retire_id(id, 0, compute_duration); +} + +template +void FlightRecorder::reset_all() { + std::lock_guard guard(mutex_); + if (!entries_.empty()) { + // Soft delete: increment epoch to mark all existing entries as old + // Store where the new epoch starts in the circular buffer + reset_epoch_++; + reset_epoch_start_idx_[reset_epoch_] = next_; + id_ = 0; + } +} + +template +const c10::List FlightRecorder::getCollectiveTrace( + bool includeStacktraces, + bool onlyActive) { + auto entries = new_list(); + // Entries are returned in the order they were recorded + auto result = dump_entries(); + std::vector tracebacks; + torch::SymbolizedTracebacks stracebacks; + std::vector all_frames; + if (includeStacktraces) { + for (auto& e : result) { + tracebacks.push_back(e.traceback_.get()); + } + stracebacks = torch::symbolize(tracebacks); + for (const auto& f : stracebacks.all_frames) { + auto d = new_dict(); + d.insert(name_key, f.funcname); + d.insert(filename_key, f.filename); + d.insert(line_key, int64_t(f.lineno)); + all_frames.emplace_back(std::move(d)); + } + } + for (auto i : c10::irange(result.size())) { + auto dict = new_dict(); + auto& e = result.at(i); + // Skip completed events + if (onlyActive && e.time_discovered_completed_.has_value()) { + continue; + } + if (includeStacktraces) { + auto& tb = stracebacks.tracebacks.at(i); + auto frames = new_list(); + for (auto frame : tb) { + 
frames.push_back(all_frames.at(frame)); + } + dict.insert(frames_key, frames); + } + + dict.insert(record_id_key, int64_t(e.id_)); + dict.insert(pg_id_key, int64_t(e.pg_id_)); + dict.insert(pg_name_key, e.pg_name_); + dict.insert(thread_name_key, e.thread_name_); + dict.insert(thread_id_key, c10::str(e.thread_id_)); + dict.insert(collective_seq_id_key, int64_t(e.collective_seq_id_)); + dict.insert(p2p_seq_id_key, int64_t(e.p2p_seq_id_)); + dict.insert(op_id_key, int64_t(e.op_id_)); + dict.insert(profiling_name_key, e.profiling_name_); + dict.insert(time_created_key, int64_t(e.time_created_)); + if (e.duration_) { + dict.insert(duration_key, *e.duration_); + } + + auto it = e.sizes_.begin(); + auto read_sizes = [&](const c10::SmallVector& dims) { + auto sizes = new_list(); + for (auto dim : dims) { + auto arg_sizes = new_list(); + for ([[maybe_unused]] auto i : c10::irange(dim)) { + arg_sizes.push_back(*it++); + } + sizes.push_back(arg_sizes); + } + return sizes; + }; + + dict.insert(input_sizes_key, read_sizes(e.input_dims_)); + std::vector input_dtypes_strs; + input_dtypes_strs.reserve(e.input_dtypes_.size()); + for (const auto& input_dtype : e.input_dtypes_) { + input_dtypes_strs.emplace_back(c10::toString(input_dtype)); + } + dict.insert(input_dtypes_key, input_dtypes_strs); + dict.insert(output_sizes_key, read_sizes(e.output_dims_)); + std::vector output_dtypes_strs; + output_dtypes_strs.reserve(e.output_dtypes_.size()); + for (const auto& output_dtype : e.output_dtypes_) { + output_dtypes_strs.emplace_back(c10::toString(output_dtype)); + } + dict.insert(output_dtypes_key, output_dtypes_strs); + if (e.time_discovered_completed_.has_value()) { + dict.insert(state_key, completed_state); + } else if (e.time_discovered_started_.has_value()) { + dict.insert(state_key, started_state); + } else { + dict.insert(state_key, scheduled_state); + } + + dict.insert( + time_discovered_started_key, + e.time_discovered_started_.has_value() + ? 
int64_t(*e.time_discovered_started_) + : c10::IValue()); + dict.insert( + time_discovered_completed_key, + e.time_discovered_completed_.has_value() + ? int64_t(*e.time_discovered_completed_) + : c10::IValue()); + dict.insert(retired_key, e.retired_); + dict.insert(timeout_key, e.timeout_ms_); + dict.insert(is_p2p_key, e.isP2P_); + + entries.push_back(dict); + } + return entries; +} + +template +const c10::Dict FlightRecorder< + EventType>::getPgConfig() { + auto pg_config = new_dict(); + for (const auto& [pg_name, ranks] : pg_name_to_ranks_) { + auto pg_info = new_dict(); + pg_info.insert("name", std::get<0>(pg_name)); + pg_info.insert("desc", std::get<1>(pg_name)); + pg_info.insert("ranks", ranks_str(ranks)); + pg_config.insert(std::get<0>(pg_name), pg_info); + } + return pg_config; +} + +template +const std::map> FlightRecorder< + EventType>::getPgConfigJson() { + std::map> result; + for (const auto& [pg_name, ranks] : pg_name_to_ranks_) { + auto pg_info = std::map(); + pg_info["name"] = std::get<0>(pg_name); + pg_info["desc"] = std::get<1>(pg_name); + pg_info["ranks"] = ranks_str(ranks); + result.emplace(std::get<0>(pg_name), pg_info); + } + return result; +} + +template +const c10::Dict FlightRecorder< + EventType>::getPgStatus() { + auto all_pg_status = new_dict(); + for (const auto& [pg_id, status] : all_pg_status_) { + auto pg_status = new_dict(); + pg_status.insert("last_enqueued_collective", status->lastEnqueuedSeq); + pg_status.insert("last_started_collective", status->lastStartedSeq); + pg_status.insert("last_completed_collective", status->lastCompletedSeq); + all_pg_status.insert(std::to_string(pg_id), pg_status); + } + return all_pg_status; +} + +template +const std::map> FlightRecorder< + EventType>::getPgStatusJson() { + std::map> result; + for (const auto& [pg_id, status] : all_pg_status_) { + auto pg_status = std::map(); + pg_status["last_enqueued_collective"] = + std::to_string(status->lastEnqueuedSeq); + pg_status["last_started_collective"] = + 
std::to_string(status->lastStartedSeq); + pg_status["last_completed_collective"] = + std::to_string(status->lastCompletedSeq); + result[std::to_string(pg_id)] = pg_status; + } + return result; +} + +using json = nlohmann::json; +template +std::string FlightRecorder::dump_json( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool onlyActive) { + json result; + result[version_key_str] = version_val_str; + result[comm_lib_version_key_str] = comm_lib_version_; + result[pg_config_key_str] = getPgConfigJson(); + result[pg_status_key_str] = getPgStatusJson(); + + // collective trace + if (includeCollectives) { + std::list entries; + for (auto& e : dump_entries()) { + json j; + if (onlyActive && e.time_discovered_completed_.has_value()) { + continue; + } + j[record_id_key_str] = int64_t(e.id_); + j[pg_id_key_str] = int64_t(e.pg_id_); + j[pg_name_key_str] = e.pg_name_; + j[thread_name_key_str] = e.thread_name_; + j[thread_id_key_str] = c10::str(e.thread_id_); + j[collective_seq_id_key_str] = int64_t(e.collective_seq_id_); + j[p2p_seq_id_key_str] = int64_t(e.p2p_seq_id_); + j[op_id_key_str] = int64_t(e.op_id_); + j[profiling_name_key_str] = e.profiling_name_; + j[time_created_key_str] = int64_t(e.time_created_); + if (e.duration_) { + j[duration_key_str] = *e.duration_; + } + auto it = e.sizes_.begin(); + auto read_sizes = [&](const c10::SmallVector& dims) { + auto sizes = std::list>(); + for (auto dim : dims) { + auto arg_sizes = std::list(); + for (auto i : c10::irange(dim)) { + (void)i; + arg_sizes.push_back(*it++); + } + sizes.push_back(arg_sizes); + } + return sizes; + }; + j[input_sizes_key_str] = read_sizes(e.input_dims_); + std::vector input_dtypes_strs; + input_dtypes_strs.reserve(e.input_dtypes_.size()); + for (const auto& input_dtype : e.input_dtypes_) { + input_dtypes_strs.emplace_back(c10::toString(input_dtype)); + } + j[input_dtypes_key_str] = input_dtypes_strs; + j[output_sizes_key_str] = read_sizes(e.output_dims_); + std::vector 
output_dtypes_strs; + output_dtypes_strs.reserve(e.output_dtypes_.size()); + for (const auto& output_dtype : e.output_dtypes_) { + output_dtypes_strs.emplace_back(c10::toString(output_dtype)); + } + j[output_dtypes_key_str] = output_dtypes_strs; + if (e.time_discovered_completed_.has_value()) { + j[state_key_str] = completed_state_str; + } else if (e.time_discovered_started_.has_value()) { + j[state_key_str] = started_state_str; + } else { + j[state_key_str] = scheduled_state_str; + } + j[time_discovered_started_key_str] = + e.time_discovered_started_.has_value() + ? int64_t(*e.time_discovered_started_) + : 0; + j[time_discovered_completed_key_str] = + e.time_discovered_completed_.has_value() + ? int64_t(*e.time_discovered_completed_) + : 0; + j[retired_key_str] = e.retired_; + j[timeout_key_str] = e.timeout_ms_; + j[is_p2p_key_str] = e.isP2P_; + entries.emplace_back(j); + } + + if (!entries.empty()) { + result[entries_key_str] = entries; + } + } + + if (extraDumpMap.has_value()) { + result[nccl_comm_key_str] = extraDumpMap.value(); + } + return result.dump(); +} + +template +std::string FlightRecorder::dump( + const std::optional>>& extraDumpMap, + bool includeCollectives, + bool includeStackTraces, + bool onlyActive) { + STATIC_SCOPED_WAIT_COUNTER(pytorch.wait_counter.FlightRecorder__dump); + auto result = new_dict(); + // common values + result.insert(version_key, version_val); + result.insert(pg_config_key, getPgConfig()); + result.insert(comm_lib_version_key_str, comm_lib_version_); + result.insert(pg_status_key, getPgStatus()); + + // collective trace + if (includeCollectives) { + result.insert( + entries_key, getCollectiveTrace(includeStackTraces, onlyActive)); + } + + // convert extraDumpMap into a dictionary + auto per_comm_dict = new_dict(); + if (extraDumpMap.has_value()) { + for (const auto& [ncclId, ncclDump] : extraDumpMap.value()) { + auto inner_dict = new_dict(); + for (const auto& [key, value] : ncclDump) { + inner_dict.insert(key, value); + } + 
per_comm_dict.insert(ncclId, inner_dict); + } + } + if (!per_comm_dict.empty()) { + result.insert(nccl_comm_key, per_comm_dict); + } + return pickle_str(result); +} +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b171a3ac776f56fd1ff7d6e74c5449449ced9cec --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Functional.hpp @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +C10_EXPORT at::Tensor& all_reduce_( + at::Tensor& input, + std::string reduce_op, + std::string group_name); + +C10_EXPORT at::Tensor all_reduce( + const at::Tensor& input, + std::string reduce_op, + std::string group_name); + +C10_EXPORT std::vector all_reduce_coalesced_( + std::vector inputs, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string reduce_op, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT std::vector all_reduce_coalesced( + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::vector inputs, + std::string reduce_op, + std::string group_name); + +C10_EXPORT std::vector all_gather_into_tensor_coalesced( + std::vector inputs, + int64_t group_size, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor all_gather_into_tensor( + const at::Tensor& input, + int64_t group_size, + std::string group_name); + +C10_EXPORT at::Tensor& 
all_gather_into_tensor_out( + at::Tensor& input, + int64_t group_size, + const std::string& group_name, + at::Tensor& output); + +C10_EXPORT std::vector reduce_scatter_tensor_coalesced( + std::vector inputs, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string reduce_op, + int64_t group_size, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor reduce_scatter_tensor( + const at::Tensor& input, + std::string reduce_op, + int64_t group_size, + std::string group_name); + +C10_EXPORT at::Tensor reduce_scatter_tensor_out( + const at::Tensor& input, + std::string reduce_op, + int64_t group_size, + std::string group_name, + at::Tensor& output); + +C10_EXPORT at::Tensor all_to_all_single( + const at::Tensor& input, + at::SymIntArrayRef output_split_sizes, + at::SymIntArrayRef input_split_sizes, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + std::string group_name); + +C10_EXPORT at::Tensor& broadcast_( + at::Tensor& input, + int64_t src, + std::string group_name); + +C10_EXPORT at::Tensor broadcast( + const at::Tensor& input, + int64_t src, + std::string group_name); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9770bbe9f3822bc68fa8c3344e92c17b95004dff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GlooDeviceFactory.hpp @@ -0,0 +1,40 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include + +#include +#include +#include + +namespace c10d { + +class TORCH_API GlooDeviceFactory { + public: + // Create new device instance for specific interface. + static std::shared_ptr<::gloo::transport::Device> makeDeviceForInterface( + const std::string& interface, + bool lazyInit); + + // Create new device instance for specific hostname or address. + static std::shared_ptr<::gloo::transport::Device> makeDeviceForHostname( + const std::string& hostname, + bool lazyInit); +}; + +TORCH_DECLARE_SHARED_REGISTRY( + GlooDeviceRegistry, + ::gloo::transport::Device, + const std::string&, /* interface */ + const std::string&, /* hostname */ + bool /* lazyInit */); + +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4df50c8b39e5fbfc27e74477651d865a5d3dc06d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/GroupRegistry.hpp @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +C10_EXPORT void set_thread_isolation_mode(bool enable); + +bool get_thread_isolation_mode(); + +C10_EXPORT void register_process_group( + const std::string& group_name, + const c10::intrusive_ptr& group); + +C10_EXPORT c10::intrusive_ptr resolve_process_group( + const std::string& group_name); + +C10_EXPORT void unregister_process_group(const std::string& group_name); + +C10_EXPORT void unregister_all_process_groups(); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e9b6186ef3bbf413ade67ddc83b764c43e655fa6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/HashStore.hpp @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { + +class TORCH_API HashStore : public Store { + public: + c10::intrusive_ptr clone() override; + + ~HashStore() override = default; + + void set(const std::string& key, const std::vector& data) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + void wait(const std::vector& keys) override { + wait(keys, timeout_); + } + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + int64_t add(const std::string& key, int64_t value) override; + + int64_t getNumKeys() override; + + bool check(const std::vector& keys) override; + + bool deleteKey(const std::string& key) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + // Returns true if this store support append, multiGet and multiSet + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t 
queueLen(const std::string& key) override; + + std::vector listKeys() override; + + protected: + bool checkLocked( + const std::unique_lock& lock, + const std::vector& keys); + + void waitLocked( + std::unique_lock& lock, + const std::vector& keys, + const std::chrono::milliseconds& timeout); + + protected: + std::unordered_map> map_; + std::unordered_map>> queues_; + std::mutex m_; + std::condition_variable cv_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..87417b52ae59481ef89a4cfb2c9c377d1941eae4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NCCLUtils.hpp @@ -0,0 +1,448 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include + +constexpr int64_t kCommInitBusyWaitMillis = 2; + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 14, 0) +#define NCCL_HAS_COMM_NONBLOCKING +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 18, 0) +#define NCCL_HAS_COMM_SPLIT +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 23, 0) +#define NCCL_HAS_INIT_RANK_SCALABLE +#endif + +// ncclGetLastError() is enabled only for NCCL versions 2.13+ +// ncclRemoteError only exists in NCCL versions 2.13+ +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 13, 0) +#define ENABLE_NCCL_GET_LAST_ERROR +#define NCCL_REMOTE_ERROR +#endif + +static_assert( + NCCL_VERSION_CODE >= NCCL_VERSION(2, 7, 0), + 
"NCCL version must be 2.7 or later"); +// The following macros represent features supported prior to NCCL 2.7, +// therefore we can define them unconditionally, given the static_assert above. +// TODO: remove these macros from code. +#define ENABLE_NCCL_ERROR_CHECKING +#define ENABLE_NCCL_P2P_SUPPORT +// End of macros for NCCL 2.7 and below. + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 11, 0) +#define ENABLE_NCCL_PREMUL_SUM_SUPPORT +#endif + +// Note: the first version that supports ncclConfig_t is 2.14. Here we +// fast-forward the version requirement to 2.17 where ncclConfig_t has CTA and +// CGA fields because they have already been pybinded out. +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 17, 0) +#define NCCL_HAS_CONFIG +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) +#define NCCL_HAS_COMM_REGISTER +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COMM_WINDOW_REGISTER +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 19, 0) +#define NCCL_HAS_MEM_ALLOC +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 26, 0) +#define NCCL_HAS_QOS +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 24, 0) +#define NCCL_SUPPORTS_FP8 +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COLLNET +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_CTA_POLICY +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_NVLS_CTAS +#endif + +#if NCCL_VERSION_CODE >= NCCL_VERSION(2, 27, 0) +#define NCCL_HAS_COMM_SHRINK +#endif + +// Macro to throw on a non-successful NCCL return value. 
+#define C10D_NCCL_CHECK(cmd, failureReason) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to throw on a non-successful NCCL return value for NONBLOCKING calls. +#define C10D_NCCL_CHECK_NONBLOCKING(cmd, failureReason) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess && result != ncclInProgress) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Error out if (current time - startTime) is greater than timeout (sec). +#define C10D_CHECK_TIMEOUT(startTime, timeout) \ + do { \ + auto currentTime = std::chrono::steady_clock::now(); \ + auto timeElapsed = std::chrono::duration_cast( \ + currentTime - startTime) \ + .count(); \ + if (timeElapsed > timeout) { \ + std::string err = "NCCL timeout in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to throw on a non-successful NCCL return value, non-blocking. +// Thread-safe: uses NCCLComm wrapper's getAsyncError() which acquires mutex +// before calling ncclCommGetAsyncError to prevent race conditions between +// watchdog and main threads. 
+#define C10D_NCCL_CHECK_TIMEOUT_BASE( \ + cmd, commWrapper, failureReason, yield_fn) \ + do { \ + ncclResult_t result = cmd; \ + auto startTimepoint = std::chrono::steady_clock::now(); \ + auto timeout = nccl_nonblocking_timeout(); \ + while (result == ncclInProgress) { \ + C10D_CHECK_TIMEOUT(startTimepoint, timeout); \ + yield_fn; \ + commWrapper->getAsyncError(&result); \ + } \ + if (result != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(result) + \ + "\n" + getNcclErrorDetailStr(result, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Sleep for kCommInitBusyWaitMillis milliseconds. +#define C10D_SCHED_SLEEP() \ + std::this_thread::sleep_for( \ + std::chrono::milliseconds(kCommInitBusyWaitMillis)) + +// Macro to throw exception on a non-successful NCCL return value or timeout. +// This macro uses sched_yield() to yield the CPU. +// Thus suitable for NCCL calls that would quickly turn ncclSuccess, e.g. +// collectives. +#define C10D_NCCL_CHECK_TIMEOUT(cmd, commWrapper, failureReason) \ + C10D_NCCL_CHECK_TIMEOUT_BASE(cmd, commWrapper, failureReason, sched_yield()) + +// Macro to throw exception on a non-successful NCCL return value or timeout. +// This macro uses sleep to yield the CPU. +// Thus suitable for NCCL calls that would take longer to turn ncclSuccess, e.g. +// ncclCommInitRankConfig, ncclCommFinalize, etc. 
+#define C10D_NCCL_CHECK_TIMEOUT_SLEEP(cmd, commWrapper, failureReason) \ + C10D_NCCL_CHECK_TIMEOUT_BASE( \ + cmd, commWrapper, failureReason, C10D_SCHED_SLEEP()) + +#define C10D_NCCL_CHECK_TIMEOUT_GROUPEND(cmd, comm, failureReason) \ + do { \ + ncclResult_t state = cmd; \ + auto startTimepoint = std::chrono::steady_clock::now(); \ + auto timeout = nccl_nonblocking_timeout(); \ + if (state == ncclInProgress) { \ + do { \ + C10D_CHECK_TIMEOUT(startTimepoint, timeout); \ + sched_yield(); \ + comm->getAsyncError(&state); \ + } while (state == ncclInProgress); \ + } \ + if (state != ncclSuccess) { \ + std::string err = "NCCL error in: " + std::string(__FILE__) + ":" + \ + std::to_string(__LINE__) + ", " + ncclGetErrorWithVersion(state) + \ + "\n" + getNcclErrorDetailStr(state, failureReason); \ + TORCH_CHECK_WITH(DistBackendError, false, err); \ + } \ + } while (0) + +// Macro to print and abort on a non-successful NCCL return value. +#define C10D_NCCL_ASSERT(cmd) \ + do { \ + ncclResult_t result = cmd; \ + if (result != ncclSuccess) { \ + std::string err = ncclGetErrorWithVersion(result); \ + fprintf( \ + stderr, \ + "NCCL error in: %s:%d, %s\n", \ + __FILE__, \ + __LINE__, \ + err.c_str()); \ + abort(); \ + } \ + } while (0) + +namespace c10d { + +// NCCL type typing +static std::map ncclDataType = { + {at::kChar, ncclInt8}, + {at::kByte, ncclUint8}, + {at::kFloat, ncclFloat}, + {at::kDouble, ncclDouble}, + {at::kInt, ncclInt32}, + {at::kLong, ncclInt64}, + {at::kHalf, ncclHalf}, + {at::kBool, ncclUint8}, +#ifdef NCCL_SUPPORTS_FP8 + {at::kFloat8_e5m2, ncclFloat8e5m2}, + {at::kFloat8_e4m3fn, ncclFloat8e4m3}, +#else + {at::kFloat8_e5m2, ncclUint8}, + {at::kFloat8_e4m3fn, ncclUint8}, +#endif + // NVIDIA GPUs does not support the UZ version standing for "no negative + // zero". 
See https://onnx.ai/onnx/technical/float8.html + {at::kFloat8_e4m3fnuz, ncclUint8}, + {at::kFloat8_e5m2fnuz, ncclUint8}, +#if HAS_NCCL_BF16_DATATYPE + {at::kBFloat16, ncclBfloat16}, +#endif // HAS_NCCL_BF16_DATATYPE +}; + +TORCH_API size_t hashTensors(const std::vector& tensors); +TORCH_API int genNcclSplitColor(const std::vector& ranks); +TORCH_API std::string getNcclVersion(); +TORCH_API std::tuple getNcclVersionTuple(); +TORCH_API int getNcclVersionNumber(); +TORCH_API std::string ncclGetErrorWithVersion(ncclResult_t error); +int nccl_nonblocking_timeout(); + +// Provides additional detail into NCCL error codes based on when these are +// thrown in the NCCL codebase. +TORCH_API std::string getNcclErrorDetailStr( + ncclResult_t error, + std::optional processGroupFailureReason = std::nullopt); + +// Helper function that gets the data type and issues error if not supported +ncclDataType_t getNcclDataType(at::ScalarType type); + +// RAII wrapper for NCCL communicator +class NCCLComm { + using MutexType = std::recursive_mutex; + using LockType = std::unique_lock; + + public: + explicit NCCLComm(ncclComm_t ncclComm); + + NCCLComm() = default; + + ~NCCLComm() noexcept; + + void setUniqueHash(ncclUniqueId ncclId); + void setUniqueHash(std::string hash); + std::string getUniqueHash(); + + static std::shared_ptr create( + int numRanks, + int rank, + ncclUniqueId commId, + at::DeviceIndex deviceIndex); + +#ifdef NCCL_HAS_CONFIG + static std::shared_ptr create( + int numRanks, + int rank, + ncclUniqueId commId, + at::DeviceIndex deviceIndex, + ncclConfig_t& config); +#ifdef NCCL_HAS_INIT_RANK_SCALABLE + static std::shared_ptr create_scalable( + int numRanks, + int rank, + std::vector& commIds, + at::DeviceIndex deviceIndex, + ncclConfig_t& config); +#endif // NCCL_HAS_INIT_RANK_SCALABLE +#endif // NCCL_HAS_CONFIG + +#ifdef NCCL_HAS_COMM_SPLIT + static std::shared_ptr split( + NCCLComm* source, + int color_id, + int rank, + ncclConfig_t& config); +#endif // 
NCCL_HAS_COMM_SPLIT + +#ifdef NCCL_HAS_COMM_SHRINK + static std::shared_ptr shrink( + NCCLComm* source, + std::vector& ranks_to_exclude, + ncclConfig_t* config, + int shrinkFlags = 0); +#endif // NCCL_HAS_COMM_SHRINK + +#if (defined(IS_NCCLX) || defined(USE_ROCM)) && defined(NCCL_COMM_DUMP) + std::unordered_map ncclCommDump(); +#endif + + at::DeviceIndex getDeviceIndex(); + + // Must not be copyable + NCCLComm(const NCCLComm&) = delete; + NCCLComm& operator=(const NCCLComm&) = delete; + + // Do not support move assignment as there is no valid use case + NCCLComm& operator=(NCCLComm&& other) = delete; + + // Move constructable + // NOLINTNEXTLINE(*-noexcept-move-*) + NCCLComm(NCCLComm&& other); + + ncclComm_t getNcclComm(); + + // Wait for the communicator to be ready. This is a blocking function. + // Useful in nonblocking mode: NCCL requires the communicator to be ready + // before issuing a second command. + // Arguments: + // longInterval: if true, wait with sleep of an interval; otherwise, wait + // with `sched_yield` which is faster (but acquires CPU more frequently). + // Use `longInterval=true` when waiting for initialization or finalize to + // complete. Use `longInterval=false` when waiting collective call to return + // ncclSuccess. + void waitReady(bool longInterval); + + std::optional getNcclCommFailureReason() const; + + void abort(std::optional commFailureReason = std::nullopt); + + // Finalize a communicator -- asking it to flush its operations. When the + // communicator is marked as nonblocking, this is a nonblocking function; + // otherwise, it will block till all operations complete. + void finalize(); + + // Destroy a communicator. This is a blocking function. + void destroy(); + + bool isInitialized() const; + + bool isAborted() const; + + uint64_t getCommSplitCounter() const; + + ncclResult_t checkForNcclError(); + + // Thread-safe wrapper for ncclCommGetAsyncError that acquires the mutex + // before calling the NCCL API. 
This is needed because NCCL does not provide + // thread-safety guarantees for ncclCommGetAsyncError, and both the main + // thread and watchdog thread may call it concurrently. + ncclResult_t getAsyncError(ncclResult_t* asyncError); + + ncclResult_t registerSegment( + void* ptr, + size_t size, + bool errorOnRereg = true, + bool window = false); + + ncclResult_t deregisterSegment(void* ptr, bool window = false); + + std::string repr() const; + + friend class ProcessGroupNCCL; + + protected: + // Unique hash for this communicator. + std::string uniqueHash_; + bool aborted_{false}; + uint64_t ncclCommSplitCounter_{0}; + ncclResult_t ncclAsyncErr_{ncclSuccess}; + mutable MutexType mutex_; + // Rank that this communicator corresponds to. + int rank_{}; + // Optional reason for communicator failure, provided by ProcessGroupNCCL for + // better error messaging. + std::optional commFailureReason_; + bool initialized_{false}; + // Whether this communicator is using nonblocking mode. Recorded during comm + // creation or split. For safety, we give a default value of true (more + // protection). + bool nonBlocking_{true}; + // Device index for which the NCCL comm is created + at::DeviceIndex deviceIndex_{-1}; +#ifdef NCCL_HAS_COMM_REGISTER + // Stores handlers for tensors registered by NCCL + std::unordered_map registeredSegmentHandles_; +#endif // NCCL_HAS_COMM_REGISTER + + private: + ncclComm_t ncclComm_{nullptr}; +}; + +// Helper that automatically cleans up premul sums. 
+struct ncclRedOpRAII { + ncclRedOpRAII() = default; + ncclRedOpRAII(ncclRedOp_t op) : op_(op) {} + ncclRedOpRAII(ncclRedOp_t op, ncclComm_t comm) + : op_(op), comm_(comm), premul_sum_(true) {} + ncclRedOpRAII(const ncclRedOpRAII&) = delete; + ncclRedOpRAII& operator=(const ncclRedOpRAII&) = delete; + ncclRedOpRAII(ncclRedOpRAII&& tmp) noexcept : ncclRedOpRAII() { + std::swap(tmp.op_, this->op_); + std::swap(tmp.comm_, this->comm_); + std::swap(tmp.premul_sum_, this->premul_sum_); + } +#if defined(ENABLE_NCCL_PREMUL_SUM_SUPPORT) + ~ncclRedOpRAII() { + if (premul_sum_) { + ncclRedOpDestroy(op_, comm_); + } + } +#endif // ENABLE_NCCL_PREMUL_SUM_SUPPORT + operator ncclRedOp_t() const { + return op_; + } + ncclRedOp_t op_{}; + ncclComm_t comm_{}; + bool premul_sum_ = false; +}; + +void printNcclCommProxyTrace( + const std::string& dumpReason, + const std::unordered_map& dumpMap); +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp new file mode 100644 index 0000000000000000000000000000000000000000..53915ea60411f46bb381ae2ed314d5e9348f97aa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/NanCheck.hpp @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#include +#include + +namespace c10d { + +// Check for NaNs in a tensor on a given stream. If any are found, throw a +// device-side error. 
+void checkForNan(const at::Tensor& tensor, at::cuda::CUDAStream& stream); + +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1d1b140dfa054f4326ce078b532ab2db1007f88f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ParamCommsUtils.hpp @@ -0,0 +1,185 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch { + +class TORCH_API ParamCommsDebugInfo : public c10::DebugInfoBase { + public: + ParamCommsDebugInfo() = default; + ParamCommsDebugInfo( + std::tuple pgName, + int rank, + std::string&& collName, + int64_t inNelems, + int64_t outNelems, + at::ScalarType dType, + std::vector inSplitSizes, + std::vector outSplitSizes, + int globalRankStart, + int globalRankStride, + int worldSize); + + ~ParamCommsDebugInfo() override = default; + + const std::string getProcessGroupName() const { + return std::get<0>(pgName_); + } + + const std::string getProcessGroupDesc() const { + return std::get<1>(pgName_); + } + + int getRank() const { + return rank_; + } + + int getWorldSize() const { + return worldSize_; + } + + int getGlobalRankStart() const { + return globalRankStart_; + } + + int getGlobalRankStride() const { + return globalRankStride_; + } + + const std::string getCollectiveName() const { + return collectiveName_; + } + + int64_t getInMessageNelems() const { + return inMessageNelems_; + 
} + + int64_t getOutMessageNelems() const { + return outMessageNelems_; + } + + at::ScalarType getDType() const { + return dType_; + } + + const std::vector& getInputSplitSizes() const { + return inputSplitSizes_; + } + + const std::vector& getOutputSplitSizes() const { + return outputSplitSizes_; + } + + const std::vector& getGroupRanks() const { + return groupRanks_; + } + + private: + std::tuple pgName_; // + int rank_{}; + int worldSize_{}; + std::string collectiveName_; + int64_t inMessageNelems_{}; + int64_t outMessageNelems_{}; + at::ScalarType dType_ = at::kByte; + std::vector inputSplitSizes_; + std::vector outputSplitSizes_; + int globalRankStart_{}; + int globalRankStride_{}; + std::vector groupRanks_; +}; + +#define RECORD_PARAM_COMMS( \ + seq, \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize) \ + auto paramCommsInfo = std::make_shared( \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize); \ + c10::DebugInfoGuard g(c10::DebugInfoKind::PARAM_COMMS_INFO, paramCommsInfo); \ + std::initializer_list paramList = { \ + seq, \ + pgName, \ + rank, \ + collName, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize}; \ + c10::ArrayRef paramInputs(paramList); \ + RECORD_FUNCTION(at::kParamCommsCallName, paramInputs); + +#define RECORD_PARAM_COMMS_DATA( \ + seq, \ + pgName, \ + InputTensors, \ + OutputTensors, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize) \ + auto paramCommsInfo = std::make_shared( \ + pgName, \ + rank, \ + collName, \ + inNelems, \ + outNelems, \ + dType, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize); \ + 
c10::DebugInfoGuard g(c10::DebugInfoKind::PARAM_COMMS_INFO, paramCommsInfo); \ + std::initializer_list paramList = { \ + c10::IValue(InputTensors), \ + seq, \ + pgName, \ + rank, \ + collName, \ + inSplitSizes, \ + outSplitSizes, \ + globalRankStart, \ + globalRankStride, \ + worldSize}; \ + c10::ArrayRef paramInputs(paramList); \ + RECORD_FUNCTION_WITH_INPUTS_OUTPUTS( \ + at::kParamCommsCallName, \ + paramInputs, \ + std::vector(1, c10::IValue(OutputTensors))); +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..48411dd8182e462c0e2592f30e0714a70769cc63 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PrefixStore.hpp @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +class TORCH_API PrefixStore : public Store { + public: + explicit PrefixStore(std::string prefix, c10::intrusive_ptr store); + + c10::intrusive_ptr clone() override; + + using Store::set; + void set(const std::string& key, const std::vector& value) override; + + using Store::compareSet; + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + bool deleteKey(const std::string& key) override; + + int64_t getNumKeys() override; + + bool check(const std::vector& keys) override; + + void 
wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + const std::chrono::milliseconds& getTimeout() const noexcept override; + + void setTimeout(const std::chrono::milliseconds& timeout) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + // Returns true if this store support append, multiGet and multiSet + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t queueLen(const std::string& key) override; + + c10::intrusive_ptr getUnderlyingStore(); + + // Recursively to fetch the store before layers of wrapping with PrefixStore. + c10::intrusive_ptr getUnderlyingNonPrefixStore(); + + std::vector listKeys() override; + + protected: + std::string prefix_; + c10::intrusive_ptr store_; + + std::string joinKey(const std::string& key); + std::vector joinKeys(const std::vector& keys); +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..535b3d8c2bcd45ef7280670199fa3054fc61a520 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroup.hpp @@ -0,0 +1,1037 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +// ************************************************************************* +// PROCESS GROUP collective communication API IS BEING CHANGED BETWEEN +// versions 1.7 and 1.8. +// PLEASE DO NOT ADD ANY DEPENDENCIES. +// SEE RFC: https://github.com/pytorch/pytorch/issues/39662 +// ************************************************************************* + +constexpr auto kProcessGroupDefaultTimeout = + std::chrono::milliseconds(30 * 60 * 1000); + +namespace c10d { + +// We only call `register_work()` in two cases: +// 1. If the work object is created from a functional collective call. +// 2. If the work object is created from a non-functional collective call within +// the `with allow_inflight_collective_as_graph_input_ctx()` context manager. +C10_EXPORT void register_work( + const at::Tensor& tensor, + const c10::intrusive_ptr& work); + +C10_EXPORT at::Tensor wait_tensor(const at::Tensor& tensor); + +// We only call `unregister_work()` in one case: +// 1. If the work object is created from a non-functional collective call within +// the `with allow_inflight_collective_as_graph_input_ctx()` context manager. +// +// Q: What about the functional collective case? 
+// A: The unregistration of work object for functional collective is done in +// the required user-side explicit call to `wait_tensor()`. +C10_EXPORT void unregister_work(const c10::intrusive_ptr& work); + +C10_EXPORT size_t get_work_registry_size(); + +C10_EXPORT void set_allow_inflight_collective_as_graph_input(bool value); + +C10_EXPORT bool allow_inflight_collective_as_graph_input(); + +// ProcessGroup is a base class that captures collective and point to +// point communication in a fixed set of processes. +// +// The functions specified in the class below describe the API alone; +// implementations are provided in subclasses. +// +// Every function that performs I/O is executed asynchronously by a +// thread pool owned by the ProcessGroup (by default). They return an +// object that can be used to wait for completion or error. +// +// The ProcessGroup can instantiate subgroups with fewer or an equal +// number of members. Implementations must take care that multiple +// process groups can be used in parallel and synchronize accordingly. +// +// The ProcessGroup assumes a fixed set of processes. If the set +// changes, existing instances must be destructed and instantiation +// and initialization must start from scratch. 
For members of the +// process group to find each other (referred to as rendezvous from +// hereon) +// +class TORCH_API ProcessGroup : public torch::CustomClassHolder { + public: + struct TORCH_API MergeOptions : torch::CustomClassHolder { + explicit MergeOptions( + const std::chrono::milliseconds timeout = kProcessGroupDefaultTimeout, + const std::optional group_name = std::nullopt, + const std::optional group_desc = std::nullopt) + : timeout(timeout), group_name(group_name), group_desc(group_desc) {} + ~MergeOptions() override = default; + MergeOptions(const MergeOptions&) = delete; + MergeOptions& operator=(const MergeOptions&) = delete; + + std::chrono::milliseconds timeout; + std::optional group_name; + std::optional group_desc; + }; + + enum BackendType : uint8_t { + UNDEFINED = 0, + GLOO = 1, + NCCL = 2, + UCC = 3, + MPI = 4, + XCCL = 5, + CUSTOM = 6, + }; + + static std::string backendTypeToString(const BackendType& type) { + switch (type) { + case BackendType::GLOO: + return "gloo"; + case BackendType::NCCL: + return "nccl"; + case BackendType::XCCL: + return "xccl"; + case BackendType::UCC: + return "ucc"; + case BackendType::MPI: + return "mpi"; + case BackendType::UNDEFINED: + return "undefined"; + case BackendType::CUSTOM: + return "custom"; + default: + TORCH_CHECK(false, "THis should never happen!"); + } + } + + static BackendType strToBackendType(const std::string& backend) { + if (backend == "undefined") { + return BackendType::UNDEFINED; + } else if (backend == "gloo") { + return BackendType::GLOO; + } else if (backend == "nccl") { + return BackendType::NCCL; + } else if (backend == "xccl") { + return BackendType::XCCL; + } else if (backend == "ucc") { + return BackendType::UCC; + } else if (backend == "mpi") { + return BackendType::MPI; + } else { + return BackendType::CUSTOM; + } + } + + // Not used, set for backwards compatibility and only used for TypeDef in + // Ops.cpp + explicit ProcessGroup(int rank, int size); + + explicit ProcessGroup( 
+ c10::intrusive_ptr<::c10d::Store> store, + int rank, + int size); + ~ProcessGroup() override; + + virtual int getRank() const { + return rank_; + } + + virtual int getSize() const { + return size_; + } + + // Returns an unique opaque ID of this process group object. + int64_t getID() const { + return reinterpret_cast(this); + } + + // Returns an unique opaque ID of a backend for the specific backend type + // that can correlate with this process group's collectives. + int64_t getBackendID(BackendType backend_type) const { + return reinterpret_cast(getBackend(backend_type).get()); + } + + virtual const std::string getBackendName() const { + return backendTypeToString(backendType_); + } + + BackendType getBackendType() const { + return backendType_; + } + + inline bool backendSupportsSequenceNumbers(BackendType backendType) { + if (backendType == BackendType::GLOO || backendType == BackendType::NCCL || + backendType == BackendType::XCCL || backendType == BackendType::UCC) + return true; + return false; + } + + virtual void setTimeout(std::chrono::milliseconds timeout) { + for (auto& backend : backendTypeToBackend_) { + backend.second->setTimeout(timeout); + } + } + + int64_t incrementSplitCount() { + return splitCounter_++; + } + + virtual void startCoalescing(c10::DeviceType deviceType) { + // only nccl has implemented startCoalescing so only execute for nccl + // backends + auto backend = getBackend(deviceType); + backend->startCoalescing(); + } + + virtual c10::intrusive_ptr endCoalescing(c10::DeviceType deviceType) { + // only nccl has implemented endCoalescing so only execute for nccl + // backends + auto backend = getBackend(deviceType); + auto work = backend->endCoalescing(); + return work; + } + + virtual c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::broadcast_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + 
at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t, + bool, + int64_t)>(); + // It's awakward to unbox the opts here and box them again in the custom C++ + // op. But it's also complicated to make opts as a CustomClassHolder. Leave + // it as it is now. + auto work = std::get<1>(op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.rootTensor, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allreduce_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + const std::optional& sparse_indices, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.sparseIndices, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = AllreduceCoalescedOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allreduce_coalesced_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.asyncOp, + 
opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + int64_t, + int64_t, + bool, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive(opts.reduceOp), + opts.rootRank, + opts.rootTensor, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_", "") + .typed>, + c10::intrusive_ptr>( + const std::vector>&, + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensors) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + // Gathers a single tensor inputBuffer into a single buffer outputBuffer that + // is interpreted as a contiguous collection of size inputBuffer * WORLD_SIZE. + // For implementers of ProcessGroup API and advanced users only. 
+ // Note: this function will be deprecated in near future. + virtual c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::_allgather_base_", "") + .typed>( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + + auto work = std::get<1>(op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + // This function is deprecated and will be moved out of ProcessGroup to comms: + // * do not add dependencies on this function, + // * do not implement it in your ProcessGroup, implement _allgather_base + // instead. + virtual c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_coalesced_", "") + .typed( + const std::vector>&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool)>(); + + auto work = op.call( + outputTensorLists, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensorLists) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + // This function is a coalesced version of `allgather_into_tensor` (currently + // still named as `_allgather_base`). Each tensor in the vector corresponds to + // an input/output of one `allgather_into_tensor` operation. 
+ virtual c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::allgather_into_tensor_coalesced_", "") + .typed( + const at::TensorList, + const at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool)>(); + + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::gather_", "") + .typed( + const std::vector>&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + bool, + int64_t)>(); + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor_list : outputTensors) { + for (const auto& tensor : tensor_list) { + c10d::register_work(tensor, work); + } + } + } + return work; + } + + virtual c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::scatter_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const std::vector>&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + 
c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.rootRank, + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_scatter_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const std::vector>&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::_reduce_scatter_base_", "") + .typed>( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + // This function is a coalesced version of 
`reduce_scatter_tensor` (currently + // still named as `_reduce_scatter_base`). Each tensor in the vector + // corresponds to an input/output of one `reduce_scatter_tensor` operation. + virtual c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) { + static auto op = + c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::reduce_scatter_tensor_coalesced_", "") + .typed( + const at::TensorList, + const at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const c10::intrusive_ptr<::c10d::ReduceOp>&, + bool, + int64_t)>(); + + auto work = op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + c10::make_intrusive<::c10d::ReduceOp>(opts.reduceOp), + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr alltoall_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::alltoall_base_", "") + .typed( + at::Tensor&, + at::Tensor&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + std::vector, + std::vector, + bool, + int64_t)>(); + auto work = op.call( + outputBuffer, + inputBuffer, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + outputSplitSizes, + inputSplitSizes, + opts.asyncOp, + opts.timeout.count()); + + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(outputBuffer, work); + } + return work; + } + + virtual c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) { + static auto op = + 
c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::alltoall_", "") + .typed< + std::tuple, c10::intrusive_ptr>( + const at::TensorList&, + const at::TensorList&, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + bool, + int64_t)>(); + auto work = std::get<1>(op.call( + outputTensors, + inputTensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.asyncOp, + opts.timeout.count())); + + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : outputTensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual void monitoredBarrier( + const BarrierOptions& opts, + bool wait_all_ranks = false) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::monitored_barrier_", "") + .typed&, + const std::vector&, + int64_t, + bool)>(); + // Default to using cpu implementation, monitored barrier is only for GLOO + at::Tensor tensor = at::empty({0}, at::TensorOptions().device(at::kCPU)); + op.call( + tensor, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.device_ids, + opts.timeout.count(), + wait_all_ranks); + } + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + virtual void setSequenceNumberForGroup() { + auto backendType = getBackendType(); + // TODO: HACK for backend name to get sequence number for that backend. + if (backendSupportsSequenceNumbers(backendType)) { + getDefaultBackend()->setSequenceNumberForGroup(); + } else { + TORCH_CHECK( + false, + c10::str( + "ProcessGroup ", + getBackendName(), + " does not yet support sequence numbers.")); + } + } + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. 
+ virtual uint64_t getSequenceNumberForGroup() { + auto backendType = getBackendType(); + + // TODO: HACK for backend name to get sequence number for that backend. + if (backendSupportsSequenceNumbers(backendType)) { + return getDefaultBackend()->getSequenceNumberForGroup(); + } else { + TORCH_CHECK( + false, + c10::str( + "ProcessGroup ", + getBackendName(), + " does not yet support sequence numbers.")); + } + } + + virtual c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::send", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + dstRank, + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::recv_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + srcRank, + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { + c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) { + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::recv_any_source_", "") + .typed( + at::TensorList, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + int64_t)>(); + auto work = op.call( + tensors, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + tag); + if (c10d::allow_inflight_collective_as_graph_input()) { + for (const auto& tensor : tensors) { 
+ c10d::register_work(tensor, work); + } + } + return work; + } + + virtual c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) { + static at::Tensor tensor; + // TODO: if nccl was specified then use it + auto device = opts.device; + if (device.has_value()) { + // set device tensor from argument + tensor = at::empty( + {1}, at::TensorOptions().device(device.value()).dtype(at::kByte)); + } else if (backendType_ == c10d::ProcessGroup::BackendType::NCCL) { + // set cuda tensor + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::CUDA).dtype(at::kByte)); + } else if (backendType_ == c10d::ProcessGroup::BackendType::XCCL) { + // set xpu tensor for override cpu dispatch + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::XPU).dtype(at::kByte)); + } else { + // Default to using cpu implementation + tensor = at::empty( + {1}, + at::TensorOptions().device(at::DeviceType::CPU).dtype(at::kByte)); + } + + static auto op = c10::Dispatcher::singleton() + .findSchemaOrThrow("c10d::barrier", "") + .typed( + at::Tensor, + const c10::intrusive_ptr<::c10d::ProcessGroup>&, + const std::vector&, + bool, + int64_t)>(); + + auto work = op.call( + tensor, + c10::intrusive_ptr::unsafe_reclaim_from_nonowning(this), + opts.device_ids, + opts.asyncOp, + opts.timeout.count()); + if (c10d::allow_inflight_collective_as_graph_input()) { + c10d::register_work(tensor, work); + } + return work; + } + + bool hasBackends() { + return !deviceTypeToBackendType_.empty(); + } + + void setBackend( + c10::DeviceType deviceType, + BackendType backendType, + const std::optional>& backend) { + // TODO: should we add these entries after the backend setting succeeds? 
+ deviceTypeToBackendType_[deviceType] = backendType; + deviceTypes_.insert(deviceType); + // if the backendType is already set then reuse it for this device + if (backendTypeToBackend_.find(backendType) != + backendTypeToBackend_.end()) { + auto existingBackend = backendTypeToBackend_.at(backendType); + deviceTypeToBackend_[deviceType] = existingBackend; + TORCH_CHECK( + existingBackend->getBoundDeviceId() == + (*backend)->getBoundDeviceId()); + } else { + // check if backend has value + if (backend.has_value()) { + deviceTypeToBackend_[deviceType] = backend.value(); + backendTypeToBackend_[backendType] = backend.value(); + (*backend)->setBoundDeviceId(bound_device_id_); + } + } + } + + c10::intrusive_ptr getDefaultBackend() const { + auto backend_iter = backendTypeToBackend_.find(backendType_); + TORCH_CHECK( + backend_iter != backendTypeToBackend_.end(), + "Could not find the default backend type ", + uint16_t(backendType_), + " for Process Group with name ", + getBackendName(), + "."); + return backend_iter->second; + } + + void setDefaultBackend(const BackendType& backendType) { + backendType_ = backendType; + } + + void setDefaultBackend(const std::string& backend) { + backendType_ = strToBackendType(backend); + } + + c10::intrusive_ptr getBackend(c10::DeviceType deviceType); + + c10::intrusive_ptr getBackend(BackendType backendType) const { + TORCH_CHECK( + backendTypeToBackend_.find(backendType) != backendTypeToBackend_.end(), + "Could not find backend type ", + uint16_t(backendType), + " for Process Group with name ", + backendTypeToString(backendType), + "."); + return backendTypeToBackend_.at(backendType); + } + + // Return device types supported by this ProcessGroup. + // Note: the return type is `Device` rather than `DeviceType` for the purpose + // of easy comparison at Python level. The `Device` will have default index + // (-1). 
+ std::vector getDeviceTypes() const { + std::vector devices; + devices.reserve(deviceTypes_.size()); + for (auto& dt : deviceTypes_) { + devices.emplace_back(dt); + } + return devices; + } + + void registerOnCompletionHook( + std::function)>&& hook) { + getDefaultBackend()->registerOnCompletionHook(std::move(hook)); + } + + void waitForPendingWorks() { + getDefaultBackend()->waitForPendingWorks(); + } + + virtual void shutdown() { + for (auto& backend : backendTypeToBackend_) { + backend.second->shutdown(); + } + } + + virtual void abort() { + for (auto& backend : backendTypeToBackend_) { + backend.second->abort(); + } + } + + bool hasHooks() const { + auto backend_iter = backendTypeToBackend_.find(backendType_); + if (backend_iter == backendTypeToBackend_.end()) { + TORCH_WARN( + "No backend of type ", + uint16_t(backendType_), + " found for Process Group with name ", + getBackendName(), + ". Assuming no hooks are registered."); + return false; + } + + return backend_iter->second->hasHooks(); + } + + virtual const std::string& getGroupName() const; + virtual void setGroupName(const std::string& name); + virtual const std::string& getGroupDesc() const; + virtual void setGroupDesc(const std::string& name); + void enableCollectivesTiming(); + + void release_resources() override; + + // ProcessGroups optionally can be "bound" to a specific device. + // Currently this is only for nccl and allows for some opt-in + // optimizations such as automatic use of ncclCommSplit. The device + // is specified in `init_process_group` and eventually makes it + // here and then down into the actual backend instances. 
+ std::optional getBoundDeviceId() const { + return bound_device_id_; + } + + c10::intrusive_ptr getStore() const { + return store_; + } + + void setBoundDeviceId(std::optional device) { + if (device) { + TORCH_CHECK(device->has_index(), "setBoundDeviceId must have an index"); + } + bound_device_id_ = device; + } + + // This creates a new subgroup using the specified ranks. + // The current rank must be included in the list of new_ranks. + virtual c10::intrusive_ptr splitGroup( + const std::vector& ranks, + const std::optional& timeout, + const std::optional>& opts, + const std::optional& name, + const std::optional& groupDesc); + + // This creates a new subgroup using the specified ranks. + // The current rank must be included in the list of new_ranks. + virtual c10::intrusive_ptr mergeRemoteGroup( + const c10::intrusive_ptr& store, + const MergeOptions& opts, + const int& size); + + protected: + // Implementations of this interface need to call this to setup + // appropriate logging etc. + void init(); + + c10::intrusive_ptr store_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int rank_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int size_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + BackendType backendType_; + std::string pg_desc_; + int64_t splitCounter_; + + // Debug level setting. It is parsed once when ProcessGroup is constructed and + // remains the same across use of this process group. + DebugLevel dist_debug_level_{DebugLevel::Off}; + + // Backend classes for this ProcessGroup + std::unordered_set deviceTypes_; + // This mapping is ordered, as splitGroup must call split on the underlying + // backends in a consistent order. 
+ std::map deviceTypeToBackendType_; + std::unordered_map> + deviceTypeToBackend_; + std::unordered_map> + backendTypeToBackend_; + + std::optional bound_device_id_; +}; + +// Thread local functions for managing the currently active process group. +TORCH_API c10::intrusive_ptr& currentProcessGroup(); +TORCH_API void setProcessGroup(c10::intrusive_ptr processGroup); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2b4bed946be7bdaaf3601e1dd847410ef5b64f5f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGloo.hpp @@ -0,0 +1,503 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include +#include + +#include + +namespace c10d { + +constexpr const char* GLOO_BACKEND_NAME = "gloo"; + +// Control whether or not connections are established in a full mesh or lazily +// as needed. +static std::vector TORCH_GLOO_LAZY_INIT = {"TORCH_GLOO_LAZY_INIT"}; + +// Returns default value for lazyInit. +bool TORCH_API getDefaultGlooLazyInit(); + +// ProcessGroupGloo implements Gloo bindings for c10d. +// +// All functions on this class are expected to be called in the same +// order across processes in the group. This is the only way that we +// can guarantee to match up the same calls across processes. 
For +// multi-threaded usage of process groups, you can consider using +// multiple process group instances. +// +class TORCH_API ProcessGroupGloo : public Backend { + public: + // AsyncWork is the Gloo specific superclass for asynchronous work items. + // We can split asynchronous work into 3 phases: + // 1) Sanity checks and prepare input (e.g. memcpy) + // 2) Run operation on background thread + // 3) Synchronize with completion on foreground thread + // + // There is state to be shared between these 3 phases and all of this state + // is captured in the AsyncWork class and its derivatives. + // + // Note: while we are porting operations to use new style collectives, there + // is a split between operations using the existing caching approach and + // operations using the new AsyncWork base class. Over time we will port + // all operations and perform needed cleanup. + // + // FIXME: This probably should be called WorkGloo since the work is executed + // in sync mode by a background thread. 
+ class TORCH_API AsyncWork : public Work { + public: + explicit AsyncWork( + std::shared_ptr context, + std::vector> outputTensors, + OpType opType, + uint64_t seq, + std::chrono::milliseconds timeout, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~AsyncWork() override = default; + + static void execute(const c10::intrusive_ptr& work); + + virtual void run() = 0; + + std::vector result() override; + + c10::intrusive_ptr getFuture() override; + uint64_t getSequencenumber() const override; + std::chrono::milliseconds getTimeout() const; + virtual const std::vector getInputTensors() = 0; + virtual const std::vector getOutputTensors() = 0; + inline std::string getProfilerTitle() const { + return profilingTitle_; + } + inline at::ThreadLocalState getTLS() const { + return tls_; + } + + protected: + friend class ProcessGroupGloo; + // unique id used to tell the trace buffer that this + // work has completed + std::optional trace_id_; + std::optional trace_reset_epoch_; + std::shared_ptr context_; + const std::chrono::milliseconds timeout_; + + private: + void finishWorkGloo(); + void finishWorkGlooError(const std::exception_ptr& eptr); + inline void recordAsyncWorkProfilingInfo( + const char* profilingTitle, + const std::optional>& inputTensors); + + const std::vector> outputTensors_; + c10::intrusive_ptr future_; + std::function recordFunctionBeforeCallback_; + const uint64_t seq_; + std::string profilingTitle_; + at::ThreadLocalState tls_; + }; + + // Wrap c10d store as Gloo store + class TORCH_API GlooStore : public ::gloo::rendezvous::Store { + public: + GlooStore(c10::intrusive_ptr<::c10d::Store> store) + : store_(std::move(store)) {} + + void setUint(const std::string& key, const std::vector& value) { + store_->set(key, value); + } + + void set(const std::string& key, const std::vector& value) override { + std::vector tmp(value.begin(), value.end()); + store_->set(key, tmp); + } + + std::vector getUint(const 
std::string& key) { + auto value = store_->get(key); + return value; + } + + std::vector get(const std::string& key) override { + auto value = store_->get(key); + return std::vector(value.begin(), value.end()); + } + + void wait(const std::vector& keys) override { + store_->wait(keys, ::c10d::Store::kDefaultTimeout); + } + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override { + store_->wait(keys, timeout); + } + +#ifdef GLOO_STORE_HAS_STORE_V2 + bool has_v2_support() override { + return store_->hasExtendedApi(); + } + + std::vector> multi_get( + const std::vector& keys) override { + std::vector> res; + for (auto& value : store_->multiGet(keys)) { + res.emplace_back(value.begin(), value.end()); + } + return res; + } + + void multi_set( + const std::vector& keys, + const std::vector>& values) override { + std::vector> u_values; + u_values.reserve(values.size()); + for (auto& value : values) { + u_values.emplace_back(value.begin(), value.end()); + } + store_->multiSet(keys, u_values); + } + + void append(const std::string& key, const std::vector& value) + override { + std::vector tmp(value.begin(), value.end()); + return store_->append(key, tmp); + } + + int64_t add(const std::string& key, int64_t value) override { + return store_->add(key, value); + } +#endif + + const c10::intrusive_ptr<::c10d::Store>& _getStore() const { + return store_; + } + + protected: + c10::intrusive_ptr<::c10d::Store> store_; + }; + + // For send and recv operations there is no need to pass them to the + // thread pool as they are entirely completed by the device thread. + // This work object is used to synchronize completion of the send or + // recv operation. It keeps a reference to the tensor it is + // operating on to prevent it from being deallocated while the + // operation is still in flight. 
+ class TORCH_API SendWork : public Work { + public: + explicit SendWork( + at::Tensor& tensor, + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer, + uint64_t seq); + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void abort() override; + + uint64_t getSequencenumber() const override; + + protected: + at::Tensor tensor_; + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer_; + const uint64_t seq_; + }; + + class TORCH_API RecvWork : public Work { + public: + explicit RecvWork( + at::Tensor& tensor, + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer, + OpType opType, + uint64_t seq, + const char* profilingTitle = nullptr); + + int sourceRank() const override; + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void abort() override; + + uint64_t getSequencenumber() const override; + + protected: + at::Tensor tensor_; + std::unique_ptr<::gloo::transport::UnboundBuffer> buffer_; + int srcRank_; + const uint64_t seq_; + }; + + struct TORCH_API Options : public Backend::Options { + explicit Options( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + + // return intrusive_ptr of the object + static c10::intrusive_ptr create( + std::chrono::milliseconds timeout = kBackendDefaultTimeout) { + return c10::make_intrusive(timeout); + } + + static c10::intrusive_ptr create_default( + std::chrono::milliseconds timeout = kBackendDefaultTimeout); + + std::vector> devices; + int threads; + }; + + const std::string getBackendName() const override { + return std::string(GLOO_BACKEND_NAME); + } + + bool supportsSplitting() const override { + return true; + } + + // Helper functions to create a new device object. + // They are static functions on this class to keep them logically + // separate from the rest of the code base (e.g. torch/csrc/distributed). + + // Create new device instance for specific interface. 
+ static std::shared_ptr<::gloo::transport::Device> createDeviceForInterface( + const std::string& interface, + bool lazyInit = false); + + // Create new device instance for specific hostname or address. + static std::shared_ptr<::gloo::transport::Device> createDeviceForHostname( + const std::string& hostname, + bool lazyInit = false); + + // Create new device instance. + // It tries to resolve this machine's hostname and bind to that address. + // If that fails (i.e. the hostname doesn't resolve to an address), it + // falls back to binding to the loopback address. + static std::shared_ptr<::gloo::transport::Device> createDefaultDevice( + bool lazyInit = false); + + explicit ProcessGroupGloo( + const c10::intrusive_ptr& store, + int rank, + int size, + c10::intrusive_ptr options = Options::create()); + + ~ProcessGroupGloo() override; + + c10::intrusive_ptr getOptions() { + return options_; + } + + void setTimeout(std::chrono::milliseconds timeout) override { + options_->timeout = timeout; + for (auto& context : contexts_) { + context->setTimeout(timeout); + } + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) override; + + c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) override; + + const std::vector& groupRanks() const; + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_sparse( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts 
= + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& output_tensor, + at::Tensor& input_tensor, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& output_lists, + std::vector& input_list, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputs, + std::vector& inputs, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputs, + std::vector>& inputs, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputs, + std::vector>& inputs, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputCounts, + std::vector& inputCounts, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr 
recvAnysource( + std::vector& tensors, + int tag) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + void enableCollectivesTiming() override; + + const std::shared_ptr<::gloo::rendezvous::Store>& _getStore() const { + return store_; + } + + // Similar to barrier(), but blocks rank 0 until all other ranks have + // acknowledged that they are alive (through send/recv from rank 0). Rank 0 + // is able to report all failed ranks if waitAllRanks = true, otherwise + // reports the first rank it detected as failed. + void monitoredBarrier( + const BarrierOptions& opts = BarrierOptions(), + bool waitAllRanks = false) override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + int getNumThreads() { + return options_->threads; + } + + protected: + std::shared_ptr<::gloo::rendezvous::Store> store_; + const c10::intrusive_ptr options_; + + // Every Gloo context represents a set of connections to its peers. + // In order to use more than one device (or allow for parallelism on + // a single device), you need multiple contexts. + std::vector> contexts_; + std::vector threads_; + bool stop_; + + // Incremented for every collective we kick off. + // The value is used as tag for collective operations. Collectives are kicked + // off in identical order across processes. Therefore the tag can be used + // to match up operations during concurrent execution. + uint32_t collectiveCounter_; + + // Returns next collective tag to use (uses collectiveCounter_). 
+ uint32_t nextTag(); + + // Returns the context to use for the specified tag. + // With `nextTag` returning an increasing number, this should lead + // to contexts being used in a round-robin fashion. + std::shared_ptr<::gloo::Context> getContext(uint32_t tag); + + // Entrypoint for worker threads. + void runLoop(int workerIndex); + + // Queue work to run on worker thread. + void enqueue(c10::intrusive_ptr work); + + // Keep both a queue of pending work, and a vector with in progress work. + // Both of these can only be mutated when holding the queue lock. + // We keep both around instead of just the queue, so we can grab a weak_ptr + // to all in progress and pending work when executing a barrier. + // When executing a barrier, we need to ensure that all prior work + // has completed before completing itself. + std::deque> workQueue_; + std::vector> workInProgress_; + std::mutex workMutex_; + std::condition_variable workProduceCV_; + std::condition_variable workConsumeCV_; + uint64_t seq_{0}; + size_t local_id_; + std::shared_ptr pgStatus_ = + std::make_shared(); +}; + +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4fa60b50e78683e889c9b153fd2e816350c1d44b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupGlooDetail.hpp @@ -0,0 +1,679 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#define GENERATE_ALL_TYPES(type, func, ...) \ + switch (type) { \ + case ::at::ScalarType::Float: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Double: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Half: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::BFloat16: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Char: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Byte: \ + case ::at::ScalarType::Bool: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Int: \ + func(__VA_ARGS__); \ + break; \ + case ::at::ScalarType::Long: \ + func(__VA_ARGS__); \ + break; \ + default: \ + TORCH_CHECK(false, "Invalid scalar type"); \ + } + +#define HOST_NAME_MAX 256 +#else +#define GENERATE_ALL_TYPES(type, func, args...) 
\ + switch (type) { \ + case ::at::ScalarType::Float: \ + func(args); \ + break; \ + case ::at::ScalarType::Double: \ + func(args); \ + break; \ + case ::at::ScalarType::Half: \ + func(args); \ + break; \ + case ::at::ScalarType::BFloat16: \ + func(args); \ + break; \ + case ::at::ScalarType::Char: \ + func(args); \ + break; \ + case ::at::ScalarType::Byte: \ + case ::at::ScalarType::Bool: \ + func(args); \ + break; \ + case ::at::ScalarType::Int: \ + func(args); \ + break; \ + case ::at::ScalarType::Long: \ + func(args); \ + break; \ + default: \ + TORCH_CHECK(false, "Invalid scalar type"); \ + } +#endif + +namespace c10d { + +TORCH_DECLARE_TYPED_REGISTRY( + GlooAllreduceRegistry, + c10::DeviceType, + ProcessGroupGloo::AsyncWork, + c10::intrusive_ptr, + std::shared_ptr, + std::vector&, + ReduceOp, + uint32_t, + uint64_t, + std::chrono::milliseconds); + +// This function initializes a vector of CUDA streams, one for every +// tensor in the input tensor vector, and ensures that these streams are +// synchronized with the current default streams. This is needed so +// that new work on the new streams is serialized w.r.t. all operations +// on the tensors. +TORCH_API void initializeStreamsEvents( + const std::vector& tensors, + std::vector& streams, + std::vector& events); + +// This function initializes a vector of CUDA streams, one per device, +// and ensures that these streams are synchronized with the current default +// streams. It is assumed that the tensors in the nested tensor vectors are +// on the same device. 
+TORCH_API void initializeStreamsEvents( + std::vector>& tensors, + std::vector& streams, + std::vector& events); + +// Type-erased element-wise reduction kernel. The band/bor/bxor kernels below +// treat the arguments as (output, lhs, rhs, element count). +typedef void (*ReduceFunc)(void*, const void*, const void*, size_t); + +// Maps a ReduceOp to a reduction kernel. SUM and AVG both map to the gloo sum +// kernel. This overload rejects BAND/BOR/BXOR as unusable with a non-integral +// dtype and PREMUL_SUM as unsupported by Gloo; any other op fails the final +// TORCH_CHECK with "Unhandled ReduceOp". +template , int> = 0> +ReduceFunc toFunction(const ReduceOp& r) { + switch (r) { + case ReduceOp::SUM: + case ReduceOp::AVG: + return ReduceFunc(&::gloo::sum); + case ReduceOp::PRODUCT: + return ReduceFunc(&::gloo::product); + case ReduceOp::MIN: + return ReduceFunc(&::gloo::min); + case ReduceOp::MAX: + return ReduceFunc(&::gloo::max); + case ReduceOp::BAND: + TORCH_CHECK(false, "Cannot use ReduceOp.BAND with non-integral dtype"); + break; + case ReduceOp::BOR: + TORCH_CHECK(false, "Cannot use ReduceOp.BOR with non-integral dtype"); + break; + case ReduceOp::BXOR: + TORCH_CHECK(false, "Cannot use ReduceOp.BXOR with non-integral dtype"); + break; + case ReduceOp::PREMUL_SUM: + TORCH_CHECK(false, "Cannot use ReduceOp.PREMUL_SUM with Gloo"); + break; + case ReduceOp::UNUSED: + default: + break; + } + + TORCH_CHECK(false, "Unhandled ReduceOp"); +} + +// Bitwise AND with SFINAE guard for integral types. +// Writes a[i] & b[i] into c[i] for each of the n elements. +template , int> = 0> +void band(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] & tb[i]; + } +} + +// Bitwise OR with SFINAE guard for integral types. +// Writes a[i] | b[i] into c[i] for each of the n elements. +template , int> = 0> +void bor(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] | tb[i]; + } +} + +// Bitwise XOR with SFINAE guard for integral types. 
+template , int> = 0> +void bxor(void* c, const void* a, const void* b, size_t n) { + auto tc = static_cast(c); + auto ta = static_cast(a); + auto tb = static_cast(b); + for (const auto i : c10::irange(n)) { + tc[i] = ta[i] ^ tb[i]; + } +} + +template , int> = 0> +ReduceFunc toFunction(const ReduceOp& r) { + switch (r) { + case ReduceOp::SUM: + case ReduceOp::AVG: + return ReduceFunc(&::gloo::sum); + case ReduceOp::PRODUCT: + return ReduceFunc(&::gloo::product); + case ReduceOp::MIN: + return ReduceFunc(&::gloo::min); + case ReduceOp::MAX: + return ReduceFunc(&::gloo::max); + case ReduceOp::BAND: + return ReduceFunc(&band); + case ReduceOp::BOR: + return ReduceFunc(&bor); + case ReduceOp::BXOR: + return ReduceFunc(&bxor); + case ReduceOp::PREMUL_SUM: + TORCH_CHECK(false, "Cannot use ReduceOp.PREMUL_SUM with Gloo"); + break; + case ReduceOp::UNUSED: + default: + break; + } + + TORCH_CHECK(false, "Unhandled ReduceOp"); +} + +template +void setInputs(O& opts, std::vector& tensors) { + opts.setInputs(getDataPointers(tensors), tensors[0].numel()); +} + +template +void setInput(O& opts, at::Tensor& tensor) { + opts.setInput(getDataPointer(tensor), tensor.numel()); +} + +template +void setInput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setInput(getDataPointer(tensor), counts); +} + +template +void setInput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setInput(getDataPointer(tensor), counts); +} + +template +void setOutputs(O& opts, std::vector& tensors, int64_t count) { + opts.setOutputs(getDataPointers(tensors), count); +} + +template +void setOutput(O& opts, at::Tensor& tensor) { + opts.setOutput(getDataPointer(tensor), tensor.numel()); +} + +template +void setOutput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setOutput(getDataPointer(tensor), counts); +} + +template +void setOutput(O& opts, at::Tensor& tensor, std::vector& counts) { + opts.setOutput(getDataPointer(tensor), counts); +} + +static at::Tensor 
pinnedLike(at::Tensor& tensor) { + auto* allocator = at::detail::getCUDAHooks().getPinnedMemoryAllocator(); + auto storage = c10::Storage( + c10::Storage::use_byte_size_t(), + static_cast(at::detail::computeStorageNbytes( + tensor.sizes(), tensor.strides(), tensor.dtype().itemsize())), + allocator, + /*resizable=*/false); + return at::empty({0}, tensor.options().device(at::kCPU)) + .set_(storage, 0, tensor.sizes(), tensor.strides()); +} + +class AsyncAllreduceWork : public ProcessGroupGloo::AsyncWork { + public: + AsyncAllreduceWork( + std::shared_ptr context, + std::vector& inputs, + ReduceOp reduceOp, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : ProcessGroupGloo::AsyncWork( + std::move(context), + {inputs}, + OpType::ALLREDUCE, + seq, + timeout, + "gloo:all_reduce", + inputs), + inputs(inputs), + reduceOp(std::move(reduceOp)), + tag(tag) {} + + std::vector inputs; + const ReduceOp reduceOp; + const uint32_t tag; + + void allreduce(std::vector& tensors) { + auto tensor = tensors[0]; + if (tensor.is_complex()) { + TORCH_CHECK( + c10d::isComplexViewAsRealAllowed(reduceOp), + "all_reduce does not support", + reduceOp, + "on complex tensors"); + tensor = at::view_as_real(tensor); + } + gloo::AllreduceOptions opts(context_); + const auto& scalarType = tensor.scalar_type(); + opts.setReduceFunction(getFunction(scalarType, reduceOp)); + opts.setTag(tag); + opts.setTimeout(timeout_); + // Use tensor.numel() instead of tensors[0].numel() to + // get the right number of elements when tensors[0] is complex + GENERATE_ALL_TYPES(scalarType, setOutputs, opts, tensors, tensor.numel()); + gloo::allreduce(opts); + + // Gloo doesn't support AVG so we use SUM + division. 
+ if (reduceOp == ReduceOp::AVG) { + tensors[0] /= context_->size; + } + } + + const std::vector getInputTensors() override { + return inputs; + } + + const std::vector getOutputTensors() override { + return inputs; + } + + void run() override { + allreduce(inputs); + } + + template + void getFunction(gloo::AllreduceOptions::Func& fn, const ReduceOp op) { + fn = toFunction(op); + } + + gloo::AllreduceOptions::Func getFunction( + const at::ScalarType& dtype, + const ReduceOp& op) { + gloo::AllreduceOptions::Func fn; + GENERATE_ALL_TYPES(dtype, getFunction, fn, op); + return fn; + } +}; + +class AsyncAllreduceCoalescedWork : public AsyncAllreduceWork { + public: + AsyncAllreduceCoalescedWork( + const std::shared_ptr& context, + std::vector& inputs, + ReduceOp reduceOp, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : AsyncAllreduceWork( + context, + inputs, + std::move(reduceOp), + tag, + seq, + timeout) {} + + void run() override { + allreduceCoalesced(inputs); + } + + private: + void allreduceCoalesced(std::vector& tensors) { + // reduce coalesced, flattened tensors. + at::Tensor coalescedTensor = flattenDenseTensors(tensors); + std::vector allreduceInput = {coalescedTensor}; + allreduce(allreduceInput); + + // separate and reshape tensors. 
+ size_t offset = 0; + for (at::Tensor& tensor : tensors) { + const int64_t tensorNumel = tensor.numel(); + const c10::IntArrayRef tensorShape = tensor.sizes(); + tensor.copy_(coalescedTensor.slice(0, offset, offset + tensorNumel) + .view(tensorShape)); + offset += tensorNumel; + } + } +}; + +class AsyncSparseAllreduceWork : public ProcessGroupGloo::AsyncWork { + public: + AsyncSparseAllreduceWork( + std::shared_ptr context, + std::vector& inputs, + uint32_t tag, + uint64_t seq, + std::chrono::milliseconds timeout) + : ProcessGroupGloo::AsyncWork( + std::move(context), + {inputs}, + OpType::_ALLREDUCE_SPARSE, + seq, + timeout, + "gloo:sparse_all_reduce", + inputs), + inputs(inputs), + tag(tag) {} + + std::vector inputs; + const uint32_t tag; + + // We share dimensionality about the sparse tensors before collecting + // their contents. We assume here that the maximum number of sparse + // and dense dimensions is 4. This is stored in a contiguous piece of + // memory so that we can easily run allgather on it. + // + // The layout of this memory is as follows: + // + // - [0:4]: sparse dims + // - [4:8]: dense dims + // - [8]: nnz + // + class SparseTensorMetadata { + public: + static constexpr auto dim = 9; + + // Construct from an existing metadata tensor to facilitate structured + // access to metadata from peers, after gathering it. + explicit SparseTensorMetadata(at::Tensor metadata) + : metadata_(std::move(metadata)), + data_(metadata_.mutable_data_ptr()) { + AT_ASSERT(metadata_.scalar_type() == at::kLong); + AT_ASSERT(metadata_.dim() == 1); + AT_ASSERT(metadata_.size(0) == dim); + } + + // Populate the metadata. 
+ void populate_from_sparse_tensor(const at::Tensor& tensor) { + const auto sparse_dim = tensor.sparse_dim(); + AT_ASSERT(sparse_dim <= 4); + for (const auto i : c10::irange(4)) { + if (i < sparse_dim) { + data_[i] = tensor.size(i); + } + } + const auto dense_dim = tensor.dense_dim(); + AT_ASSERT(dense_dim <= 4); + for (const auto i : c10::irange(4)) { + if (i < dense_dim) { + data_[i + 4] = tensor.size(sparse_dim + i); + } + } + data_[8] = tensor._nnz(); + } + + std::vector sizes() const { + std::vector sizes; + // Sparse sizes + for (const auto i : c10::irange(4)) { + if (data_[i] <= 0) { + break; + } + sizes.push_back(data_[i]); + } + // Dense sizes + for (const auto i : c10::irange(4, 8)) { + if (data_[i] <= 0) { + break; + } + sizes.push_back(data_[i]); + } + return sizes; + } + + int64_t nnz() const { + return data_[8]; + } + + protected: + at::Tensor metadata_; + int64_t* data_; + }; + + // Sparse allreduce is implemented with allgather on indices and values. + // Every process then sums the resulting sparse tensors locally. + // The nnz for sparse tensors may be different across processes, so first + // we run allgather on the nnz, and then allgather with max(nnz). + at::Tensor allreduce(std::vector& tensors) { + // TODO: This is a massive hack! There is some confusion about + // Variable/Tensor inside the body of this function. Turning off + // grad smooths over the confusion for now. This fixes + // test/test_c10d_gloo.py ProcessGroupGlooTest.test_sparse_allreduce_basics + // + // The correct fix is to stop allocating tensors that are not variables, + // but to conveniently do this c10d must depend on torch not ATen + at::AutoDispatchBelowAutograd guard; + auto input = tensors[0]; + + // Perform local reduction if we have multiple inputs. + for (const auto i : c10::irange(1, tensors.size())) { + input += tensors[i]; + } + + // Need to coalesce before we can access indices and values. 
+ input = input.coalesce(); + + // Gather metadata information from all ranks. + auto metadata = allgather_metadata(input); + + // Sanity check dimensionality across ranks. + { + const auto expected = metadata[context_->rank].sizes(); + for (const auto i : c10::irange(context_->size)) { + if (i == context_->rank) { + continue; + } + const auto actual = metadata[i].sizes(); + TORCH_CHECK(actual == expected, "Sparse dimensions do not match"); + } + } + + // Gather all indices and all values. + auto indices = allgather_indices(input, metadata); + auto values = allgather_values(input, metadata); + + // Perform global reduction. + AT_ASSERT(static_cast(indices.size()) == context_->size); + AT_ASSERT(static_cast(values.size()) == context_->size); + auto output = at::sparse_coo_tensor( + indices[0], values[0], input.sizes(), input.options()); + for (const auto i : c10::irange(1, context_->size)) { + output += at::sparse_coo_tensor( + indices[i], values[i], input.sizes(), input.options()); + } + + // Coalesce for good measure. + return output.coalesce(); + } + + void run() override { + auto output = allreduce(inputs); + + // This copy is needed when we run a multi-gpu version of reduce (multiple + // inputs per rank). 
+ for (const auto i : c10::irange(inputs.size())) { + inputs[i].copy_(output); + } + } + + const std::vector getInputTensors() override { + return inputs; + } + + const std::vector getOutputTensors() override { + return inputs; + } + + private: + std::vector allgather_metadata( + const at::Tensor& tensor) { + auto buffer = + at::zeros({context_->size, SparseTensorMetadata::dim}, at::kLong); + + // Prepare metadata vector (1 entry per rank) + std::vector metadata; + metadata.reserve(context_->size); + for (const auto i : c10::irange(context_->size)) { + metadata.emplace_back(buffer.select(0, i)); + } + + // Populate data for this rank + metadata[context_->rank].populate_from_sparse_tensor(tensor); + + // Allgather metadata + gloo::AllgatherOptions opts(context_); + opts.setOutput(buffer.mutable_data_ptr(), buffer.numel()); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgather(opts); + + return metadata; + } + + std::vector allgather_indices( + const at::Tensor& tensor, + const std::vector& metadata) { + const auto sparseDim = tensor.sparse_dim(); + + std::vector counts(context_->size); + size_t totalSize = 0; + for (const auto i : c10::irange(metadata.size())) { + counts[i] = metadata[i].nnz() * sparseDim; + totalSize += counts[i]; + } + + auto output = at::empty({static_cast(totalSize)}, at::kLong); + + // tensors copied from cuda may not be contiguous, get a contiguous + // tensor before use its data_ptr + auto input = tensor.indices().contiguous(); + + // Allgatherv indices. + gloo::AllgathervOptions opts(context_); + opts.setInput( + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-const-cast) + const_cast(input.const_data_ptr()), + input.numel()); + opts.setOutput(output.mutable_data_ptr(), counts); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgatherv(opts); + + // Compile indices tensor per rank. 
+ std::vector indices; + indices.reserve(metadata.size()); + int64_t offset = 0; + for (const auto& i : metadata) { + const auto nnz = i.nnz(); + const auto numel = sparseDim * nnz; + indices.push_back( + output.narrow(0, offset, numel).reshape({sparseDim, nnz})); + offset += numel; + } + + return indices; + } + + std::vector allgather_values( + const at::Tensor& tensor, + const std::vector& metadata) { + // There are nnz #dense_dim()-dimensional tensors per rank. + const auto valueShape = tensor.sizes().slice(tensor.sparse_dim()); + int64_t denseNumel = 1; + for (auto dim : valueShape) { + denseNumel *= dim; + } + + std::vector counts(context_->size); + int64_t totalSize = 0; + for (const auto i : c10::irange(metadata.size())) { + counts[i] = metadata[i].nnz() * denseNumel; + totalSize += static_cast(counts[i]); + } + + auto output = at::empty({totalSize}, tensor.scalar_type()); + + // Allgatherv indices. + gloo::AllgathervOptions opts(context_); + // tensors copied from cuda may not be contiguous, get a contiguous + // tensor before use its data_ptr + at::Tensor valueTensor = tensor.values().contiguous(); + GENERATE_ALL_TYPES(valueTensor.scalar_type(), setInput, opts, valueTensor); + GENERATE_ALL_TYPES( + valueTensor.scalar_type(), setOutput, opts, output, counts); + opts.setTag(tag); + opts.setTimeout(timeout_); + gloo::allgatherv(opts); + + // Compile values tensor per rank. + std::vector values; + values.reserve(metadata.size()); + int64_t offset = 0; + for (const auto& i : metadata) { + const auto nnz = i.nnz(); + const auto numel = denseNumel * nnz; + auto tensorShape = std::vector({(int64_t)nnz}); + std::copy( + valueShape.begin(), + valueShape.end(), + std::back_inserter(tensorShape)); + values.push_back(output.narrow(0, offset, numel).reshape(tensorShape)); + offset += numel; + } + + return values; + } +}; + +} // namespace c10d + +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66d592145e2a8cc0d5ebe5a97c43e660788a6606 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupMPI.hpp @@ -0,0 +1,278 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_MPI + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include + +#include + +namespace c10d { + +constexpr const char* MPI_BACKEND_NAME = "mpi"; + +// WorkEntry is the state associated with a single MPI run instance. +// It includes the source Tensor list and destination Tensor list, as well as +// the actual run function that will operate either on src or dst or both. +struct WorkEntry { + explicit WorkEntry( + std::vector* srcPtr, + std::vector* dstPtr, + std::function&)> run) + : dst(dstPtr ? *dstPtr : std::vector()), run(std::move(run)) { + if (srcPtr) { + src = *srcPtr; + } + } + + // Not copyable + WorkEntry(const WorkEntry&) = delete; + // Not copy assignable + WorkEntry& operator=(const WorkEntry&) = delete; + + // For input and output tensors (in-place), we will always use src + std::vector src; + + // Copy of user provided outputs. + const std::vector dst; + + // src rank returned, for recv only + int* srcRank = nullptr; + std::function&)> run; +}; + +// ProcessGroupMPI implements MPI bindings for c10d. +// +// All functions on this class are expected to be called in the same +// order across processes in the group. 
This is the only way that we +// can guarantee to match up the same calls across processes. +// +// All MPI functions provided by this class are asynchronously scheduled on a +// Worker thread. Therefore, ProcessGroupMPI requires the MPI implementation +// that is used to have a minimum thread support value of MPI_THREAD_SERIALIZED. +// That is, the process may be multi-threaded, and multiple threads may make +// MPI calls, but only one at a time: MPI calls are not made concurrently from +// two distinct threads (all MPI calls are serialized). However, with +// MPI_THREAD_SERIALIZED, ProcessGroupMPI will only support a single process +// group. In other words, no more than 1 process group can be created globally. +// +// To use multiple ProcessGroupMPI instances, your MPI implementation must +// have a thread support value of MPI_THREAD_MULTIPLE, that +// is, multiple threads may call MPI, with no restriction. +// +// Also note that ProcessGroupMPI only supports single-Tensor operations. In +// other words, the size of the input Tensor vector should always be 1. +// +// CUDA tensors can be supported if the MPI used is CUDA-aware, and +// ProcessGroupMPI will automatically detect this support. 
+class TORCH_API ProcessGroupMPI : public Backend { + public: + class WorkMPI : public Work { + public: + explicit WorkMPI( + std::vector outputTensors, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt) + : Work(-1, OpType::UNKNOWN, profilingTitle, inputTensors), + outputTensors_(std::move(outputTensors)), + future_(c10::make_intrusive( + c10::ListType::create(c10::TensorType::get()))) {} + + std::vector result() override; + + c10::intrusive_ptr getFuture() override; + + protected: + friend class ProcessGroupMPI; + + private: + void finishWorkMPI(); + void finishWorkMPIError(const std::exception_ptr& eptr); + + std::vector outputTensors_; + c10::intrusive_ptr future_; + }; + + class AsyncWork : public Work { + public: + AsyncWork( + MPI_Request request, + std::vector outputTensors, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~AsyncWork() override; + + bool isCompleted() override; + + bool isSuccess() const override; + + int sourceRank() const override; + + bool wait(std::chrono::milliseconds timeout = kUnsetTimeout) override; + + void abort() override; + + std::vector result() override; + + protected: + void populateException(); + + private: + const std::vector outputTensors_; + MPI_Request request_; + MPI_Status status_{}; + }; + + // Constructor will spawn up the worker thread loop + explicit ProcessGroupMPI(int rank, int size, MPI_Comm pgComm); + + ~ProcessGroupMPI() override; + + // Abort the MPI program, needs to be called when exception is detected + void abort() override; + + const std::string getBackendName() const override { + return std::string(MPI_BACKEND_NAME); + } + + c10::intrusive_ptr broadcast( + std::vector& data, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + 
std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputbuffer, + at::Tensor& inputbuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr recvAnysource( + std::vector& tensor, + int tag) override; + + c10::intrusive_ptr 
barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + // Creating a new ProcessGroupMPI, will initialize MPI if not initialized + static c10::intrusive_ptr createProcessGroupMPI( + std::vector ranks = {}); + + protected: + using WorkType = + std::tuple, c10::intrusive_ptr>; + // Worker thread loop + void runLoop(); + // Helper function that is called by the destructor + void destroy(); + + c10::intrusive_ptr enqueue( + std::unique_ptr entry, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + bool stop_; + + std::mutex pgMutex_; + std::thread workerThread_; + + std::deque queue_; + std::condition_variable queueProduceCV_; + std::condition_variable queueConsumeCV_; + + // Global states + static void initMPIOnce(); + static void mpiExit(); + + static std::mutex pgGlobalMutex_; + static int mpiThreadSupport_; + + MPI_Comm pgComm_; +}; + +} // namespace c10d + +#endif // USE_C10D_MPI + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp new file mode 100644 index 0000000000000000000000000000000000000000..262f50440c2f6550d4449aac19f737b70c1f22bb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupNCCL.hpp @@ -0,0 +1,1535 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_NCCL + +#if defined(__linux__) +#include +#include +#include +#include +#endif + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace c10d { + +// Control broadcasting of NCCL uniqueId +static std::vector TORCH_NCCL_BCAST_UNIQUEID = { + "TORCH_NCCL_BCAST_UNIQUEID"}; + +// Control EagerInit P2P serialization warning +static std::vector + TORCH_NCCL_SHOW_EAGER_INIT_P2P_SERIALIZATION_WARNING = { + "TORCH_NCCL_SHOW_EAGER_INIT_P2P_SERIALIZATION_WARNING"}; + +// Control whether to always use high priority streams +static std::vector TORCH_NCCL_HIGH_PRIORITY = { + "TORCH_NCCL_HIGH_PRIORITY"}; + +// Control whether or not wait() is blocking or non-blocking. +static std::vector TORCH_NCCL_BLOCKING_WAIT = { + "TORCH_NCCL_BLOCKING_WAIT", + "NCCL_BLOCKING_WAIT"}; + +// TODO: We want to eventually remove this variable and make users to use +// the default value (3 - SkipCleanUp). +// Control whether or not we perform Async Error Handling with NCCL. 
+static std::vector TORCH_NCCL_ASYNC_ERROR_HANDLING = { + "TORCH_NCCL_ASYNC_ERROR_HANDLING", + "NCCL_ASYNC_ERROR_HANDLING"}; + +// Control whether dumping debug info on watchdog +// timeout is enabled. This variable must be set together with +// TORCH_NCCL_ENABLE_MONITORING=1 and TORCH_NCCL_TRACE_BUFFER_SIZE > 0. +static std::vector TORCH_NCCL_DUMP_ON_TIMEOUT = { + "TORCH_NCCL_DUMP_ON_TIMEOUT"}; + +// Control whether to propagate NCCL errors to all ranks through TCPStore. +static std::vector TORCH_NCCL_PROPAGATE_ERROR = { + "TORCH_NCCL_PROPAGATE_ERROR"}; + +// Control whether Desync Debug is enabled. This variable must be set +// together with TORCH_NCCL_ASYNC_ERROR_HANDLING. +static std::vector TORCH_NCCL_DESYNC_DEBUG = { + "TORCH_NCCL_DESYNC_DEBUG", + "NCCL_DESYNC_DEBUG"}; + +// Enable recording start-events for all ProcessGroupNCCL collectives, and +// compute accurate collective timing per-collective. (Note: end-events are +// recorded by default. Turning on this flag can increase the chance of a +// watchdog hang due to performing a CUDA event query which eventually calls +// the cudaEventElapsedTime() API.) +static std::vector TORCH_NCCL_ENABLE_TIMING = { + "TORCH_NCCL_ENABLE_TIMING", + "NCCL_ENABLE_TIMING"}; + +// Enable monitoring thread which aborts the process when the ProcessGroupNCCL +// Watchdog thread gets stuck and no heartbeat is detected after +// TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC. This can happen due to calling CUDA/NCCL +// APIs that may hang. It is useful to prevent jobs from being stuck for longer +// than necessary, tying up cluster resources. +static std::vector TORCH_NCCL_ENABLE_MONITORING = { + "TORCH_NCCL_ENABLE_MONITORING"}; + +// Control the watchdog heartbeat timeout period after which the monitoring +// thread will abort the process. 
+static std::vector TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC = { + "TORCH_NCCL_HEARTBEAT_TIMEOUT_SEC"}; + +// Whether to rethrow CUDA Errors in the watchdog (default true) +static std::vector TORCH_NCCL_RETHROW_CUDA_ERRORS = { + "TORCH_NCCL_RETHROW_CUDA_ERRORS"}; + +// The maximum number of events we store in the flight recorder's ring buffer. +// (One event could be the start or end of a collective, for example). +static std::vector TORCH_NCCL_TRACE_BUFFER_SIZE = { + "TORCH_NCCL_TRACE_BUFFER_SIZE"}; + +// Control how much extra time we will wait for dumping the debugging info +// before we exit and throw a timeout exception. +static std::vector TORCH_NCCL_WAIT_TIMEOUT_DUMP_MILSEC = { + "TORCH_NCCL_WAIT_TIMEOUT_DUMP_MILSEC"}; + +// Control the interval inside the monitoring thread to check the coordinated +// signal from other ranks, e.g. to dump the debugging information. +static std::vector TORCH_NCCL_COORD_CHECK_MILSEC = { + "TORCH_NCCL_COORD_CHECK_MILSEC"}; + +// Whether to log C++ stack traces on unclean shutdown (default true) +static std::vector TORCH_NCCL_LOG_CPP_STACK_ON_UNCLEAN_SHUTDOWN = { + "TORCH_NCCL_LOG_CPP_STACK_ON_UNCLEAN_SHUTDOWN"}; + +// Whether to include only active collectives in the Flight Recorder trace +// (default false). NOTE(review): this description does not obviously match the variable name; confirm against the code that reads it. +static std::vector TORCH_NCCL_EXTRA_DUMP_ON_EXEC = { + "TORCH_NCCL_EXTRA_DUMP_ON_EXEC"}; + +// Control whether to use CudaEventCache for the collective in watchdog thread. +// We noticed in the past that destroying a CudaEvent while the CUDA global +// lock is held can cause a hang. +static std::vector TORCH_NCCL_CUDA_EVENT_CACHE = { + "TORCH_NCCL_CUDA_EVENT_CACHE"}; + +// Control the number of ranks each root can cover during NCCL comm init. 
static std::vector<std::string> TORCH_NCCL_RANKS_PER_ROOT = {
    "TORCH_NCCL_RANKS_PER_ROOT"};

// NOTE(review): presumably toggles NaN checking of collective inputs --
// confirm against the code that reads this variable.
static std::vector<std::string> TORCH_NCCL_NAN_CHECK = {"TORCH_NCCL_NAN_CHECK"};

// Name under which this backend identifies itself.
constexpr const char* NCCL_BACKEND_NAME = "nccl";

// Store key names. NOTE(review): judging by the names, these are the keys
// used to signal a debug dump and a remote error through the store --
// confirm against the watchdog / heartbeat monitor implementation.
constexpr const char* kStoreDumpKey = "exception_dump";

constexpr const char* kStoreErrorSignalKey = "remote_error";

constexpr const int kWorkStatusUpdatePeriodMs = 30 * 1000; // 30 seconds

// Default timeout: 10 minutes, expressed in milliseconds.
constexpr auto kProcessGroupNCCLDefaultTimeout =
    std::chrono::milliseconds(10 * 60 * 1000);

// NoHandling: do not handle asynchronous NCCL errors
// TearDown: tear down process upon error, see `WorkNCCL::handleException`
// CleanUpOnly: just clean up collectives and abort communicators without
// tearing down process
// SkipCleanUp: (this is a temporary option and can be removed in future) tear
// down process without cleaning up NCCL communicators. This should be used as
// a last resort in case `ncclCommAbort` itself is hanging
enum ErrorHandlingMode {
  NoHandling = 0,
  TearDown = 1,
  CleanUpOnly = 2,
  SkipCleanUp = 3
};

// True for the modes that clean up communicators (TearDown, CleanUpOnly).
// The argument is parenthesized so that an expression argument (for example a
// ternary) is compared as a whole instead of being split by operator
// precedence.
#define SHOULD_CLEAN_UP(a) ((a) != NoHandling && (a) != SkipCleanUp)

// True for the modes that tear down the process (TearDown, SkipCleanUp).
#define SHOULD_TEAR_DOWN(a) ((a) != NoHandling && (a) != CleanUpOnly)

// Logs the hash of a collective's tensor at WARNING level. Relies on a
// `logPrefix()` being callable at the expansion site. The trailing semicolon
// is part of the macro and is kept for compatibility with existing call sites.
#define PRINT_COLLECTIVE_HASH_SIGNATURE(phase, opType, numel, hashValue)      \
  LOG(WARNING) << logPrefix() << "Hash of " << (phase) << " to NCCL "         \
               << (opType) << " with size " << (numel) << " is "              \
               << (hashValue);

// If set, ProcessGroupNCCL doesn't use recordStream calls to ensure
// caching allocator safety for tensors used on both user-facing and
// internal comm streams.
// Instead, it stashes live references to those tensors until after
// user-facing streams are synced with comm streams.
// See stashed_for_allocator_safety_ below.
+static std::vector TORCH_NCCL_AVOID_RECORD_STREAMS = { + "TORCH_NCCL_AVOID_RECORD_STREAMS"}; + +// If set, ProcessGroupNCCL registers postAlloc and preFree hooks to cuda cache +// allocator so that whenever a tensor is allocated or freed, ProcessGroupNCCL +// can register/deregister the tensor on all available NCCL communicators. +static std::vector TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK = + {"TORCH_NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK", + "NCCL_USE_TENSOR_REGISTER_ALLOCATOR_HOOK"}; + +#if defined(__linux__) +struct DumpPipe { + DumpPipe(int rank) { + std::string fileStem = + getCvarString({"TORCH_NCCL_DEBUG_INFO_PIPE_FILE"}, ""); + if (fileStem.empty() || + getCvarInt({"TORCH_NCCL_TRACE_BUFFER_SIZE"}, 0) <= 0) { + return; + } + TORCH_CHECK(!fileStem.empty(), "TORCH_NCCL_DEBUG_INFO_PIPE_FILE is empty"); + std::string filename = c10::str(fileStem, rank, ".pipe"); + TORCH_CHECK( + unlink(filename.c_str()) != -1 || errno == ENOENT, + "Error removing existing named pipe ", + filename, + ", Error: ", + std::strerror(errno)); + TORCH_CHECK( + mkfifo(filename.c_str(), 0666) != -1, + "Error creating named pipe ", + filename, + ", Error: ", + std::strerror(errno)); + fd_ = open(filename.c_str(), O_RDONLY | O_NONBLOCK); + LOG(INFO) << "Pipe file " << filename + << " has been opened, write to it to trigger NCCL Debug Dump."; + TORCH_CHECK(fd_ != -1, "Error opening named pipe ", filename); + } + bool shouldDump() { + if (fd_ == -1) { + return false; + } + // NOLINTNEXTLINE(*array*) + char buf[128]{}; + // non-blocking from O_NONBLOCK above. + // Ignore EINTR because we already will poll this + // again later. + ssize_t bytesRead = read(fd_, &buf, 128); + return bytesRead > 0; + } + ~DumpPipe() { + if (fd_ != -1) { + close(fd_); + } + } + + private: + int fd_ = -1; +}; +#else +struct DumpPipe { + DumpPipe(int rank) {} + bool shouldDump() { + return false; + } +}; +#endif + +// A shelf for stashing tensors between op call and `work.wait()`. 
+// Used in case of async ops. +class TensorShelf { + public: + // Stash tensors so that CachingAllocator cannot recycle them prematurely. + void stash(std::vector& tensors); + // Stash tensors from another shelf. + void stash(TensorShelf& other); + // Unstage the stashed tensors so that CachingAllocator can recycle them. + // Same as `clear()`. + void unstash(); + // Whether shelf is empty. + bool empty(); + // Clear the shelf. + void clear(); + + protected: + // Get the inner tensor vector. Use with caution as it is not protected by + // mutex. + std::vector& get(); + + private: + std::vector tVector_; + // Need a mutex to protect `tVector_` because it can be potentially accessed + // from both main thread and watchdog thread. + std::mutex mutex_; +}; + +// ProcessGroupNCCL implements NCCL bindings for c10d. +// +// All functions of the class are expected to be called in the same order +// across all processes in the process group. This is the only way that we +// can guarantee to match up the same calls among all processes. +// +// All NCCL functions provided by this class are asynchronous functions. More +// specifically, each NCCL call is scheduled on a separate CUDA stream that is +// different from the current CUDA stream. This is for the purpose of +// achieving potentially concurrency and better performance. As a result, +// it is the callers' responsibility to make sure that the CUDA stream their +// code works on needs to wait for the NCCL operation from +// this class. +// +// This can be done by calling: +// +// either WorkNCCL::wait() or WorkNCCL::synchronize(), both achieves the same +// functionality and are synonyms. +// +// Also note that WorkNCCL::finishedGPUExecution() is a helper function only +// provided by ProcessGroupNCCL to check if the NCCL operation of WorkNCCL has +// finished execution on the GPU (not just scheduled). 
+// +// Example on using the NCCL process group +// +// ProcessGroupNCCL pg(store, rank, size); +// std::shared_ptr work = pg.allreduce(tensors); +// +// // At this point, NCCL kernel has already by queued successfully +// // Now, let current stream wait for the NCCL to finish, this function is +// // async operation as well +// +// work->wait() +// +// // Now continue on other work in the current stream. +class TORCH_API ProcessGroupNCCL : public Backend { + public: + class WorkNCCL : public Work, public std::enable_shared_from_this { + public: + friend struct WorkInfo; + + // Constructor takes a list of CUDA devices + WorkNCCL( + std::string pgUID, + std::string pgDesc, + at::Device& device, + int rank, + OpType opType, + uint64_t seq, + bool isP2P = false, + const char* profilingTitle = nullptr, + const std::optional>& inputs = std::nullopt, + bool enableTiming = false, + bool cudaEventCacheEnabled = false, + DebugLevel distDebugLevel = DebugLevel::Off); + // Copy constructor doing partial copy without outputs_. Cleanup thread + // monitors and removes finished works. However it will deadlock when + // destructs outputs_ tensors who are view tensors in autograd graph. + WorkNCCL(const WorkNCCL& w); + + ~WorkNCCL() override = default; + + // Checks if the NCCL kernel has started to execute. + bool isStarted(); + + // Checks if request has completed. In this specific case of NCCL, it checks + // if the NCCL operation has completed on the GPU in its own NCCL stream. + // Non-blocking operation. + bool isCompleted() override; + + bool isSuccess() const override; + + // Same as calling synchronize() for NCCL work if timeout is not set. + // Otherwise, it will block the CPU thread until the NCCL work is completed + // or timed out. If timeout, exception will be thrown. 
+ bool wait(std::chrono::milliseconds timeout = kNoTimeout) override; + + void blockCurrentStream() override { + synchronize(); + } + + void abort() override; + + // Let current stream wait on the completion of the NCCL work + // Throws on exceptions. + void synchronize() override; + + // Synchronize streams by blocking each on the NCCL stream + void synchronizeStream(); + + // Helper function to handle exception (throw if needed). + void handleException(ErrorHandlingMode asyncErrorHandling); + + // Helper function that checks if the NCCL kernels have finished + // execution on the GPUs + bool finishedGPUExecution(); + + // Get a Future object that will be marked as completed internally. + c10::intrusive_ptr getFuture() override; + + // Get a Future result of each work (e.g. success, different error types). + // instead of the tensor output. + c10::intrusive_ptr getFutureResult() override; + + float getDuration() const override; + + uint64_t getSequencenumber() const override; + + const std::string& logPrefix() const; + + // Helper function that sets an exception_ptr on the WorkNCCL object. + void setException(std::exception_ptr exception_ptr); + + // Helper function that returns True if the WorkNCCL object has timed out + // and False otherwise. + // In case of timeout, set exception on the WorkNCCL object. + bool checkTimeout( + std::optional timeout = std::nullopt); + + // Print the traceback of the collective at call time + void printTraceback() const; + + std::string getTraceback() const; + + std::vector result() override; + + protected: + // The process group unique id + std::string pgUID_; + + // The process group description + std::string pgDesc_; + + // The cached list of CUDA devices to operate on + at::Device device_; + + // The start CUDA event of NCCL operator tracking this work item. These + // start CUDA events are needed by desync debugging if enabled. 
+ std::shared_ptr ncclStartEvent_; + + // The end CUDA event of NCCL operator tracking this work item. + std::shared_ptr ncclEndEvent_; + + // The NCCL communicator used for this work item. + std::shared_ptr ncclComm_; + + // whether this work is a barrier op + bool isBarrierOp_{false}; + + // Clone of blockingWait_ from ProcessGroupNCCL. + bool blockingWait_{false}; + + // Clone of opTimeout_ from ProcessGroupNCCL. + std::chrono::milliseconds opTimeout_{}; + + // Ephemeral timeouts are owned by exactly one work, + // and reset after that work completes. + // There may be more than one ephemeral timeout active at the same time, + // and this variable is used to track the ownership of ephemeral timeout. + std::chrono::milliseconds ownedEphermeralTimeout_ = + std::chrono::milliseconds(0); + + // Time point representing when the work started. + std::chrono::time_point workStartTime_; + + // Record the sequential number of collective or p2p. + uint64_t seq_; + bool isP2P_; + + // Indicates if the nccl start event has been updated to the store trace. + // This will be used by desync debug. + bool startTraceUpdated_{false}; + + // Record collective sizes for debug. We only record the size on the first + // device as multi-device per process is deprecated + size_t numelIn_ = 0; + size_t numelOut_ = 0; + + // Wrapper method for the static checkForNCCLErrors which can be overridden + // for tests. + virtual std::exception_ptr checkForNCCLErrors(); + + friend std::ostream& operator<<( + std::ostream& output, + const WorkNCCL& workNCCL); + + // Checks for NCCL errors and sets an appropriate exception_ptr. + void checkAndSetException(); + + // Just checks whether GPU execution has started, without modifying + // exception_ptr. + bool startedGPUExecutionInternal() const; + + // Just checks whether GPU execution has completed, without modifying + // exception_ptr. 
+ bool finishedGPUExecutionInternal() const; + + // Reference to the store so that we can write aborted communicators + // to the store. + c10::intrusive_ptr store_; + + // Store a reference to NCCL collective's outputs, used by result and to + // give a more descriptive message when representing the Work as a string. + std::shared_ptr> outputs_; + + // TORCH_NCCL_AVOID_RECORD_STREAMS implementation helper. + // Stores references to participating non-output tensors (ie inputs, + // flattened intermediates). + // We'll clear this list in synchronizeStream, just after user-facing + // stream(s) are synced with the nccl work stream(s). + // By keeping these refs (as well as outputs_) alive until after the + // collective's work rejoins the user-facing streams, we achieve + // caching allocator safety without any recordStream calls. + // For in-place collectives, some refs stashed here may alias outputs_, + // but that doesn't do any harm. + std::shared_ptr stashed_for_allocator_safety_; + + // The future returned by getFuture. + c10::intrusive_ptr future_; + + // the future result (e.g., success or failure) of the work + c10::intrusive_ptr futureWorkResult_; + + bool timingEnabled_; + // unique id used to tell the trace buffer that this + // work has completed + std::optional trace_id_; + std::optional trace_reset_epoch_; + DebugLevel distDebugLevel_; + friend class ProcessGroupNCCL; + }; + + struct Options : Backend::Options { + // NOTE: timeout in ProcessGroupNCCL::Options denote the timeout for + // operations. This is only used when blockingWait_ is enabled. 
+ explicit Options(bool is_high_priority_stream = false); + + // return intrusive_ptr of the object + static c10::intrusive_ptr create( + bool is_high_priority_stream = false) { + return c10::make_intrusive(is_high_priority_stream); + } + + // Schedule NCCL operations on high priority CUDA streams + bool is_high_priority_stream; + +#ifdef NCCL_HAS_CONFIG + // Configure ranks + ncclConfig_t config = NCCL_CONFIG_INITIALIZER; +#endif + + // Optional "parent" backend and color to create communicators from + // via `ncclCommSplit` + c10::intrusive_ptr split_from; + // Color to use for `ncclCommSplit`, values: + // * Non-negative value: in group; + // * NCCL_SPLIT_NOCOLOR (-1): not in group; + // * NCCL_SPLIT_NOCOLOR - 1: uninitialized. + // [Note 1]: the type must be `int` instead of `int64_t` because NCCL API + // accepts int. Otherwise, an implicit conversion may happen at the API call + // and the value may become negative. + // [Note 2]: this member is pybinded to Python, the value passed from Python + // must be within the numerical range of C++ int. Otherwise, Python will + // raise a RuntimeError saying type is incompatible. See also + // `_process_group_color` in `distributed_c10d.py`. +#ifdef NCCL_HAS_COMM_SPLIT + int split_color{NCCL_SPLIT_NOCOLOR - 1}; +#else + // [Note 3]: for older NCCL versions, NCCL_SPLIT_NOCOLOR is not defined. But + // `split_color` is pybinded to Python, so we need to define it. So we use + // the int value of `NCCL_SPLIT_NOCOLOR` (-1) instead. + int split_color{-2}; +#endif + }; + + // Helper class related to TORCH_NCCL_DESYNC_DEBUG + class DesyncDebugger { + public: + // Initialize and enable DesyncDebugger + void init( + int rank, + int size, + int globalRank, + int pgId, + c10::intrusive_ptr store); + + // Run desync debug. This function is called by watchdog at time of timeout. + void run(); + + // Log work start to store. + void logWorkStart(WorkNCCL& work); + + // Log work end to store. 
+ void logWorkEnd(WorkNCCL& work); + + private: + // Whether desync debug is enabled. + // If false, all functions are no-op. + bool enabled_{false}; + + // From ProcessGroupNCCL + int rank_; + int size_; + int globalRank_; + int pgId_; + + // Reference to the store so that we can log start/end event. + c10::intrusive_ptr store_; + + // The store keys to trace the last NCCL collective kernel CUDA events - + // start event and end event respectively. These are used to do desync root + // cause analysis. + std::string traceKeyStart_; + std::string traceKeyEnd_; + }; + + // Class that runs as a separate thread aside from watchdog + // thread because we need to check the heartbeat from watchdog thread + // so that when we get stuck in some NCCL/CUDA calls, + // we can dump the debugging information and abort the process. + class HeartbeatMonitor { + public: + HeartbeatMonitor(ProcessGroupNCCL* pg); + virtual ~HeartbeatMonitor() = default; + + // Start the heartbeat monitor thread. + void start(); + + // Join the heartbeat monitor thread. + void join(); + + // Run the actual loop to check watchdog heartbeat. + virtual void runLoop(); + + // Set the terminal flag and notify the heartbeat monitor thread to stop. + void stop(); + + // Set the last update time of watchdog thread. + void setLastWorkListUpdateTime( + std::chrono::time_point time); + + int getDumpTimeout() const; + + // Util function to get the timeout error message + std::string getNCCLWatchdogTimeoutErrorMsg(const std::string& extraMsg); + + // Util function to get the timeout exit message + std::string getNCCLWatchdogTimeoutExitMsg(const std::string& exitReason); + + protected: + // We need to keep a reference to the PG instance so that we can access + // the member functions of the PG instance. We store a raw pointer on + // purpose because the heartbeat monitor thread now still lives within the + // lifetime of the PG instance. 
+ ProcessGroupNCCL* pg_; + + private: + // Whether or not to print C++ stack traces to logs on unclean shutdown. + bool logCppStackOnUncleanShutdown_; + + // The time interval used for deciding whether there is no watchdog + // heartbeat. + int heartbeatTimeoutInSec_; + + // timeout for the dump to finish. + int waitTimeoutDumpInMilSec_; + + // Interval of check coordinated signals in ProcessGroupNCCL from other + // ranks e.g., trigger the dump of the debugging info for timeout when + // notified. + int coordCheckIntervalMilSec_; + + // We gate the heartbeat monitor thread so that we can roll it out + // gradually. + bool watchdogHeartbeatMonitorEnabled_; + + // Monitor thread which checks the heartbeat of Watchdog thread. + // If the monitor thread finds there is no heartbeat, it will dump debug + // info and then kill the watchdog thread to avoid hang. + std::thread ncclHeartbeatMonitorThread_; + + // Whether or not we should terminate the heartbeat monitoring threads. + std::atomic terminateHeartbeatMonitorThread_{false}; + + // Condition Variable for monitor thread to wake up early + std::condition_variable monitorWakeUpCV_; + + // Whether or not to dump debug info on exception including both watchdog + // timeout and nccl errors. + bool dumpOnTimeoutOrEx_; + + // Mutex to Guard monitorWakeUpCV_ + std::mutex monitorMutex_; + + // The last update time of WorkList inside watchdog thread. + std::chrono::time_point lastWorkListUpdateTime_; + }; + + // Class that runs as a side thread to check whether the NCCL collective + // is timed out or errors on the cached NCCL communicators. + class Watchdog { + public: + Watchdog(ProcessGroupNCCL* pg); + virtual ~Watchdog() = default; + + // Start the watchdog thread. + void start(); + + // Join the watchdog thread. + void join(); + + // Function that runs as part of a separate thread and checks for errors on + // NCCL communicators. 
We need a separate thread to check for NCCL errors + // since we can't rely on the user calling certain methods like wait(), + // isCompleted() etc. to detect and remediate errors. In addition to this, + // we need a mechanism to safely abort and remove NCCL communicators from + // our cache. This can be done cleanly by having a thread for the + // ProcessGroupNCCL class. Attempting to modify the communicator cache from + // the WorkNCCL class might run into issues with object lifetime since the + // ProcessGroupNCCL object might get destroyed before the WorkNCCL object. + void run(); + + // Watchdog's inside loop. + // Takes care of cleaning up completed work, and aborting upon failure or + // timeout. + void runLoop(); + + // Notify the loop inside watchdog. + void notify(); + + void checkAndSetRemoteError(); + + // A helper function to get the src rank of a signal from the Store. This is + // nonblocking function returning -1 if the signal is not available yet. + int getSignalSrcRank( + c10::intrusive_ptr& store, + const std::string& signal); + + uint64_t getHeartbt() const; + + void setDesyncDebug(bool desyncDebug); + + private: + std::thread ncclCommWatchdogThread_; + + // We need to keep a reference to the PG instance so that we can access + // the member functions of the PG instance. We store a raw pointer on + // purpose because the watchdog thread now still lives within the + // lifetime of the PG instance. + ProcessGroupNCCL* pg_; + + // Whether the NCCL watchdog should rethrow CUDA errors. + bool rethrowCUDAErrors_ = false; + + std::exception_ptr watchDogException_ = nullptr; + + // Condition Variable for watchdog thread sleep + std::condition_variable workMetaListCV_; + + // Heartbeat of watchdog thread. + std::atomic_uint64_t heartbeat_; + + // Whether or not to propagate detected errors to all ranks in the same PG + // through TCPStore. + bool propagatePgError_; + + // Whether or not to enable timeout root cause analysis. 
+ bool desyncDebug_; + + DesyncDebugger desyncDebugger_; + }; + + // If you wish to create multiple process groups, each with a potentially + // different rank and size, you can do so by passing a new store instance + // to each one. If you have only a single store object, you can + // use the `c10d::PrefixStore` to derive scoped instances. + // This is also what the Python API in torch.distributed does. + // + // The process group instance keeps a reference to the store because + // it may be used long after the constructor runs. In fact, the constructor + // doesn't create any NCCL communicators. A single NCCL communicator can + // only be used on a specific set of devices, and are therefore created + // on-demand when a collective runs. If another collective is executed later, + // against a different set of devices, the process group creates another NCCL + // communicator. These NCCL communicators are cached and reused if possible. + // + ProcessGroupNCCL( + c10::intrusive_ptr store, + int rank, + int size, + c10::intrusive_ptr options = Options::create()); + + // This constructor includes the deprecated `groupName` argument. + // If you have existing code that uses the `groupName`, you can replace + // it by specifying a `c10d::PrefixStore(groupName, store)` for store. + C10_DEPRECATED ProcessGroupNCCL( + const c10::intrusive_ptr& store, + int rank, + int size, + const std::string& groupName, + c10::intrusive_ptr options = Options::create()) + : ProcessGroupNCCL(store, rank, size, std::move(options)) {} + + ~ProcessGroupNCCL() override; + + // This function returns a local uid for ProcessGroupNCCL. 
+ uint64_t getUid() { + return static_cast(local_id_); + } + + c10::intrusive_ptr getOptions() { + return options_; + } + + c10::intrusive_ptr getBackendOptions() override { + return c10::static_intrusive_pointer_cast(options_); + } + + const std::string getBackendName() const override { + return std::string(NCCL_BACKEND_NAME); + } + + bool supportsSplitting() const override { + return true; + } + + bool supportsCoalescing() const override { + return true; + } + + bool supportsTimeEstimation() const override { +#ifdef NCCL_SIM_INFO_INITIALIZER + return true; +#else + return false; +#endif + } + + void setTimeout(std::chrono::milliseconds timeout) override { + options_->timeout = timeout; + } + + void startCoalescing() override; + + c10::intrusive_ptr endCoalescing() override; + + void startTimeEstimate(); + + float endTimeEstimate(); + + // For specifying a composite optype, such as ALLGATHER and REDUCE_SCATTER + c10::intrusive_ptr endCoalescing(OpType optype); + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr _broadcast_oop( + at::Tensor& outputTensors, + at::Tensor& inputTensors, + const BroadcastOptions& opts = BroadcastOptions()); + + c10::intrusive_ptr allreduce_sparse( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr _reduce_oop( + at::Tensor& outputTensors, + at::Tensor& inputTensors, + const ReduceOptions& opts = ReduceOptions()); + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + 
const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputbuffer, + at::Tensor& inputbuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputs, + std::vector& inputs, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + int64_t getCommPtr(); + + void groupStart(); + + void groupEnd(); + + void groupEndNonblocking(const std::shared_ptr& comm); + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + 
c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + // Unsupported Ops + c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + // Return the total number of splits the communicators held by this process + // group have performed. Counts ncclCommCreateFromRanks() for ncclx v2.21.5+ + uint64_t getCommSplitCounter() const; + + void registerOnCompletionHook( + std::function)>&& hook) override; + void waitForPendingWorks() override; + + void enableCollectivesTiming() override; + + c10::intrusive_ptr split( + const c10::intrusive_ptr& store, + const std::vector& ranks, + const c10::intrusive_ptr& opts) override; + + c10::intrusive_ptr merge( + const c10::intrusive_ptr& store, + const c10::intrusive_ptr& opts, + const int& rank, + const int& size) override; + + // Helper function for iteratively aborting communicators in the provided map + void abortCommsFromMap( + std::unordered_map>& ncclCommsMap, + const std::optional& abortReason); + + c10::intrusive_ptr initIntraNodeComm(); + + // Destroy (shutdown) this backend -- normal exit. + void shutdown() override; + + // Provides an API to abort the ProcessGroup (similar to ncclCommAbort) + // instead of relying on ProcessGroupNCCL destructor. 
+ void abort() override; + + void eagerConnectSingleDevice(at::Device device) override; + + void performNocolorSplit(at::Device device); + + // If all comms on this PG are fully initialized, return true. + bool isInitialized(); + + ErrorType getError() override; + + bool supportsShrinking() const override { +#ifdef NCCL_HAS_COMM_SHRINK + return true; +#else + return false; +#endif + } + + // Backend-style shrink override that returns a Backend instance. + c10::intrusive_ptr shrink( + const std::vector& ranks_to_exclude, + int shrink_flags = 0, + const c10::intrusive_ptr& opts_override = + nullptr) override; + + std::shared_ptr getMemAllocator() override; + + // Allocate tensor from communication-optimized memory pool + at::Tensor allocateTensor(long size, at::TensorOptions options = {}) override; + + // Whether tensor allocation from NCCL memory pool is supported + bool supportsTensorAlloc(c10::DeviceIndex deviceIdx) override; + + // Performs NCCL user buffer registration for all buffers in + // the given MemPool + void registerMemPool(at::cuda::MemPool* pool, bool symm = false); + + // Performs NCCL user buffer de-registration for all buffers in + // the given MemPool + void deregisterMemPool(at::cuda::MemPool* pool); + + // This method adds a temporary extension for the timeout period, + // applying to all collectives between the calling of this API and + // the completion of the first collective on the GPU. While this feature + // provides flexibility in specific scenarios, it introduces statefulness + // to timeout setting. Therefore, it is advisable to use this API sparingly + // and consider alternative approaches, such as directly setting the timeout + // or utilizing a barrier collective (one can set any timeout to the barrier), + // whenever feasible. + void addEphemeralTimeout(const std::chrono::milliseconds& timeout); + + // This function is only intended for testing purposes because we don't + // want to expose the `WorkNCCL` via pybind. 
It verifies whether the + // `opTimeout_` of the provided WorkNCCL instance is the same as the specified + // timeout. + bool verifyWorkTimeoutForTest( + const c10::intrusive_ptr& work, + const std::chrono::milliseconds& timeout); + + void setEnableNanCheck(bool enableNanCheck); + + protected: + uint64_t getWatchdogHeartbt() const; + + // Instance of the heartbeat monitor thread. + std::unique_ptr heartbeatMonitor_; + + // Instance of the watchdog thread. + std::unique_ptr watchdog_; + + // Helper that broadcasts nccl unique ID to all ranks through the store + void broadcastUniqueNCCLID( + ncclUniqueId* ncclID, + bool isSingleP2POp, + const std::string& devicesKey, + int p2pRank); + + // Helper that allgathers nccl unique IDs to all ranks through the store + void allgatherUniqueNCCLIDs( + int rootIdx, + ncclUniqueId* ncclID, + std::vector& ncclIDs); + + // Helper that looks up the cached NCCL communicators only + std::shared_ptr getNCCLComm(const std::string& deviceKey); + + std::shared_ptr initNCCLComm( + const std::string& deviceKey, + at::Device& device, + OpType opType, + int p2pRank = 0, + bool isSendRecvSelf = false); + + // Initialize device-specific state (comm, stream, event, bookkeeping) for a + // given communicator on this process group instance. + void initializeDeviceStateForComm( + const at::Device& device, + std::shared_ptr comm); + + // Wrapper method which can be overridden for tests. + virtual std::exception_ptr checkForNCCLErrors( + std::shared_ptr& ncclComm); + + // Ensure thaht if record is True, the work obj will be enqueued via + // workEnqueue + virtual c10::intrusive_ptr initWork( + at::Device& device, + int rank, + OpType opType, + bool isP2P, + const char* profilingTitle = nullptr, + const std::vector& inputs = {}, + const std::vector& outputs = {}, + bool record = false); + + // In the timeout case and we will dump debug info such as the NCCL flight + // recorder to storage. 
Down the road, if we have more complicated or blocking + // operations, we might need to use a side thread to do it. + bool dumpDebuggingInfo( + bool includeStackTrace = true, + bool onlyActive = false); + + void dumpExtraDebuggingInfo(); + + // Abort all communicators on this rank. + bool abortComms(const std::optional& abortReason = std::nullopt); + + // A helper function to check if nonblocking API mode should be used. + // Use this helper instead of directly checking `useNonblocking_` variable. + bool useNonblocking(); + + protected: + int globalRankStart_{}; + int globalRankStride_{}; + + private: + bool eagerInit_{false}; + bool showSerializationWarning_{true}; + + // Helper that encapsulates work shared across all collective communication + // primitives. The callbacks have the following signatures: + // + // ncclResult_t fn(at::Tensor& input, at::Tensor& output, + // ncclComm_t, at::cuda::CUDAStream&); + // void {pre,post}(std::vector); + template + c10::intrusive_ptr collective( + at::Tensor& input, + at::Tensor& output, + Fn fn, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collective( + at::Tensor& input, + at::Tensor& output, + Fn fn, + PreProcess pre, + PostProcess post, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collective( + std::vector& inputs, + std::vector& outputs, + Fn fn, + PreProcess pre, + PostProcess post, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr, + bool nanCheck = true); + + template + c10::intrusive_ptr collectiveCoalesced( + std::vector& input, + std::vector& output, + Fn fn, + OpType opType, + bool asyncOp, + const char* profilingTitle = nullptr); + + // Helper that encapsulates work shared across point-to-point communication + // primitives. It is the same structure as the helper used for collective + // communication primitives. 
+ template + c10::intrusive_ptr pointToPoint( + at::Tensor& tensor, + Fn fn, + int peer, + OpType opType, + const char* profilingTitle = nullptr); + + template + c10::intrusive_ptr pointToPoint( + at::Tensor& tensor, + Fn fn, + int peer, + OpType opType, + PreProcess pre, + PostProcess post, + const char* profilingTitle); + + c10::intrusive_ptr allreduce_impl( + at::Tensor& tensor, + const char* profilingTitle = "nccl:all_reduce", + const AllreduceOptions& opts = AllreduceOptions()); + + // Checks for NCCL errors on each of the communicators and returns an + // appropriate exception_ptr (nullptr if no errors). + static std::exception_ptr checkForNCCLErrorsInternal( + std::shared_ptr& ncclComm); + + // Return the CUDA device most likely associated with this backend. + // If we aren't bound to a specific device, there is no strict + // guarantee that this heuristic is the correct assignment of ranks + // to GPUs that Python layers use, but in practice it tends to be. + // Fortunately we don't rely on this for correctness of any tensor + // operations, just for ancillary uses like barriers. + at::Device guessDeviceForRank() const; + + // Destroys initialized NCCL communicators in devNCCLComMap_ given by input + // key. Throws if there are no communicators to destroy. Also removes + // communicators from the cache and clears used device indices. + void destroyNCCLComms(const std::string& devNCCLCommMapKey); + + void runHookLoop(); + + // Generates a prefix that is unique to this process group and rank, for + // disambiguating logs + std::string createLogPrefix() const; + + // Returns the unique prefix created in createLogPrefix + const std::string& logPrefix() const; + + // Returns the global rank of the device. This function assumes that users + // always create a default global process group(PG) which includes all + // devices. It is called in the constructor of ProcessGroupNCCL, so it always + // return the rank_ of the very first PG created, aka, default global PG. 
+ const int& globalRank() const; + + const c10::intrusive_ptr& globalStore() const; + + // Returns the global ranks of a PG. + const std::vector& groupRanks() const; + + // Util function to assign timeout to each work. + void assignTimeoutToWork( + const c10::intrusive_ptr& work, + const c10::intrusive_ptr& option); + + // Broadcast flight-recorder dump signal + void broadcastDumpSignal(); + + // A helper function to broadcast a signal (key) from a src rank to all other + // ranks using the specified store. + void broadcastSignal( + c10::intrusive_ptr& store, + const std::string& signal, + int srcRank); + + protected: + // Function that directly trigger std::abort so that the whole process + // gets terminated. + virtual void terminateProcess(const std::string& errMsg); + + // A helper function to wait for a future to complete or timeout. + // Returns true if the future completes before timeout, false otherwise. + bool waitForFutureOrTimeout( + std::future& fut, + const std::chrono::milliseconds& timeOutMilSec, + const std::string& futDescription, + ::c10d::C10dLoggingData& debugLog, + bool throwException = false); + + // A helper function to guess the device id of the current rank, based on + // bounded device or used device. Do not use this function if you already know + // the device id to operate on. + c10::DeviceIndex guessDeviceId() const; + + static const int64_t kWatchdogThreadSleepMillis; + + // The store is used to broadcast the NCCL unique ID of rank 0. This store + // comes with prefix and it is different across ProcessGroup NCCL instances + // (aka, different ProcessGroups). + c10::intrusive_ptr store_; + + // Reference to the store without prefix so that keys are same across all + // ProcessGroup NCCL instances and (key, value) pairs written to the store are + // global. + c10::intrusive_ptr globalStore_; + + // The lock which protects the write/read of + // ephemeralTimeoutActive_/ephemeralTimeoutInflight_. 
+ // TODO(fduwjj): We need to have an audit on all mutexes we are adding here. + // And consolidate them if possible. + std::mutex mtxTimeoutExtension_; + + // The ephemeral timeout added on top of existing timeout for works issued + // before first work finishes. + std::chrono::milliseconds ephemeralTimeoutActive_ = + std::chrono::milliseconds(0); + + // The ephemeral timeout addition which has been already applied to work. + std::chrono::milliseconds ephemeralTimeoutInflight_ = + std::chrono::milliseconds(0); + + const c10::intrusive_ptr options_; + + // The number of NCCL communicators that have been created during + // the lifetime of this process group. This sequence number is + // used to scope keys used in the store. + uint64_t ncclCommCounter_{0}; + + // The NCCL communicator that the process group has cached. + // + // For collective operations: + // The key is a list of GPU devices that an operation is operating on + // The GPU devices are stored in a device sequence and the cache NCCL + // communicator is associated with this GPU device sequence + // + // e.g. If the process group op only uses device 0, then the value of + // the used device string stored (value of the hashmap) would be "0". + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 1, 2, 3, 4, 5, 6, 7 separately, + // then the value of the used device string (key) stored would be + // "0,1,2,3,4,5,6,7" + // + // If the process group op uses device 0 - 7 and the each tensor of the + // input tensor list is on device, 0, 4, 5, 6, 7, 1, 2, 3 separately, + // then the value of the used device string stored would be + // "0,4,5,6,7,1,2,3" + // + // Note that the order of the device for the tensor list matters. + // + // For point-to-point operations: + // The key is a string of my current rank and the peer process rank. + // e.g. 
If process 1 and process 2 are involved in a point-to-point + // communication, the key will be "1:2" on both processes. Note: this is for + // the scenario where there is only 1 GPU per process. When it comes to + // multiple GPUs per process, this part may need to be redesigned. + // TODO: we probably need a separate map for P2P comms + std::unordered_map> devNCCLCommMap_; + + // The NCCL communicators currently in the process of being initialized. + std::unordered_map> + inInitializationCommMap_; + + // Mutex to guard maps like devNCCLCommMap_. + std::mutex mutex_; + + // Size of ring buffer where we store NCCL Traces for debugging. + int traceBufferSize_; + + // We gate the cudaEventCache so that we can roll it out gradually. + std::atomic cudaEventCacheEnabled_; + + std::thread onCompletionHookThread_; + + // Whether or not we should terminate the watchdog and workCleanup threads. + std::atomic terminateProcessGroup_; + + // Whether there are hooks pending to be fired + std::atomic hasPendingHooks_; + + // This is the signal from watchdog threads to indicate whether the monitor + // thread should dump. Making it static so that it is accessible from all the + // PGs. With this flag, monitor thread would dump debug info under any one of + // the three conditions: + // + // 1: watchdog thread of any PG detects a collective timeout. + // 2: timeout signal is received from other ranks through tcpstore. + // 3: current PG's watchdog heartbeat timeout occurs. + // + // Note that only the monitor thread from PG0 will dump the debug info for + // case one and two so that the debug info is only dumped once. 
+ static std::atomic shouldDump_; + + // Mutex to Guard workMetaList_ + std::mutex workMetaListMutex_; + + bool writeDebugInfo_ = false; + + // Vector to store WorkNCCL pointers + std::list workMetaList_; + + // Mutex to Guard workMetaList_ + std::mutex completedWorkListMutex_; + + // Condition Variable for watchdog thread sleep + std::condition_variable completedWorkListCV_; + + std::list completedWorkList_; + + // Add Work Pointer to workVector + void workEnqueue(const c10::intrusive_ptr&); + + // The CUDA streams used by NCCL kernels + std::unordered_map ncclStreams_; + + // The CUDA events used to sync NCCL streams + std::unordered_map ncclEvents_; + + // Device Indexes used for all collectives in this group + std::set usedDeviceIdxs_; + + // Flag to denote if a coalescing groupStart/groupEnd block is active + int coalescing_state_ = 0; + + // Stores device indexes for all collectives run inside a coalescing block + at::Device coalescedDevice_ = at::Device("cuda"); + + // Stores communicators for all collectives run inside a coalescing block + std::shared_ptr coalescedComm_ = nullptr; + + // Whether the coalesced calls are sync or async. + bool coalescedAsync_{}; + + // keeps track of input and output tensors when coalescing is in flight. Will + // hand over these tensors to WorkNCCL's stash when coalescing is ended. + TensorShelf coalescedTensors_; + + // Some ops may have completed, but user still hasn't called `work.wait()`. + // When watchdog detects this, it transfers the TensorShelf from `work` to + // this `shelves` structure. Next time we execute ProcessGroupNCCL's methods + // on main thread, we clear the `shelves` in one shot. This is mainly because + // watchdog (a side thread) unstashing the shelf directly seems to cause some + // problem. + std::vector> shelvesToUnstash_; + std::mutex shelvesMutex_; + + // Whether or not wait() and synchronize() are blocking operations that wait + // for the operation to complete. 
+ bool blockingWait_ = false; + + // Whether or not the workCleanupThread is used to perform async error + // handling. + ErrorHandlingMode asyncErrorHandling_ = NoHandling; + + ErrorType error_ = ErrorType::SUCCESS; + + std::mutex errorMutex_; + + // Whether or not to sleep after an exception is thrown in the watchdog. + bool sleepAfterException_{}; + + // Whether or not to enable nan check for input tensors to collectives. + bool enableNanCheck_; + + // Whether or not to create start CUDAEvent and enable timing for start + // and end events. Note that enableTiming_ is always true if desyncDebug_ + // is set to true. + std::atomic enableTiming_; + + // Flag to enable the print of hash value of input/output of collectives for + // verification. + std::atomic enableCollectiveHashDebug_; + + // Whether or not TORCH_NCCL_AVOID_RECORD_STREAMS was set + bool avoidRecordStreams_ = false; + + // The number of active ncclGroupStart() calls. This counter will be increased + // by 1 when ncclGroupStart() is called and decreased by 1 when ncclGroupEnd() + // is called. + static thread_local uint64_t ncclActiveGroupCounter_; + + // Counting for the sequential number of NCCL collective call. + // (specifically, how many actual kernels we launched, which differs from + // op_id_ when coalescing is enabled) + uint64_t seqCollective_{0}; + + // Counting for the sequential number of NCCL P2P calls. + uint64_t seqP2P_{0}; + + // Incrementing counter for logical operations (collective or p2p) issued on + // the ProcessGroup + uint64_t op_id_{0}; + + // The number of ProcessGroupNCCL created on the current rank. + size_t local_id_; + + std::string logPrefix_; + + c10::intrusive_ptr intraNodeComm_; + + // Number of devices on this node. + int localDeviceCount_{0}; + + std::shared_ptr pgStatus_ = + std::make_shared(); + + // Internal cached value: use NCCL non-blocking API mode or not. + // Use `useNonblocking()` method instead of accessing this variable directly. 
+ std::optional useNonblocking_{std::nullopt}; + + // Communication-optimized memory pool associated with this PG + std::unique_ptr memPool_ = nullptr; +}; + +// Reset the flight recorder recordings for the current rank. +TORCH_API void reset_nccl_trace(); + +// Dumps the NCCL comm traces and additional information about the Process +// Group. +TORCH_API std::string dump_nccl_trace( + bool includeCollectives, + bool includeStackTraces, + bool onlyActive); + +// Dumps the NCCL comm traces and additional information about the Process +// Group in JSON formatted string. +// We don't include stack traces in JSON format as it is far too much data. +TORCH_API std::string dump_nccl_trace_json( + bool includeCollectives, + bool onlyActive); + +// Gets a mutable reference to a global optional function. Heartbeat Monitor +// will use this function to dump traces, if available. Inside fbcode, we +// store a function here that uses an internal tool for process tracing +TORCH_API std::optional< + std::function)>>& +get_cpp_trace_dumper(); + +// Similar to get_cpp_trace_dumper, this stores a function defined in +// torch-python layer that lets us check whether the GIL can be acquired, +// helpful for instrumenting in cases where a hang was observed. +typedef bool (*gil_checker_t)(); + +TORCH_API gil_checker_t& get_gil_checker(); +} // namespace c10d + +#endif // USE_C10D_NCCL + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp new file mode 100644 index 0000000000000000000000000000000000000000..12e737b61df23a0fc5503b60fc7a6136d311aaeb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupUCC.hpp @@ -0,0 +1,363 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#ifdef USE_CUDA +#include +#include +#endif + +namespace c10d { + +#define TORCH_UCC_DEVICE_NOT_SET -2 + +#ifdef USE_CUDA +#define SAVE_TENSORS(_TENSORS, _DATA) \ + do { \ + if ((_TENSORS)[0].device().is_cuda()) { \ + for (const auto i : c10::irange((_TENSORS).size())) { \ + c10::cuda::CUDACachingAllocator::recordStream( \ + (_TENSORS)[i].storage().data_ptr(), (*stream)); \ + } \ + } else { \ + (_DATA) = (_TENSORS); \ + } \ + } while (0) + +#else +#define SAVE_TENSORS(_TENSORS, _DATA) (_DATA) = (_TENSORS); +#endif + +constexpr const char* UCC_BACKEND_NAME = "ucc"; + +struct event_pool_t { +#ifdef USE_CUDA + std::queue> event_pool; +#endif + std::mutex event_pool_mutex; +}; + +class Comm; + +// UCC does not support multiple CUDA devices per process. 
+class TORCH_API ProcessGroupUCC : public Backend { + private: + void set_timeout(ucc_coll_args_t& args); + + public: + class WorkData { + public: + std::vector src; + std::vector dst; + std::vector flat; + WorkData() {} + virtual ~WorkData() = default; + }; + class AlltoallWorkData : public WorkData { + public: + AlltoallWorkData(int size) + : send_lengths(size), + send_offsets(size), + recv_lengths(size), + recv_offsets(size) {} + std::vector send_lengths; + std::vector send_offsets; + std::vector recv_lengths; + std::vector recv_offsets; + }; + + class AllgathervWorkData : public WorkData { + public: + AllgathervWorkData(int size) : recv_lengths(size), recv_offsets(size) {} + std::vector recv_lengths; + std::vector recv_offsets; + }; + + class ScattervWorkData : public WorkData { + public: + ScattervWorkData(int size) : send_lengths(size), send_offsets(size) {} + std::vector send_lengths; + std::vector send_offsets; + }; + + class ProgressEntry { + friend class ProcessGroupUCC; + friend class Comm; + + public: + ProgressEntry(CommBase* comm, ucc_coll_req_h request) + : status_(UCC_INPROGRESS), comm_(comm), request_(request) {} + // Finalizes UCC status or exception of collective request. 
+ void finalize(std::exception_ptr eptr = nullptr); + ucc_status_t status_; + CommBase* comm_; + ucc_coll_req_h request_; + std::unique_ptr data; + c10::intrusive_ptr future_; + std::exception_ptr eptr_; + }; + + class WorkUCC : public Work { + friend class ProcessGroupUCC; + friend class Comm; + + public: + WorkUCC( + OpType opType, + uint64_t seq, + const char* prof_title, + const std::optional>& inputs, + const c10::intrusive_ptr& logger) + : Work(-1, opType, prof_title, inputs), logger_(logger), seq_(seq) {} + ~WorkUCC(); + void setException(); + void setAndThrowException(); + bool isCompleted() override; + bool isSuccess() const override; + bool wait(std::chrono::milliseconds timeout = kUnsetTimeout) override; + c10::intrusive_ptr getFuture() override; + std::vector result() override; + int sourceRank() const override; +#ifdef USE_CUDA + std::unique_ptr fence = nullptr; + event_pool_t* ep = nullptr; +#endif + int sourceRank_; + + protected: + std::shared_ptr entry_; + c10::intrusive_ptr logger_; + uint64_t seq_; + + private: + // The future returned by getFuture. + c10::intrusive_ptr future_; + // Store a reference to collective's outputs, used by result + std::shared_ptr> outputs_; + }; + + explicit ProcessGroupUCC( + const c10::intrusive_ptr& store, + int rank = -1, + int size = -1, + std::chrono::duration timeout = kBackendDefaultTimeout); + + void initComm(c10::Device dev); + + ~ProcessGroupUCC() override; + + const std::string getBackendName() const override { + return std::string(UCC_BACKEND_NAME); + } + +#ifdef USE_CUDA + std::unique_ptr getPooledEvent(); +#endif + + // Performs a health check by initializing dummy UCC & UCX communicators and + // then destroying them. This will help indicate and signal any + // UCC/UCX-related issues prior to the first collective. The actual + // initialization and subsequent destruction is ran on a separate thread and + // the main thread is signalled about timeouts/errors to report to the + // application. 
+ void runHealthCheck(); + + template + c10::intrusive_ptr collective_post( + OpType opType, + PreProcess preproc, + PostProcess postproc, + ucc_coll_args_t& coll, + std::unique_ptr data, + c10::Device dev, + std::vector& inputTensors, + std::vector& outputTensors, + const char* prof_title); + + c10::intrusive_ptr broadcast( + std::vector& data, + const BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, 
+ const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + // Counting for the sequential number of UCC collective_post call. + uint64_t seq_{0}; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; + + static c10::intrusive_ptr createProcessGroupUCC( + const c10::intrusive_ptr<::c10d::Store>& store, + int rank, + int size, + const std::chrono::duration& timeout); + + protected: + const std::chrono::duration timeout_; + std::shared_ptr oob; + std::shared_ptr comm = {nullptr}; + uint32_t comm_id; + ucc_team_h team{nullptr}; + ucc_ee_h cuda_ee{nullptr}; + ucc_ee_h cuda_ee_p2p[2]{nullptr, nullptr}; + +#ifdef USE_CUDA + std::unique_ptr stream = nullptr; + std::unique_ptr stream_p2p[2] = {nullptr, nullptr}; + event_pool_t ep; +#endif + c10::intrusive_ptr logger; +}; + +class Comm { + c10::intrusive_ptr logger; + std::shared_ptr oob; + CommUCC ucc_comm; + std::mutex mutex; + std::thread progress_thread; + std::condition_variable queue_produce_cv; + std::condition_variable queue_consume_cv; + std::deque> progress_queue; + bool stop_progress_loop; + bool collective_inprogress; + torch_ucc_phase_t finalize_phase; + + public: + c10::DeviceIndex cuda_device_index; + Comm( + const c10::intrusive_ptr& logger, + std::shared_ptr oob, + 
c10::Device dev, + bool is_health_check); + + ~Comm(); + + void ucc_create_team( + ucc_team_h& team, + std::shared_ptr oob); + + void ucc_destroy_team(ucc_team_h& team); + + c10::intrusive_ptr enqueue_p2p( + OpType opType, + ucc_coll_req_h request, + const char* prof_title); + +#ifdef USE_CUDA + void enqueue_cuda_collective( + std::unique_ptr data, + c10::intrusive_ptr work, + ucc_coll_args_t& coll, + ucc_team_h team, + ucc_ee_h ee); +#endif + + void enqueue_collective( + std::unique_ptr data, + c10::intrusive_ptr work, + ucc_coll_args_t& coll, + ucc_team_h team); + + static std::shared_ptr get_comm( + uint32_t& id, + c10::Device dev, + std::shared_ptr oob, + const c10::intrusive_ptr& logger, + bool is_health_check = false); + + void progress_loop(); +}; + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp new file mode 100644 index 0000000000000000000000000000000000000000..29c66431b1aba7bd74be896f4bf4d070c1f59d26 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/ProcessGroupWrapper.hpp @@ -0,0 +1,145 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_GLOO + +#include +#include +#include + +namespace c10d { + +class TORCH_API ProcessGroupWrapper : public Backend { + public: + explicit ProcessGroupWrapper( + const c10::intrusive_ptr& backend, + c10::intrusive_ptr glooBackend); + + const std::string getBackendName() const override; + + c10::intrusive_ptr broadcast( + std::vector& data, + const 
BroadcastOptions& opts = BroadcastOptions()) override; + + c10::intrusive_ptr allreduce( + std::vector& data, + const AllreduceOptions& opts = AllreduceOptions()) override; + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override; + + c10::intrusive_ptr reduce( + std::vector& tensors, + const ReduceOptions& opts = ReduceOptions()) override; + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr _allgather_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const AllgatherOptions& opts = AllgatherOptions()) override; + + // This function is deprecated and will be moved out of ProcessGroup to comms: + // * do not add dependencies on this function, + // * do not implement it in your ProcessGroup, implement _allgather_base + // instead. + c10::intrusive_ptr allgather_coalesced( + std::vector>& outputTensorLists, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override; + + c10::intrusive_ptr gather( + std::vector>& outputTensors, + std::vector& inputTensors, + const GatherOptions& opts = GatherOptions()) override; + + c10::intrusive_ptr scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ScatterOptions& opts = ScatterOptions()) override; + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override; + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputTensor, + at::Tensor& inputTensor, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = AllToAllOptions()) override; + + c10::intrusive_ptr alltoall( + std::vector& outputTensors, + std::vector& inputTensors, + const AllToAllOptions& opts = AllToAllOptions()) override; + + void 
monitoredBarrier(const BarrierOptions& opts, bool waitAllRanks = false) + override; + + // Agrees on an initial sequence number for the whole group by having rank 0 + // create it and broadcast it to other ranks using the store. Only implemented + // for GLOO and NCCL backends currently. + // dont implement this + void setSequenceNumberForGroup() override; + + // Retrieves the current sequence number for the whole group, which should be + // in sync. If the returned number is not consistent across the group, it + // may indicate that there is some sort of collective desynchronization. + uint64_t getSequenceNumberForGroup() override; // just call underlying + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override; + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override; + + c10::intrusive_ptr recvAnysource( + std::vector& tensors, + int tag) override; + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override; + + c10::intrusive_ptr _reduce_scatter_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + const ReduceScatterOptions& opts) override; + + void startCoalescing() override; + + c10::intrusive_ptr endCoalescing() override; + + c10::intrusive_ptr getWrappedPg() const; + + private: + // Underlying process group that actual application collectives will be + // dispatched to + c10::intrusive_ptr backend_; + // Gloo process group responsible for internal coordination such as monitored + // barrier, sequence number checking, collective fingerprint collecting. + c10::intrusive_ptr glooBackend_; + // Conducts several checks to ensure that the underlying collective is well + // formed with the goal of notifying the user about incorrect collective use + // in the application. 
+ void runCollectiveChecks( + OpType op_type, + const std::vector& tensors); +}; +} // namespace c10d + +#endif // USE_C10D_GLOO + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp new file mode 100644 index 0000000000000000000000000000000000000000..dd974d19037a5e7ae362dd5c4218c4bfcf4ea34f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/PyProcessGroup.hpp @@ -0,0 +1,361 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { + +// PyProcessGroup is a pybind11 trampoline class to allow a Python +// class to inherit from torch.distributed.ProcessGroup +class PyProcessGroup : public ProcessGroup { + public: + // PyWork is a pybind11 trampoline class to allow a Python + // class to inherit from torch.distributed.Work + class TORCH_PYTHON_API PyWork : public Work { + public: + PyWork() = default; + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + PYBIND11_OVERRIDE( + bool, /* Return type */ + Work, /* Parent class */ + wait, /* Name of function in C++ */ + timeout); + } + + c10::intrusive_ptr getFuture() override { + // We cannot use PYBIND11_OVERRIDE because: + // 1. We have to >MANUALLY< unwrap the PyFutureWrapper and + // 2. 
The python name is get_future + pybind11::gil_scoped_acquire gil; + auto override = + pybind11::get_override(static_cast(this), "get_future"); + + if (override) { + py::object o = override(); + auto futWrapper = + o.cast>(); + return futWrapper->fut; + } + + return Work::getFuture(); + } + }; + +#define WORK_OVERRIDE(cname, name, ...) \ + do { \ + pybind11::gil_scoped_acquire gil; \ + pybind11::function override = \ + pybind11::get_override(static_cast(this), #name); \ + if (override) { \ + auto o = override(__VA_ARGS__); \ + return c10::make_intrusive(o); \ + } \ + return cname::name(__VA_ARGS__); \ + } while (false) + + // This class is used to wrap a PyWork trampoline with its corresponding + // Python object to prevent the Python object from being garbage collected. + class PyWorkHolder : public Work { + public: + PyWorkHolder(const c10::intrusive_ptr& work, py::object pyWork) + : work_(work), pyWork_(std::move(pyWork)) {} + + PyWorkHolder(py::object pyWork) + : work_(pyWork.cast>()), + pyWork_(std::move(pyWork)) {} + + ~PyWorkHolder() override { + // GIL must be held when freeing python objects. 
+ py::gil_scoped_acquire gil; + pyWork_ = py::object(); + } + + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return work_->wait(timeout); + } + + c10::intrusive_ptr getFuture() override { + return work_->getFuture(); + } + + private: + c10::intrusive_ptr work_; + py::object pyWork_; + }; + + using ProcessGroup::ProcessGroup; + + const std::string getBackendName() const override { + PYBIND11_OVERRIDE( + std::string, /* Return type */ + ProcessGroup, /* Parent class */ + getBackendName, /* Name of function in C++ */ + ); + } + + int getRank() const override { + PYBIND11_OVERRIDE( + int, /* Return type */ + ProcessGroup, /* Parent class */ + getRank, /* Name of function in C++ */ + ); + } + + int getSize() const override { + PYBIND11_OVERRIDE( + int, /* Return type */ + ProcessGroup, /* Parent class */ + getSize, /* Name of function in C++ */ + ); + } + + void abort() override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + abort, /* Name of function in C++ */ + ); + } + + const std::string& getGroupName() const override { + PYBIND11_OVERRIDE( + const std::string&, /* Return type */ + ProcessGroup, /* Parent class */ + getGroupName, /* Name of function in C++ */ + ); + } + + void setGroupName(const std::string& group_name) override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + setGroupName, /* Name of function in C++ */ + group_name); + } + + const std::string& getGroupDesc() const override { + PYBIND11_OVERRIDE( + const std::string&, /* Return type */ + ProcessGroup, /* Parent class */ + getGroupDesc, /* Name of function in C++ */ + ); + } + + void setGroupDesc(const std::string& group_desc) override { + PYBIND11_OVERRIDE( + void, /* Return type */ + ProcessGroup, /* Parent class */ + setGroupDesc, /* Name of function in C++ */ + group_desc); + } + + c10::intrusive_ptr splitGroup( + const std::vector& ranks, + const std::optional& timeout, + const std::optional>& 
opts, + const std::optional& group_name, + const std::optional& group_desc) override { + PYBIND11_OVERRIDE( + c10::intrusive_ptr, /* Return type */ + ProcessGroup, /* Parent class */ + splitGroup, /* Name of function in C++ */ + ranks, + timeout, + opts, + group_name, + group_desc); + } + + c10::intrusive_ptr mergeRemoteGroup( + const c10::intrusive_ptr& store, + const MergeOptions& opts, + const int& size) override { + PYBIND11_OVERRIDE( + c10::intrusive_ptr, /* Return type */ + ProcessGroup, /* Parent class */ + mergeRemoteGroup, /* Name of function in C++ */ + store, + opts, + size); + } + + c10::intrusive_ptr allgather( + std::vector>& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allgather, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr allgather_into_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const AllgatherOptions& opts = AllgatherOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allgather_into_tensor_coalesced, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr allreduce( + std::vector& tensors, + const AllreduceOptions& opts = AllreduceOptions()) override { + WORK_OVERRIDE( + // py::object, /* Return type */ + ProcessGroup, /* Parent class */ + allreduce, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr allreduce_coalesced( + std::vector& tensors, + const AllreduceCoalescedOptions& opts = + AllreduceCoalescedOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + allreduce_coalesced, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr alltoall_base( + at::Tensor& outputBuffer, + at::Tensor& inputBuffer, + std::vector& outputSplitSizes, + std::vector& inputSplitSizes, + const AllToAllOptions& opts = 
AllToAllOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + alltoall_base, /* Name of function in C++ */ + outputBuffer, + inputBuffer, + outputSplitSizes, + inputSplitSizes, + opts); + } + + c10::intrusive_ptr barrier( + const BarrierOptions& opts = BarrierOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + barrier, /* Name of function in C++ */ + opts); + } + + c10::intrusive_ptr broadcast( + std::vector& tensors, + const BroadcastOptions& opts = BroadcastOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + broadcast, /* Name of function in C++ */ + tensors, + opts); + } + + c10::intrusive_ptr reduce_scatter( + std::vector& outputTensors, + std::vector>& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + reduce_scatter, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr reduce_scatter_tensor_coalesced( + std::vector& outputTensors, + std::vector& inputTensors, + const ReduceScatterOptions& opts = ReduceScatterOptions()) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + reduce_scatter_tensor_coalesced, /* Name of function in C++ */ + outputTensors, + inputTensors, + opts); + } + + c10::intrusive_ptr send( + std::vector& tensors, + int dstRank, + int tag) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + send, /* Name of function in C++ */ + tensors, + dstRank, + tag); + } + + c10::intrusive_ptr recv( + std::vector& tensors, + int srcRank, + int tag) override { + WORK_OVERRIDE( + ProcessGroup, /* Parent class */ + recv, /* Name of function in C++ */ + tensors, + srcRank, + tag); + } +}; + +class TORCH_PYTHON_API PythonOnCompletionHook { + public: + // Wraps a py::object hook and acquires Python GIL in dtor before + // destructing the hook object. 
+ PythonOnCompletionHook(py::object hook) : hook_(std::move(hook)) {} + PythonOnCompletionHook(const PythonOnCompletionHook&) = default; + + // NOLINTNEXTLINE(bugprone-exception-escape) + ~PythonOnCompletionHook() { + py::gil_scoped_acquire ag; + hook_.dec_ref(); + // Explicitly set hook_ to nullptr to prevent py::object's dtor + // to decref on the PyObject again. + // See Note [Destructing py::object] in python_ivalue.h + hook_.ptr() = nullptr; + } + + void operator()(const std::shared_ptr& workInfo) const { + std::exception_ptr eptr; + { + py::gil_scoped_acquire acquire; + try { + hook_(workInfo); + } catch (py::error_already_set& e) { + // py::error_already_set requires GIL to destruct, take + // special care. + eptr = std::make_exception_ptr(std::runtime_error(e.what())); + e.restore(); + PyErr_Clear(); + } catch (std::exception&) { + eptr = std::current_exception(); + } + } + // No more Python-related stuff at this point, i.e., this + // exception can be captured and handled by PG backend. + if (eptr) + std::rethrow_exception(eptr); + } + + private: + py::object hook_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp new file mode 100644 index 0000000000000000000000000000000000000000..de3e5e2f95ff92bec1e9bbc6c457c777cb4f578d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/RankLocal.hpp @@ -0,0 +1,78 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) + +#pragma once + +#include + +#include + +namespace c10d { + +// `RankLocal` maintains a unique instance of T for each non-autograd thread. +// For non-autograd threads, `RankLocal::get()` functions similar to +// thread_local. For autograd threads, `RankLocal::get()` returns the +// instance of T corresponding to the enqueuing non-autograd thread. The +// mechanism allows for rank-specific context shared between forward and +// backward. It works for both the one-rank-per-process and one-rank-per-thread +// scenarios. +// +// NOTE: RankLocal doesn't make the underlying objects thread-safe. +template +class RankLocal { + public: + RankLocal(const RankLocal&) = delete; + RankLocal& operator=(const RankLocal&) = delete; + + static T& get() { + // Fast path: non-autograd threads can simply return + // the object reference cached in TLS. + if (cached_ != nullptr) { + return *cached_; + } + const auto node = torch::autograd::get_current_node(); + auto fwd_thread_id = node == nullptr ? at::RecordFunction::currentThreadId() + : node->thread_id(); + // Optimistically acquire the read lock first, since most likely we are in + // an autograd thread and the object has already been constructed. 
+ { + std::shared_lock read_lock(lock_); + auto it = thread_id_to_rank_local_.find(fwd_thread_id); + if (it != thread_id_to_rank_local_.end()) { + // Cache for non-autograd threads + if (node == nullptr) { + cached_ = &it->second; + } + return it->second; + } + } + + std::unique_lock write_lock(lock_); + auto [it, _] = thread_id_to_rank_local_.try_emplace(fwd_thread_id); + // Cache for non-autograd threads + if (node == nullptr) { + cached_ = &it->second; + } + return it->second; + } + + private: + RankLocal() = default; + thread_local static T* cached_; + static std::unordered_map thread_id_to_rank_local_; + static std::shared_mutex lock_; +}; + +template +thread_local T* RankLocal::cached_ = nullptr; + +template +std::unordered_map RankLocal::thread_id_to_rank_local_; + +template +std::shared_mutex RankLocal::lock_; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp new file mode 100644 index 0000000000000000000000000000000000000000..641bc6a36aebde20dfd33309da1609a55e27dea6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Store.hpp @@ -0,0 +1,159 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +// callback function will be given arguments (std::optional oldValue, +// std::optional newValue) +using WatchKeyCallback = + std::function, std::optional)>; + +class TORCH_API Store : public torch::CustomClassHolder { + public: + static constexpr std::chrono::milliseconds kDefaultTimeout = 
+ std::chrono::seconds(300); + static constexpr std::chrono::milliseconds kNoTimeout = + std::chrono::milliseconds::zero(); + + Store() : timeout_(kDefaultTimeout) {} + + explicit Store(const std::chrono::milliseconds& timeout) + : timeout_(timeout) {} + + Store(const Store&) = default; + Store(Store&&) noexcept = default; + + ~Store() override = default; + + // Clone a thread safe copy of this store object that points to the same + // underlying store. + virtual c10::intrusive_ptr clone() = 0; + + void set(const std::string& key, const std::string& value); + + virtual void set( + const std::string& key, + const std::vector& value) = 0; + + std::string compareSet( + const std::string& key, + const std::string& currentValue, + const std::string& newValue); + + virtual std::vector compareSet( + const std::string& key, + const std::vector& currentValue, + const std::vector& newValue) { + C10_THROW_ERROR(NotImplementedError, "Not implemented."); + } + + std::string get_to_str(const std::string& key); + + virtual std::vector get(const std::string& key) = 0; + + virtual int64_t add(const std::string& key, int64_t value) = 0; + + virtual bool deleteKey(const std::string& key) = 0; + + virtual bool check(const std::vector& keys) = 0; + + virtual int64_t getNumKeys() = 0; + + virtual void wait(const std::vector& keys) = 0; + + virtual void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) = 0; + + virtual const std::chrono::milliseconds& getTimeout() const noexcept; + + virtual void setTimeout(const std::chrono::milliseconds& timeout); + + // watchKey() is deprecated and no longer supported. 
+ virtual void watchKey( + const std::string& /* unused */, + // NOLINTNEXTLINE(performance-unnecessary-value-param) + WatchKeyCallback /* unused */) { + C10_THROW_ERROR( + NotImplementedError, + "watchKey is deprecated, no implementation support it."); + } + + virtual void append( + const std::string& key, + const std::vector& value); + + virtual std::vector> multiGet( + const std::vector& keys); + + virtual void multiSet( + const std::vector& keys, + const std::vector>& values); + + // Returns true if this store support append, multiGet and multiSet + virtual bool hasExtendedApi() const; + + virtual void queuePush( + const std::string& key, + const std::vector& value) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual std::vector queuePop(const std::string& key, bool block) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual int64_t queueLen(const std::string& key) { + C10_THROW_ERROR(NotImplementedError, "queue support is not implemented."); + } + + virtual std::vector listKeys() { + C10_THROW_ERROR( + NotImplementedError, "listKeys support is not implemented."); + } + + protected: + std::chrono::milliseconds timeout_; +}; + +/* +StoreTimeoutGuard is a RAII guard that will set the store timeout and restore it +when it returns. 
+*/ +class StoreTimeoutGuard { + public: + explicit StoreTimeoutGuard( + Store& store, + const std::chrono::milliseconds& timeout) + : store_(store), oldTimeout_(store.getTimeout()) { + store.setTimeout(timeout); + } + + ~StoreTimeoutGuard() { + store_.setTimeout(oldTimeout_); + } + + /* Disabling copy and move semantics */ + StoreTimeoutGuard(const StoreTimeoutGuard&) = delete; + StoreTimeoutGuard& operator=(const StoreTimeoutGuard&) = delete; + StoreTimeoutGuard(StoreTimeoutGuard&&) = delete; + StoreTimeoutGuard& operator=(StoreTimeoutGuard&&) = delete; + + private: + Store& store_; + std::chrono::milliseconds oldTimeout_{}; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ea3272d25b465ae53a8b73f8b440253892a92e41 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStore.hpp @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace c10d { +namespace detail { + +// TCPStore is a key-value store used by PyTorch mainly for distributed +// rendezvous, but for other purposes as well. (e.g., a centralized storage for +// synchronization among different processes.) +// +// It is run via a classic client-server architecture, where the server runs +// a separate background thread (alternatively we call it daemon thread). The +// client and server communicate via TCP sockets. 
+// +// Currently we have two types of server backends: +// 1. TCPStoreBackend: a single thread to handle all incoming request +// synchronously. +// 2. LibUVTCPStoreBackend: an event-driven asynchronous stream processing that +// leverages libuv library (https://github.com/libuv/libuv) for better +// performance. And this backend now is recommended to users. (We set the +// default value of `useLibUV` inside `TCPStoreOptions` to true now, so users +// should get it by default). +// +// Code structure: +// ├── TCPStore client side API and server setup code: +// │ TCPStore.hpp/TCPStore.cpp +// ├── TCPStoreBackend server side API implementation code: +// │ TCPStoreBackend.hpp/TCPStoreBackend.cpp +// | (actual class:`TCPStoreMasterDaemon`) +// ├── LibUVTCPStoreBackend +// │ TCPStoreLibUvBackend.cpp +// | (actual class: `LibUVStoreDaemon`) + +class TCPServer; + +class TCPClient; + +struct SocketAddress { + std::string host; + std::uint16_t port{}; +}; + +} // namespace detail + +struct TCPStoreOptions { + static constexpr std::uint16_t kDefaultPort = 29500; + + std::uint16_t port = kDefaultPort; + bool isServer = false; + std::optional numWorkers = std::nullopt; + bool waitWorkers = true; + std::chrono::milliseconds timeout = Store::kDefaultTimeout; + + // A boolean value indicating whether multiple store instances can be + // initialized with the same host:port pair. + bool multiTenant = false; + + // If specified, and if isServer is true, the underlying TCPServer will take + // over the bound socket associated to this fd. This option is useful to avoid + // port assignment races in certain scenarios. + std::optional masterListenFd = std::nullopt; + + // A boolean value indicating whether to use the experimental libUV backend. 
+ bool useLibUV = true; +}; + +class TORCH_API TCPStore : public Store { + public: + static constexpr std::chrono::milliseconds kConnectRetryDelay{1000}; + + explicit TCPStore(std::string host, const TCPStoreOptions& opts = {}); + + ~TCPStore() override; + + c10::intrusive_ptr clone() override; + + void set(const std::string& key, const std::vector& value) override; + + std::vector compareSet( + const std::string& key, + const std::vector& expectedValue, + const std::vector& desiredValue) override; + + std::vector get(const std::string& key) override; + + int64_t add(const std::string& key, int64_t value) override; + + bool deleteKey(const std::string& key) override; + + bool check(const std::vector& keys) override; + + int64_t getNumKeys() override; + + void wait(const std::vector& keys) override; + + void wait( + const std::vector& keys, + const std::chrono::milliseconds& timeout) override; + + void append(const std::string& key, const std::vector& value) + override; + + std::vector> multiGet( + const std::vector& keys) override; + + void multiSet( + const std::vector& keys, + const std::vector>& values) override; + + bool hasExtendedApi() const override; + + void queuePush(const std::string& key, const std::vector& value) + override; + + std::vector queuePop(const std::string& key, bool block) override; + + int64_t queueLen(const std::string& key) override; + + std::vector listKeys() override; + + // Waits for all workers to join. + void waitForWorkers(); + + // Returns the hostname used by the TCPStore. + const std::string& getHost() const noexcept { + return addr_.host; + } + + // Returns the port used by the TCPStore. 
+ std::uint16_t getPort() const noexcept { + return addr_.port; + } + + bool isLibUvBackend() const noexcept { + return usingLibUv_; + } + + // note(xilunwu): this function is only for internal testing + void _splitSet(const std::string& key, const std::vector& data); + + std::string repr() const; + + private: + int64_t incrementValueBy(const std::string& key, int64_t delta); + + void ping(); + void validate(); + + std::vector doGet(const std::string& key); + + void doWait( + c10::ArrayRef keys, + std::chrono::milliseconds timeout); + + detail::SocketAddress addr_; + std::shared_ptr server_; + std::unique_ptr client_; + std::optional numWorkers_; + + const std::string initKey_ = "init/"; + const std::string keyPrefix_ = "/"; + std::mutex activeOpLock_; + bool usingLibUv_ = true; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bed050e1cb26c70b5955a5d29f521be86bf6194a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TCPStoreBackend.hpp @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#ifdef _WIN32 +#include +#include +#else +#include +#include +#endif + +namespace c10d::detail { + +// Magic number for client validation. 
+static const uint32_t validationMagicNumber = 0x3C85F7CE; + +enum class QueryType : uint8_t { + VALIDATE, + SET, + COMPARE_SET, + GET, + ADD, + CHECK, + WAIT, + GETNUMKEYS, + DELETE_KEY, + APPEND, + MULTI_GET, + MULTI_SET, + CANCEL_WAIT, + PING, + QUEUE_PUSH, + QUEUE_POP, + QUEUE_LEN, + LIST_KEYS, +}; + +enum class CheckResponseType : uint8_t { READY, NOT_READY }; + +enum class WaitResponseType : uint8_t { STOP_WAITING, WAIT_CANCELED }; + +// Abstract base class to handle thread state for TCPStoreMasterDaemon. +// Contains the windows/unix implementations to signal a +// shutdown sequence for the thread +class BackgroundThread { + public: + explicit BackgroundThread(); + + virtual ~BackgroundThread() = 0; + virtual std::uint16_t port() const = 0; + + void start(); + bool stop_requested(); + + protected: + void dispose(); + virtual void run() = 0; + virtual void stop() = 0; + bool is_running() { + return is_running_.load(); + } + + private: + std::atomic is_running_{false}; + std::thread daemonThread_; +}; + +std::unique_ptr create_tcpstore_backend( + const TCPStoreOptions& opts); +std::unique_ptr create_libuv_tcpstore_backend( + const TCPStoreOptions& opts); +bool is_libuv_tcpstore_backend_available(); + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..8e584ac92c5c2a8aaf092cd25fc273f1fe1740f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/TraceUtils.h @@ -0,0 +1,324 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include // optional, for ostream fallback +#include // for fmt::join + +#include +#include +#include +#include +#include +#include + +namespace c10d { + +inline std::string getTraceStartKey(const std::string& pgName, int rank) { + return fmt::format(FMT_COMPILE("{}_{}_trace_start"), pgName, rank); +} + +inline std::string getTraceEndKey(const std::string& pgName, int rank) { + return fmt::format(FMT_COMPILE("{}_{}_trace_end"), pgName, rank); +} + +inline bool traceUpdate( + c10::intrusive_ptr& store, + const std::string& key, + uint64_t seq, + const std::string& col) { + std::vector value(col.size() + sizeof(seq) + 1); + std::memcpy(value.data(), &seq, sizeof(seq)); + std::memcpy(value.data() + sizeof(seq), col.data(), col.size()); + try { + store->set(key, value); + return true; + } catch (...) 
{ + LOG(ERROR) << "Store is down while updating #" << seq << " with key " + << key; + return false; + } + return true; +} + +enum TraceDebugEvent { + kEventStart, + kEventEnd, +}; +// >> +using TraceMap = + std::map>>; + +inline std::string ranksToString(const std::vector& ranks) { + return fmt::to_string(fmt::join(ranks, ", ")); +} + +inline std::string ranksFromTrace( + const std::vector>& items) { + fmt::memory_buffer buf; + bool first = true; + for (const auto& [rank, _] : items) { + if (!first) { + fmt::format_to(std::back_inserter(buf), ", "); + } + fmt::format_to(std::back_inserter(buf), "{}", rank); + first = false; + } + return fmt::to_string(buf); +} + +inline std::string analyzeMissingRanks(const std::vector& missingRanks) { + return c10::str( + "\n\t - To our best knowledge, ranks [", + ranksToString(missingRanks), + "] are the lagging ranks that caused this timeout. " + "They never joined any collectives"); +} + +inline std::string analyzeLaggingRanks(const TraceMap& traceMap) { + uint64_t lagSeq = traceMap.begin()->first; + std::vector startRanks; + std::vector endRanks; + for (auto& p : traceMap.begin()->second) { + if (p.second.second == kEventStart) { + startRanks.push_back(p.first); + } else { + endRanks.push_back(p.first); + } + } + std::string report = + "\n\t - To our best knowledge, the lagging/dead/mismatched ranks " + "that caused the desync are:"; + if (!startRanks.empty()) { + report += c10::str( + "\n\t - [", + ranksToString(startRanks), + "] joined but didn't finish collective #", + lagSeq, + " (count from 1)"); + } + if (!endRanks.empty()) { + report += c10::str( + "\n\t [", + ranksToString(endRanks), + "] finished collective #", + lagSeq, + ", but didn't join collective #", + lagSeq + 1, + " (count from 1)"); + } + return report; +} + +inline std::string dumpSnapshot(TraceMap& traceMap) { + std::string report = "\n\t - Snapshot of ranks' latest states:"; + for (auto& tracePair : traceMap) { + uint64_t seq = tracePair.first; + 
std::map>& subMap = + tracePair.second; + + std::unordered_map> collectivesStart; + std::unordered_map> collectivesEnd; + for (const auto& p : subMap) { + int rank = p.first; + const std::string& col = p.second.first; + if (p.second.second == kEventStart) { + collectivesStart[col].push_back(rank); + } else { + collectivesEnd[col].push_back(rank); + } + } + + if (!collectivesStart.empty()) { + report += c10::str("\n\t #", seq, " started ranks:"); + for (auto& mapPair : collectivesStart) { + report += c10::str( + "\n\t [", + ranksToString(mapPair.second), + "] started ", + mapPair.first); + } + } + if (!collectivesEnd.empty()) { + report += c10::str("\n\t #", seq, " finished ranks:"); + for (auto& mapPair : collectivesEnd) { + report += c10::str( + "\n\t [", + ranksToString(mapPair.second), + "] finished ", + mapPair.first); + } + } + } + return report; +} + +inline bool parseTraceValue( + c10::intrusive_ptr& store, + const std::string& key, + uint64_t& seq, + std::string& col) { + try { + std::vector traceValue = store->get(key); + std::memcpy(&seq, traceValue.data(), sizeof(seq)); + std::string colName((char*)traceValue.data() + sizeof(seq)); + col = colName; + return true; + } catch (...) { + LOG(ERROR) << "Store is down while getting key " << key; + return false; + } + return true; +} + +inline std::string retrieveDesyncReport( + c10::intrusive_ptr& store, + const std::string& pgName, + int myRank, + int worldSize) { + std::string report; + + uint64_t thisSeq = 0; + std::string thisCol; + + std::vector missingRanks; + TraceMap traceMap; + + for (const auto rank : c10::irange(worldSize)) { + // Build traceMapStart. 
+ uint64_t seqStart = 0; + { + std::string traceKeyStart = getTraceStartKey(pgName, rank); + if (!store->check({traceKeyStart})) { + missingRanks.push_back(rank); + continue; + } + std::string col; + if (!parseTraceValue(store, traceKeyStart, seqStart, col)) { + return report; + } + traceMap[seqStart].emplace(rank, std::make_pair(col, kEventStart)); + if (rank == myRank) { + thisSeq = seqStart; + thisCol = std::move(col); + } + } + + // Build traceMapEnd. + { + std::string traceKeyEnd = getTraceEndKey(pgName, rank); + if (!store->check({traceKeyEnd})) { + continue; + } + uint64_t seq = 0; + std::string col; + if (!parseTraceValue(store, traceKeyEnd, seq, col)) { + return report; + } + if (seq == seqStart) { + traceMap[seq][rank].second = kEventEnd; + } + } + } + + TORCH_INTERNAL_ASSERT( + !missingRanks.empty() || !traceMap.empty(), + "Trace shouldn't be empty while enabled GLOO_ASYNC_TIMEOUT_DEBUG"); + TORCH_INTERNAL_ASSERT( + !thisCol.empty(), + "Timeout rank [", + myRank, + "] must have collective tracking iteam in c10::Store trace"); + TORCH_INTERNAL_ASSERT( + traceMap[thisSeq][myRank].second == kEventStart, + "Timeout rank [", + myRank, + "] last trace item must be kEventStart. 
thisSeq = ", + thisSeq, + ", col = ", + thisCol); + + report += c10::str( + "\n\t - [", myRank, "] Timeout at collective: ", thisCol, ", #", thisSeq); + + if (!missingRanks.empty()) { + report += analyzeMissingRanks(missingRanks); + } else { + report += analyzeLaggingRanks(traceMap); + report += dumpSnapshot(traceMap); + } + + return report; +} + +inline std::string pickle_str(const c10::IValue& v) { + std::vector result; + { + auto writer = [&](const char* data, size_t size) { + result.insert(result.end(), data, data + size); + }; + torch::jit::Pickler pickler( + writer, nullptr, nullptr, nullptr, nullptr, false); + pickler.protocol(); + pickler.pushIValue(v); + pickler.stop(); + } + return std::string(result.begin(), result.end()); +} + +inline std::string get_python_cpp_trace() { + // usage: + // LOG(INFO) << "stacktrace: " + // << get_python_cpp_trace(); + // warn: might be slow in getting cpp traces + // because of slow/broken addr2line + // in different system libs + std::shared_ptr tb = + torch::CapturedTraceback::gather( + /*python=*/true, /*script=*/true, /*cpp=*/true); + torch::SymbolizedTracebacks s_tbs = torch::symbolize({tb.get()}); + const auto& s_tb = s_tbs.tracebacks.at(0); + constexpr auto TB_FMT_CSTR = FMT_COMPILE("#{} {} from {}:{}\n"); + fmt::memory_buffer buf; + auto buf_iter = std::back_inserter(buf); + for (auto idx : c10::irange(s_tb.size())) { + auto frame_id = s_tb[idx]; + const auto& frame = s_tbs.all_frames.at(frame_id); + fmt::format_to( + buf_iter, + TB_FMT_CSTR, + idx, + frame.funcname, + frame.filename, + frame.lineno); + } + return fmt::to_string(buf); +} + +inline c10::Dict new_dict() { + return c10::Dict( + c10::AnyType::get(), c10::AnyType::get()); +} + +inline c10::List new_list() { + return c10::List(c10::AnyType::get()); +} + +inline std::string ranks_str(const std::vector& ranks) { + return fmt::format("[{}]", fmt::join(ranks, ", ")); +} + +} // namespace c10d + +#else +#error "This file should not be included when either 
TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c02edd2ef0eb6f4a04b6abe2976d1c9cd20e2e90 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Types.hpp @@ -0,0 +1,190 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include + +#include +#include + +namespace c10d { + +// Base class for supplementary data potentially needed by ReduceOps +struct TORCH_API _SupplementBase : torch::CustomClassHolder { + ~_SupplementBase() override = default; +}; + +// Supplementary data specific to NCCL PREMUL_SUM +// The point of use in ProcessGroupNCCL knows how to unpack it. +struct NCCLPreMulSumSupplement : _SupplementBase { + double double_factor{0.0}; + at::Tensor tensor_factor; + NCCLPreMulSumSupplement(double f) : double_factor{f} {} + NCCLPreMulSumSupplement(at::Tensor t) : tensor_factor{std::move(t)} { + TORCH_CHECK_EQ(tensor_factor.numel(), 1); + } +}; + +// Other ReduceOps that need different supplementary data can also +// derive from _SupplementBase. +struct TORCH_API ReduceOp : torch::CustomClassHolder { + // note(crcrpar): RedOpType could be defined outside of `ReduceOp` + enum RedOpType : uint8_t { + SUM = 0, + AVG = 1, + PRODUCT = 2, + MIN = 3, + MAX = 4, + BAND = 5, // Bitwise AND + BOR = 6, // Bitwise OR + BXOR = 7, // Bitwise XOR + PREMUL_SUM = 8, // Multiply by a user-supplied constant before summing. 
+ UNUSED = 9 + }; + + ReduceOp() = default; + + ReduceOp(RedOpType op) : op_(op) { + TORCH_INTERNAL_ASSERT( + op_ != PREMUL_SUM, + "Use `torch.distributed._make_nccl_premul_sum` to create an instance of ReduceOp with PREMUL_SUM"); + } + + ReduceOp( + RedOpType op, + const c10::intrusive_ptr<_SupplementBase>& optional_supplement) { + if (optional_supplement) { + op_ = op; + } else { + supplement_ = optional_supplement; + } + } + + // The heap resource supplement_, if it exists, is managed by a + // c10::intrusive_ptr, so constructors and operator= can be simple + ReduceOp(const ReduceOp& other) = default; + ReduceOp& operator=(const ReduceOp& other) = default; + + ReduceOp(ReduceOp&& other) = default; + ReduceOp& operator=(ReduceOp&& other) = default; + ~ReduceOp() override = default; + + operator RedOpType() const { + return op_; + } + + bool operator==(const std::uint8_t other) { + TORCH_INTERNAL_ASSERT(other < 9, "Invalid other op value"); + return other == op_; + } + + bool operator==(const ReduceOp::RedOpType other) { + return *this == static_cast(other); + } + + // todo(crcrpar): Handle `RedOpType::PREMUL_SUM` with its scaling factor. + bool operator==(const ReduceOp& other) { + return *this == other.op_; + } + + RedOpType op_ = SUM; + // supplement_ is "type-erased" storage for optional supplementary + // data the op might need. + // The point of use will know the derived type supplement_ really is, + // and downcast its pointer to extract the data as the needed type(s). + // Right now, only PREMUL_SUM needs supplementary data, but the same + // mechanism could extend to support other nontrivial reduce ops with + // different supplementary payloads. 
+ c10::intrusive_ptr<_SupplementBase> supplement_; +}; + +template +ReduceOp makeNCCLPreMulSum(const T& factor) { + ReduceOp rop; + rop.op_ = ReduceOp::PREMUL_SUM; + rop.supplement_ = c10::make_intrusive(factor); + return rop; +} + +TORCH_API bool isComplexViewAsRealAllowed(const ReduceOp& reduceOp); + +constexpr auto kUnsetTimeout = std::chrono::milliseconds(-1); + +struct BroadcastOptions { + int64_t rootRank = 0; + int64_t rootTensor = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllreduceOptions { + ReduceOp reduceOp = ReduceOp::SUM; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; + std::optional sparseIndices = std::nullopt; +}; + +struct AllreduceCoalescedOptions : AllreduceOptions {}; + +struct ReduceOptions { + ReduceOp reduceOp = ReduceOp::SUM; + int64_t rootRank = 0; + int64_t rootTensor = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllgatherOptions { + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct GatherOptions { + int64_t rootRank = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct ScatterOptions { + int64_t rootRank = 0; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct ReduceScatterOptions { + ReduceOp reduceOp = ReduceOp::SUM; + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct AllToAllOptions { + std::chrono::milliseconds timeout = kUnsetTimeout; + bool asyncOp = true; +}; + +struct BarrierOptions { + std::vector device_ids; + std::chrono::milliseconds timeout = kUnsetTimeout; + std::optional device; + bool asyncOp = true; +}; + +struct DistributedBackendOptions { + c10::intrusive_ptr<::c10d::Store> store; + int group_rank; + int group_size; + std::chrono::duration timeout; + std::string group_id; + std::vector global_ranks_in_group; +}; + +} // namespace c10d + 
+#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6e53aa7a21fb3a67135d15b2add0fe344d05dab6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCTracing.hpp @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include + +namespace c10d { + +#define RECORD_COMMS_TRACE( \ + _comms_tracer, _work, _opType, _rank, _comm_size, _inTensors, _outTensors) \ + do { \ + if (torch_ucc_config.enable_comms_logger) { \ + _comms_tracer->recordComms( \ + opTypeToString(_opType), \ + (uintptr_t)_work.get(), \ + _rank, \ + _comm_size, \ + _inTensors, \ + _outTensors); \ + } \ + } while (0) + +// interfaces to collect communication traces +class TORCH_API CommTraceLogger : public torch::CustomClassHolder { + private: + std::vector comms_trace_; + std::vector curBlocks_; /* unused */ + std::vector curOutSplitSizes_; + std::vector curInSplitSizes_; + int curRoot_ = -1; + unsigned long seqnum = 0; + + public: + void setCurBlock(const std::string& name); /* unused */ + void popBlock(); /* unused */ + // record root info if applicable, e.g., broadcast, gather, scatter + void recordOptionalInfo(int root = -1); + // record input/output splits of Alltoallv + void recordOptionalInfo( + const std::vector& outputSplitSizes = {}, + const std::vector& inputSplitSizes = {}); + // record essential comms information + void recordComms( + const std::string& collName, + const uintptr_t workReq = 0, + const int rank = 
-1, + const int world_size = -1, + const std::vector& inputTensors = {}, + const std::vector& outputTensor = {}); + // return collected comms traces + std::vector& getCommsTrace() { + return comms_trace_; + } +}; + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..66357622a880f36d1cad25d5a1642478482bcf05 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UCCUtils.hpp @@ -0,0 +1,192 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_C10D_UCC + +#include +#include +#include + +namespace c10d { + +// Macro to generate the error message on a non-successful UCC return value. +#define TORCH_UCC_GET_ERROR_MSG(_err, _error_msg, _result) \ + do { \ + _err = c10::str( \ + "[", \ + std::string(__FILE__), \ + ":", \ + std::to_string(__LINE__), \ + "] ", \ + logger->getLogPrefix(), \ + _error_msg, \ + ", error code ", \ + _result, \ + ": ", \ + ucc_status_string(_result), \ + ", system error code ", \ + errno); \ + } while (0) + +// Macro to throw on a non-successful UCC return value. +#define TORCH_UCC_CHECK(_cmd, _error_msg) \ + do { \ + ucc_status_t result = _cmd; \ + if (result != UCC_OK) { \ + std::string err; \ + TORCH_UCC_GET_ERROR_MSG(err, _error_msg, result); \ + TORCH_CHECK(false, err); \ + } \ + } while (0) + +// Macro and throw on a non-successful UCC return value and free its request. 
+#define TORCH_UCC_CHECK_REQUEST(_request, _cmd, _error_msg) \ + do { \ + ucc_status_t result = _cmd; \ + if (result != UCC_OK) { \ + std::string err; \ + TORCH_UCC_GET_ERROR_MSG(err, _error_msg, result); \ + if (_request != nullptr) { \ + ucc_collective_finalize(_request); \ + } \ + TORCH_CHECK(false, err); \ + } \ + } while (0) + +// Macros to print logs with unified format +#define TORCH_UCC_LOG_ERROR(_phase, _msg) \ + LOG(ERROR) << logger->getLogPrefix(_phase) << "[ERROR] " << _msg; +#define TORCH_UCC_LOG_INFO(_phase, _msg) \ + LOG(INFO) << logger->getLogPrefix(_phase) << "[INFO] " << _msg; +#define TORCH_UCC_LOG_DEBUG(_phase, _msg) \ + VLOG(1) << logger->getLogPrefix(_phase) << "[DEBUG] " << _msg; + +enum torch_ucc_phase_t { + TORCH_UCC_UNKNOWN = -1, + TORCH_UCC_INIT, + TORCH_UCC_HEALTH_CHECK, + TORCH_UCC_READY, + TORCH_UCC_COLL_POST, + TORCH_UCC_COLL_PROGRESS, + TORCH_UCC_FINALIZE, +}; + +const std::map ucc_phase_map = { + {TORCH_UCC_UNKNOWN, "UNKNOWN"}, + {TORCH_UCC_INIT, "INIT"}, + {TORCH_UCC_HEALTH_CHECK, "HEALTH_CHECK"}, + {TORCH_UCC_READY, "READY"}, + {TORCH_UCC_COLL_POST, "COLL_POST"}, + {TORCH_UCC_COLL_PROGRESS, "COLL_PROGRESS"}, + {TORCH_UCC_FINALIZE, "FINALIZE"}, +}; + +class CommTraceLogger; + +class TORCH_API ProcessGroupUCCLogger : public torch::CustomClassHolder { + public: + ProcessGroupUCCLogger(); + ProcessGroupUCCLogger(std::string log_prefix, torch_ucc_phase_t phase); + + std::string getLogPrefix(torch_ucc_phase_t phase = TORCH_UCC_UNKNOWN); + void setLogPrefix(std::string log_prefix); + inline void setPhase(torch_ucc_phase_t phase) { + local_phase = phase; + } + + void initCommsTracer(); + void flushComms(int rank, int world_size); + std::shared_ptr trace_generator = nullptr; + + protected: + std::string log_prefix; + torch_ucc_phase_t local_phase = TORCH_UCC_UNKNOWN; + bool initialized_CommTraceLogger = false; +}; + +struct torch_ucc_oob_coll_info_t { + c10::intrusive_ptr store; + uint32_t comm_id; + int rank; + int size; + void* rbuf; + 
size_t msglen; + std::string getKey(std::string key) { + return std::to_string(comm_id) + key; + } +}; + +class CommBase { + public: + CommBase(const c10::intrusive_ptr& logger_) + : logger(logger_) {} + virtual void progress() = 0; + virtual void free_request(ucc_coll_req_h request) = 0; + virtual ~CommBase() {} + c10::intrusive_ptr logger; +}; +class CommUCC : public CommBase { + public: + ucc_lib_h lib{nullptr}; + ucc_context_h context{nullptr}; + + public: + void progress() override; + CommUCC( + std::shared_ptr oob, + const c10::intrusive_ptr& logger); + void free_request(ucc_coll_req_h request) override; + ~CommUCC(); +}; + +ucc_status_t oob_allgather( + void* sbuf, + void* rbuf, + size_t msglen, + void* coll_info, + void** req); + +ucc_status_t oob_allgather_test(void* req); + +ucc_status_t oob_allgather_free(void* req); + +// trim: remove spaces before and after the string view +// implementation borrowed from https://stackoverflow.com/a/17976541 +inline std::string_view trim(std::string_view s) { + auto wsfront = std::find_if_not( + s.begin(), s.end(), [](int c) { return std::isspace(c); }); + auto wsback = std::find_if_not(s.rbegin(), s.rend(), [](int c) { + return std::isspace(c); + }).base(); + return ( + wsback <= wsfront ? "" : s.substr(wsfront - s.begin(), wsback - wsfront)); +} + +inline std::string tolower(std::string_view s) { + std::string result; + result.reserve(s.size()); + for (auto c : s) { + result.push_back(std::tolower(c)); + } + return result; +} + +inline std::vector parse_list(std::string list) { + std::vector result; + list = tolower(trim(list)); + while (!list.empty()) { + const auto end_pos = list.find_first_of(','); + const auto token = trim(list.substr(0, end_pos)); + result.push_back(std::string(token)); + list = (end_pos != std::string_view::npos) ? 
list.substr(end_pos + 1) : ""; + } + return result; +} + +} // namespace c10d + +#endif // USE_C10D_UCC + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4293911d1faea1f3595eea7af19f7094afcfac60 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/UnixSockUtils.hpp @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d::tcputil { + +#define CONNECT_SOCKET_OFFSET 2 + +inline int poll(struct pollfd* fds, unsigned long nfds, int timeout) { + return ::poll(fds, nfds, timeout); +} + +inline void addPollfd( + std::vector& fds, + int socket, + short events) { + fds.push_back({.fd = socket, .events = events}); +} + +inline struct ::pollfd getPollfd(int socket, short events) { + struct ::pollfd res = {.fd = socket, .events = events}; + return res; +} + +} // namespace c10d::tcputil + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fedb64349bbbb088ea87d493c37c0e8efaef2e65 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Utils.hpp @@ -0,0 +1,750 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +typedef SSIZE_T ssize_t; +#pragma comment(lib, "Ws2_32.lib") +#else +#include +#include +#include +#include +#include +#endif + +#include + +#include +#include +#include +#include +#include + +namespace c10d { + +TORCH_API size_t getTensorsNumel(const std::vector& tensors); + +// Retrieve tensor shapes from a given tensor. +TORCH_API std::vector getTensorShapes( + const std::vector& tensors); + +// Use -2 to represent unset state of env vars +#define C10D_ENV_NOT_SET -2 + +#define WARN_ENV_VAR_ONCE(deprecated_env, new_env) \ + TORCH_WARN_ONCE( \ + "Environment variable " + deprecated_env + " is deprecated; use " + \ + new_env + " instead"); + +// Turns at::IntArrayRef into "(1, 2, 3, 4)". 
+inline std::string toString(at::IntArrayRef l) { + std::stringstream ss; + ss << '('; + for (const auto i : c10::irange(l.size())) { + if (i > 0) { + ss << ", "; + } + ss << l[i]; + } + ss << ')'; + return ss.str(); +} + +inline std::string toString(const c10::Layout& layout) { + std::stringstream ss; + ss << layout; + return ss.str(); +} + +inline void assertSameType( + const at::DeprecatedTypeProperties& type, + const std::vector& tensors) { + for (const auto i : c10::irange(tensors.size())) { + if (!tensors[i].options().type_equal(type.options())) { + const std::string expected = type.toString(); + const std::string actual = tensors[i].toString(); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "mixed types (" + expected + " and " + actual + ")"); + } + } +} + +inline std::vector split( + char separator, + const std::string& string) { + std::vector pieces; + std::stringstream ss(string); + std::string item; + while (std::getline(ss, item, separator)) { + pieces.push_back(std::move(item)); + } + return pieces; +} + +inline std::string getCvarString( + const std::vector& env, + const char* def) { + std::string ret(def); + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + ret = val.value(); + } + + return ret; +} + +inline int getCvarInt(const std::vector& env, int def) { + int ret = def; + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority 
than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + const auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + try { + ret = std::stoi(val.value()); + } catch (std::exception&) { + TORCH_CHECK(false, "Invalid value for environment variable: " + env[i]); + } + } + + return ret; +} + +inline bool getCvarBool(const std::vector& env, bool def) { + bool ret = def; + + if (env.empty()) { + TORCH_CHECK(false, "No environment variables passed"); + return ret; + } + + /* parse environment variable in reverse order, so the early + * versions of a variable get higher priority than the latter + * versions of the same variable */ + for (ssize_t i = static_cast(env.size()) - 1; i >= 0; i--) { + auto val = c10::utils::get_env(env[i].c_str()); + if (!val.has_value()) { + continue; + } else if (i) { + WARN_ENV_VAR_ONCE(env[i], env[0]); + } + + for (auto& x : val.value()) { + // NOLINTNEXTLINE(*-narrowing-conversions) + x = std::tolower(x); + } + + if (val == "y" || val == "yes" || val == "1" || val == "t" || + val == "true") { + ret = true; + } else if ( + val == "n" || val == "no" || val == "0" || val == "f" || + val == "false") { + ret = false; + } else { + TORCH_CHECK(false, "Invalid value for environment variable: " + env[i]); + return ret; + } + } + + return ret; +} + +inline void assertSameSizes( + const at::IntArrayRef& sizes, + const std::vector& tensors) { + for (const auto i : c10::irange(tensors.size())) { + if (!tensors[i].sizes().equals(sizes)) { + const auto expected = toString(sizes); + const auto actual = toString(tensors[i].sizes()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "mixed sizes (" + expected + " and " + actual + ")"); + } + } +} + +inline void assertSameSizeAndType(const std::vector& tensors) { + // Ensure we have at least one tensor + if 
(tensors.empty()) { + throw std::invalid_argument("argument is empty"); + } + + // Ensure all tensors have identical type and shape + auto options = tensors[0].options(); + auto sizes = tensors[0].sizes(); + for (const auto i : c10::irange(1, tensors.size())) { + if (!tensors[i].options().type_equal(options)) { + const auto expected = toString(options); + const auto actual = toString(tensors[i].options()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "argument contains mixed types (" + expected + " and " + actual + + ")"); + } + if (!tensors[i].sizes().equals(sizes)) { + const auto expected = toString(sizes); + const auto actual = toString(tensors[i].sizes()); + throw std::invalid_argument( + // NOLINTNEXTLINE(performance-inefficient-string-concatenation) + "argument contains mixed types (" + expected + " and " + actual + + ")"); + } + } +} + +inline void assertTypeMatch( + const std::function& fn, + const at::DeprecatedTypeProperties& type, + const at::ArrayRef tensors, + size_t index) { + if (!tensors[index].options().type_equal(type.options())) { + fn("invalid tensor type at index " + std::to_string(index) + " (expected " + + type.toString() + ", got " + tensors[index].toString() + ")"); + } +} + +inline void assertTypeMatch( + const std::function& fn, + const at::TensorOptions& options, + const at::ArrayRef tensors, + size_t index) { + if (!tensors[index].options().type_equal(options)) { + fn("invalid tensor type at index " + std::to_string(index) + " (expected " + + toString(options) + ", got " + toString(tensors[index].options()) + ")"); + } +} + +inline void assertSizesMatch( + const std::function& fn, + const at::IntArrayRef& sizes, + const at::ArrayRef tensors, + size_t index) { + if (tensors[index].sizes() != sizes) { + fn("invalid tensor size at index " + std::to_string(index) + " (expected " + + toString(sizes) + ", got " + toString(tensors[index].sizes()) + ")"); + } +} + +inline void 
assertLayoutMatch( + const std::function& fn, + const c10::Layout& expected, + const at::ArrayRef tensors, + size_t index) { + const auto& actual = tensors[index].layout(); + if (actual != expected) { + fn("invalid tensor layout at index " + std::to_string(index) + + " (expected " + toString(expected) + ", got " + toString(actual) + ")"); + } +} + +inline void assertLayoutMatch( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& layout = tensors[0].layout(); + for (const auto i : c10::irange(1, tensors.size())) { + assertLayoutMatch(fn, layout, tensors, i); + } +} + +inline void assertNonEmpty( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.empty()) { + fn("requires non-empty tensor list"); + } +} + +inline void assertSingleElement( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element tensor list"); + } +} + +inline void assertSingleElementInput( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element input tensor list"); + } +} + +inline void assertSingleElementOutput( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() != 1) { + fn("requires a single-element output tensor list"); + } +} + +inline void assertRootRank( + const std::function& fn, + int64_t rank, + int64_t size) { + if (rank < 0 || rank >= size) { + fn("invalid root rank: " + std::to_string(rank)); + } +} + +inline void assertRootTensor( + const std::function& fn, + int64_t rank, + int64_t size) { + if (rank < 0 || rank >= size) { + fn("invalid root tensor: " + std::to_string(rank)); + } +} + +inline void assertDense( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& layout = tensors[0].layout(); + if (layout != at::kStrided) { + fn("only supports dense tensors"); + } +} + +inline void assertCPU( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& device 
= tensors[0].device(); + if (device.type() != at::kCPU) { + fn("only supports CPU tensors"); + } +} + +inline void assertSameDevice( + const std::function& fn, + const at::ArrayRef tensors) { + if (tensors.size() < 2) { + return; + } + const auto& device = tensors[0].device(); + for (const auto i : c10::irange(1, tensors.size())) { + if (tensors[i].device() != device) { + fn("tensors should be on the same device"); + } + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors, + const at::DeprecatedTypeProperties& type, + const at::IntArrayRef& sizes) { + for (const auto i : c10::irange(tensors.size())) { + assertTypeMatch(fn, type, tensors, i); + assertSizesMatch(fn, sizes, tensors, i); + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors, + const at::TensorOptions& options, + const at::IntArrayRef& sizes) { + for (const auto i : c10::irange(tensors.size())) { + assertTypeMatch(fn, options, tensors, i); + assertSizesMatch(fn, sizes, tensors, i); + } +} + +inline void assertTypeAndSizesMatch( + const std::function& fn, + const at::ArrayRef tensors) { + const auto& options = tensors[0].options(); + const auto sizes = tensors[0].sizes(); + assertTypeAndSizesMatch(fn, tensors.slice(1), options, sizes); +} + +// Copied from ATen/core/functional.h. +template +inline auto fmap(T& inputs, const F& fn) + -> std::vector { + std::vector r; + r.reserve(inputs.size()); + for (auto& input : inputs) { + r.push_back(fn(input)); + } + return r; +} + +// Copied from torch/csrc/utils/tensor_flatten.h. 
+inline at::Tensor flattenDenseTensors(at::TensorList tensors) { + static const auto flatten = [](const at::Tensor& t) { + return t.contiguous().view({-1}); + }; + if (tensors.size() == 1) { + return flatten(tensors[0]); + } + return at::cat(::c10d::fmap(tensors, flatten)); +} + +inline at::Tensor newLikeFlat( + std::vector>& tensors, + size_t deviceIdx) { + if (tensors.empty() || tensors[0].empty()) { + TORCH_CHECK(false, "Received an empty list"); + } + if (deviceIdx >= tensors.size()) { + TORCH_CHECK(false, "Invalid device index"); + } + auto& t = tensors[deviceIdx][0]; + auto device = t.device(); + for (const auto i : c10::irange(1, tensors[deviceIdx].size())) { + if (tensors[deviceIdx][i].device() != device) { + TORCH_CHECK(false, "Expecting all tensors on the same device"); + } + } + at::DeviceGuard gpuGuard(device); + std::vector sizes{static_cast(tensors[deviceIdx].size())}; + std::vector strides{t.numel()}; + sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); + strides.insert(strides.end(), t.strides().begin(), t.strides().end()); + return at::empty_strided( + sizes, strides, t.options().memory_format(std::nullopt)); +} + +inline at::Tensor newLikeFlat(std::vector& tensors) { + if (tensors.empty()) { + TORCH_CHECK(false, "Received an empty list"); + } + auto& t = tensors[0]; + at::DeviceGuard gpuGuard(t.device()); + std::vector sizes{static_cast(tensors.size())}; + sizes.insert(sizes.end(), t.sizes().begin(), t.sizes().end()); + return at::empty(sizes, t.options()); +} + +inline std::vector> getSizes( + const std::vector& tensors) { + std::vector> sizes(tensors.size()); + for (const auto i : c10::irange(tensors.size())) { + sizes[i] = tensors[i].sizes().vec(); + } + return sizes; +} + +inline std::vector getDevices(const std::vector& tensors) { + std::vector devices(tensors.size(), -1); + if (tensors[0].device().is_cuda()) { + for (const auto i : c10::irange(tensors.size())) { + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + devices[i] = 
tensors[i].storage().device().index(); + } + } + return devices; +} + +template +inline T* getDataPointer(const at::Tensor& tensor) { + // This method is only used in ProcessGroupGloo for now. Call sites must make + // sure that the input tensor is contiguous. It is OK if the tensor does not + // start from the beginning of the storage. For example, it could come from + // chunk(..., dim=0)[1]. Hence, we need to use data_ptr() instead of + // tensor.storage().data() + // NB: not using tensor.data() because tensor is not aware of gloo::TYPE + return static_cast(tensor.data_ptr()); +} + +template +std::vector getDataPointers(const std::vector& tensors) { + std::vector ptrs(tensors.size()); + for (const auto i : c10::irange(tensors.size())) { + ptrs[i] = getDataPointer(tensors[i]); + } + return ptrs; +} + +// For alltoall split size sanity check +inline void checkSplitSizes( + const std::vector& split_sizes, + const at::Tensor& tensor, + int group_size) { + if (split_sizes.empty()) { + TORCH_CHECK( + tensor.size(0) % group_size == 0, + "Tensor's dim 0 does not divide equally across group size"); + } else { + TORCH_CHECK( + split_sizes.size() == static_cast(group_size), + "Number of tensor splits not equal to group size"); + const auto sum = c10::sum_integers(split_sizes); + TORCH_CHECK( + sum == tensor.size(0), "Split sizes doesn't match total dim 0 size"); + } +} + +// Compute alltoall lengths and offsets, handling multi-dimension tensors +template +size_t computeLengthsAndOffsets( + const std::vector& split_sizes, + const at::Tensor& tensor, + std::vector* lengths, + std::vector* offsets) { + size_t group_size = lengths->size(); + bool equal_splits = false; + size_t dim0_size = tensor.size(0); + size_t row_size = (dim0_size ? 
tensor.numel() / dim0_size : 1); + size_t split_size = 0; + size_t offset = 0; + + if (split_sizes.empty()) { + equal_splits = true; + split_size = tensor.size(0) / group_size; + } + for (const auto i : c10::irange(group_size)) { + size_t length = row_size * (equal_splits ? split_size : split_sizes[i]); + (*lengths)[i] = length; + (*offsets)[i] = offset; + // TODO: see if we should add overflow protection for offset + offset += length; + } + return offset; +} + +template +size_t computeLengthsAndOffsets( + const std::vector& tensors, + std::vector* lengths, + std::vector* offsets) { + size_t group_size = lengths->size(); + size_t offset = 0; + for (const auto i : c10::irange(group_size)) { + size_t length = tensors[i].numel(); + (*lengths)[i] = length; + (*offsets)[i] = offset; + offset += length; + } + return offset; +} + +// Get the start and stride of the global rank from a list of global ranks +// If the global ranks do not follow the consecutive rule, the stride will be -1 +void TORCH_API getGlobalRankStartAndStride( + const std::vector& globalRanksInGroup, + int& globalRankStart, + int& globalRankStride); + +using RankType = uint32_t; +using SizeType = uint64_t; + +// `errno` is only meaningful when it fails. E.g., a successful `fork()` sets +// `errno` to `EINVAL` in child process on some macos +// (https://stackoverflow.com/a/20295079), and thus `errno` should really only +// be inspected if an error occurred. +// +// `success_cond` is an expression used to check if an error has happened. So +// for `fork()`, we can use `SYSCHECK(pid = fork(), pid != -1)`. The function +// output is stored in variable `__output` and may be used in `success_cond`. 
+#ifdef _WIN32 +#define SYSCHECK(expr, success_cond) \ + while (true) { \ + auto __output = (expr); \ + auto errno_local = WSAGetLastError(); \ + (void)__output; \ + if (!(success_cond)) { \ + if (errno == EINTR) { \ + continue; \ + } else if ( \ + errno_local == WSAETIMEDOUT || errno_local == WSAEWOULDBLOCK) { \ + C10_THROW_ERROR(DistNetworkError, "Socket Timeout"); \ + } else { \ + C10_THROW_ERROR(DistNetworkError, c10::utils::str_error(errno_local)); \ + } \ + } else { \ + break; \ + } \ + } +#else +#define SYSCHECK(expr, success_cond) \ + while (true) { \ + auto __output = (expr); \ + (void)__output; \ + if (!(success_cond)) { \ + if (errno == EINTR) { \ + continue; \ + } else if (errno == EAGAIN || errno == EWOULDBLOCK) { \ + C10_THROW_ERROR(DistNetworkError, "Socket Timeout"); \ + } else { \ + C10_THROW_ERROR(DistNetworkError, c10::utils::str_error(errno)); \ + } \ + } else { \ + break; \ + } \ + } +#endif + +// Most functions indicate error by returning `-1`. This is a helper macro for +// this common case with `SYSCHECK`. +// Since SOCKET_ERROR = -1 in MSVC, so also leverage SYSCHECK_ERR_RETURN_NEG1 +#define SYSCHECK_ERR_RETURN_NEG1(expr) SYSCHECK(expr, __output != -1) + +namespace tcputil { + +// Send and receive +template +void sendBytes( + int socket, + const T* buffer, + size_t length, + bool moreData = false) { + size_t bytesToSend = sizeof(T) * length; + if (bytesToSend == 0) { + return; + } + + auto currentBytes = reinterpret_cast(buffer); + + int flags = 0; + +#ifdef MSG_MORE + if (moreData) { // there is more data to send + flags |= MSG_MORE; + } +#endif + +// Ignore SIGPIPE as the send() return value is always checked for error +#ifdef MSG_NOSIGNAL + flags |= MSG_NOSIGNAL; +#endif + + while (bytesToSend > 0) { + ssize_t bytesSent = 0; + SYSCHECK_ERR_RETURN_NEG1( + bytesSent = ::send(socket, currentBytes, bytesToSend, flags)) + if (bytesSent == 0) { + C10_THROW_ERROR( + DistNetworkError, + "Failed to send, sent 0 bytes. 
" + "Connection was likely closed. " + "Did the remote server shutdown or crash?"); + } + + bytesToSend -= bytesSent; + currentBytes += bytesSent; + } +} + +template +void recvBytes(int socket, T* buffer, size_t length) { + size_t bytesToReceive = sizeof(T) * length; + if (bytesToReceive == 0) { + return; + } + + auto currentBytes = reinterpret_cast(buffer); + + while (bytesToReceive > 0) { + ssize_t bytesReceived = 0; + SYSCHECK_ERR_RETURN_NEG1( + bytesReceived = recv(socket, currentBytes, bytesToReceive, 0)) + if (bytesReceived == 0) { + C10_THROW_ERROR( + DistNetworkError, + "Failed to recv, got 0 bytes. " + "Connection was likely closed. " + "Did the remote server shutdown or crash?"); + } + + bytesToReceive -= bytesReceived; + currentBytes += bytesReceived; + } +} + +// send a vector's length and data +template +void sendVector(int socket, const std::vector& vec, bool moreData = false) { + SizeType size = vec.size(); + sendBytes(socket, &size, 1, true); + sendBytes(socket, vec.data(), size, moreData); +} + +// receive a vector as sent in sendVector +template +std::vector recvVector(int socket) { + SizeType valueSize = 0; + recvBytes(socket, &valueSize, 1); + std::vector value(valueSize); + recvBytes(socket, value.data(), value.size()); + return value; +} + +// this is only for convenience when sending rvalues +template +void sendValue(int socket, const T& value, bool moreData = false) { + sendBytes(socket, &value, 1, moreData); +} + +template +T recvValue(int socket) { + T value; + recvBytes(socket, &value, 1); + return value; +} + +// send a string's length and data +inline void sendString( + int socket, + const std::string& str, + bool moreData = false) { + SizeType size = str.size(); + sendBytes(socket, &size, 1, true); + sendBytes(socket, str.data(), size, moreData); +} + +// receive a string as sent in sendString +inline std::string recvString(int socket) { + SizeType valueSize = 0; + recvBytes(socket, &valueSize, 1); + std::vector value(valueSize); + 
recvBytes(socket, value.data(), value.size()); + return std::string(value.data(), value.size()); +} + +} // namespace tcputil +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..4e94641b9b8ed2ce362b092398a6af0243aa4a81 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/WinSockUtils.hpp @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d::tcputil { + +#define CONNECT_SOCKET_OFFSET 1 + +inline int poll(struct pollfd* fdArray, unsigned long fds, int timeout) { + return WSAPoll(fdArray, fds, timeout); +} + +inline void addPollfd( + std::vector& fds, + int socket, + short events) { + fds.push_back({(SOCKET)socket, events}); +} + +inline struct ::pollfd getPollfd(int socket, short events) { + struct ::pollfd res = {(SOCKET)socket, events}; + return res; +} + +} // namespace c10d::tcputil + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a109527dd40efdc364cc899e96d482dea336b262 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/Work.hpp @@ -0,0 +1,190 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +constexpr auto kNoTimeout = std::chrono::milliseconds(0); + +namespace c10d { + +constexpr const char* const kSeqNumStoreKey = "SEQ_NUM_STORE_KEY"; + +enum class OpType : std::uint8_t { + BROADCAST = 0, + ALLREDUCE = 1, + ALLREDUCE_COALESCED = 2, + REDUCE = 3, + ALLGATHER = 4, + _ALLGATHER_BASE = 5, + ALLGATHER_COALESCED = 6, + GATHER = 7, + SCATTER = 8, + REDUCE_SCATTER = 9, + ALLTOALL_BASE = 10, + ALLTOALL = 11, + SEND = 12, + RECV = 13, + RECVANYSOURCE = 14, + BARRIER = 15, + _REDUCE_SCATTER_BASE = 16, + COALESCED = 17, + _ALLREDUCE_SPARSE = 18, + UNKNOWN = 100, +}; + +// TODO: support different types of failures/errors +enum class WorkResult : std::uint8_t { + SUCCESS = 0, + TIMEOUT = 1, + COMM_ERROR = 2, + UNKNOWN = 100, +}; + +// Converts OpType to human readable string. +TORCH_API std::string opTypeToString(OpType opType); + +// Whether or not an OP is an p2p op (SEND, RECV, RECVANYSOURCE) +TORCH_API bool isP2POp(OpType opType, bool batchP2P = false); + +// Please do not use Work API, it is going away, to be +// replaced by ivalue::Future. +// Python binding for this class might change, please do not assume +// this will be bound using pybind. 
+class TORCH_API Work : public torch::CustomClassHolder { + public: + Work( + int rank = -1, + OpType opType = OpType::UNKNOWN, + const char* profilingTitle = nullptr, + const std::optional>& inputTensors = + std::nullopt); + + ~Work() override; + + // Checks if request has completed. Non-blocking operation. + virtual bool isCompleted(); + + // Returns if the work completed successfully. + // If false, the exception function can be called to get details. + virtual bool isSuccess() const; + + // Returns exception if isSuccess() returned false. + virtual std::exception_ptr exception() const; + + // Returns source rank if this objects represents a recv-from-any. + virtual int sourceRank() const; + + // Returns result tensors, if applicable. + // If work is not supposed to have result, we return empty list. + virtual std::vector result(); + + // Ensures that operations on the output tensors that are invoked + // after this function returns are correctly sequenced after the + // asynchronous completion of this work. + // + // For CUDA tensors, it inserts stream synchronization such that + // the streams of the caller wait for completion of the + // asynchronous operations on the destination tensors. + // + // For CPU tensors, it is currently a nop. + // + // This function should only be used if the caller polls for + // completion through the `isCompleted` function, it has returned + // true, and the `isSuccess` function also has returned true. + // + virtual void synchronize(); + + // Waits until request completes. Blocking operation. + // Throws if the work completed with an exception. + // Returns false if the work is aborted. + // Otherwise, it always returns true, indicating the work is completed. 
+ // + // Functionally equivalent to: + // + // while (!isCompleted()) { /* nop */ } + // auto success = isSuccess(); + // if (!success) { std::rethrow_exception(exception()); } + // return success; + // + virtual bool wait(std::chrono::milliseconds timeout = kNoTimeout); + + // Blocks the current stream until the work is completed. + // This is equivalent to synchronize for CUDA tensors but works for both CPU + // tensors and CUDA tensors by using a spinlock CUDA kernel. + // This will immediately return. + // If no stream is active it will throw an error. + virtual void blockCurrentStream(); + + virtual void abort(); + + // Returns a Future object that will be associated with the completion of + // work. Only NCCL backend is currently supported. + virtual c10::intrusive_ptr getFuture(); + + // Get a Future object that would be marked as either success or failure + // This API can be used by the user to track the completion of the work + // and handle the exception if any. + virtual c10::intrusive_ptr getFutureResult(); + + virtual float getDuration() const; + + virtual uint64_t getSequencenumber() const; + + OpType retrieveOpType() const; + + static c10::intrusive_ptr create_from_future( + const c10::intrusive_ptr& /*future*/); + + protected: + // Completes the work object and optionally sets the exception in a + // thread-safe manner. Notifies all waiting condition variables as well. + void finish(std::exception_ptr exception = nullptr); + + // Similar to finish, but throws an exception if one is already set or + // provided by the user. + void finishAndThrow(std::exception_ptr exception); + + mutable std::mutex mutex_; + std::condition_variable cv_; + bool completed_ = false; + std::exception_ptr exception_; + + // Current rank of the node. + const int rank_; + + // Operation type that this work object refers to. + OpType opType_; + + // When profiling, the callback to record end of operation event. 
This + // callback needs to be called when collective operation is complete. + std::function recordFunctionEndCallback_; +}; + +struct TORCH_API WorkInfo { + WorkInfo( + const OpType& opType, + const uint64_t seq, + const std::chrono::time_point& timeStarted, + const std::chrono::time_point& timeFinished, + const std::chrono::duration& activeDuration) + : opType(opType), + seq(seq), + timeStarted(timeStarted), + timeFinished(timeFinished), + activeDuration(activeDuration) {} + + OpType opType; + uint64_t seq; + std::chrono::time_point timeStarted; + std::chrono::time_point timeFinished; + std::chrono::duration activeDuration; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h new file mode 100644 index 0000000000000000000000000000000000000000..45daf59b55ab849b2e72bdf275a6c09315413ef0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/c10d.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::c10d { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e5099eb2980e5d3f38ab7aa723f5f92880b73d91 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/comm.hpp @@ -0,0 +1,147 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d { + +// Broadcast many tensors to all processes in the process group. +TORCH_API void broadcast_coalesced( + const c10::intrusive_ptr& process_group, + at::TensorList tensors, + size_t buffer_size, + int rank = 0); + +// This class passes bucket contents tensor to DDP communication hook. +class TORCH_API GradBucket { + public: + explicit GradBucket( + size_t index, + size_t bucket_count, + at::Tensor tensor, + std::vector offsets, + std::vector lengths, + std::vector sizes_vec, + std::vector parameters, + std::optional sparse_grad_indices) + : index_(index), + bucket_count_(bucket_count), + buffer_(std::move(tensor)), + offsets_(std::move(offsets)), + lengths_(std::move(lengths)), + sizes_vec_(std::move(sizes_vec)), + parameters_(std::move(parameters)), + sparse_grad_indices_(std::move(sparse_grad_indices)) {} + + // Returns the index of the bucket, which is unique across all the buckets. + size_t getIndex() const { + return index_; + } + + const at::Tensor& getBuffer() const { + return buffer_; + } + + // Returns a mutable buffer compared with the above method. + at::Tensor& getBufferRef() { + return buffer_; + } + + // Overwrites the buffer at a specific index. 
+ void setBuffer(at::Tensor& buffer) { + buffer_ = buffer; + } + + // Each tensor in the list that getGradients corresponds to a + // parameter. + std::vector getGradients() const; + + // Returns model parameters belonging to this bucket. They are returned in the + // same order as gradient tensors via getGradients(). For example, + // getParameters[i] will have its gradient stored in + // getGradients[i] + const std::vector getParameters() const { + return parameters_; + } + + // Returns whether this bucket is the last bucket to allreduce in an + // iteration. + bool isLast() const { + return index_ == bucket_count_ - 1; + } + + std::optional& getSparseGradIndices() { + return sparse_grad_indices_; + } + + private: + size_t index_; + size_t bucket_count_; + at::Tensor buffer_; + + // Per-variable info in buffer_. + std::vector offsets_; + std::vector lengths_; + std::vector sizes_vec_; + + // Model parameters for this bucket. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::vector parameters_; + + // Predefined sparse indices for this bucket (only used for sparse tensors). + // The gradients will be updated to have indices with these tensor values + std::optional sparse_grad_indices_; +}; + +// Base class of both `PythonCommHook` and `CppCommHook`. +// Requires implementing 1) `runHook` method that communicates gradients +// asynchronously, and 2) `parseHookResult` method that converts the hook +// result into a tensor. +class TORCH_API CommHookInterface { + public: + virtual ~CommHookInterface() = default; + + // Passes the input grad bucket to the registered communication hook. + // Once the tensor in the bucket are ready, kicks off the hook asynchronously + // and returns a future that holds the communication results. + virtual c10::intrusive_ptr runHook( + GradBucket& bucket) = 0; + + // Returns the resulting tensor once the communication hook result is + // ready. 
The resulting tensor will then be copied to the grads of + // individual parameters. + virtual at::Tensor parseHookResult(const c10::IValue& result) = 0; +}; + +namespace detail { +// This helper function is called both by CppCommHookInterface below and inside +// reducer. +TORCH_API at::Tensor parseCppCommHookResult(const c10::IValue& result); +} // namespace detail + +// This CppCommHook interface only requires implementing runHook method that +// potentially uses a state. +template +class CppCommHookInterface : public CommHookInterface { + public: + explicit CppCommHookInterface(T state) : state_(std::move(state)) {} + + ~CppCommHookInterface() override = default; + + at::Tensor parseHookResult(const c10::IValue& result) override { + return detail::parseCppCommHookResult(result); + } + + protected: + T state_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5beaa289331f4c3b81d3a279dddb8a3be6b51b38 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/ControlCollectives.hpp @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +using namespace std::chrono_literals; + +class TORCH_API ControlCollectives : public torch::CustomClassHolder { + public: + virtual void barrier( + const std::string& key, + 
std::chrono::milliseconds timeout = 5min, + bool block = true) = 0; + + virtual void broadcastSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector broadcastRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual void gatherSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector> gatherRecv( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual std::vector scatterSend( + const std::string& key, + const std::vector>& data, + std::chrono::milliseconds timeout = 5min) = 0; + virtual std::vector scatterRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual std::vector> allGather( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) = 0; + + virtual int64_t allSum( + const std::string& key, + int64_t data, + std::chrono::milliseconds timeout = 5min) = 0; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp new file mode 100644 index 0000000000000000000000000000000000000000..5782eb49f5755c1f2270428dd171260ad1d28a02 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_collectives/StoreCollectives.hpp @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace c10d { + +class TORCH_API StoreCollectives : public ControlCollectives { + public: + explicit StoreCollectives( + c10::intrusive_ptr store, + int rank, + int worldSize); + + void barrier( + const std::string& key, + std::chrono::milliseconds timeout = 5min, + bool block = true) override; + + void broadcastSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector broadcastRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) override; + + void gatherSend( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector> gatherRecv( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + + std::vector scatterSend( + const std::string& key, + const std::vector>& data, + std::chrono::milliseconds timeout = 5min) override; + std::vector scatterRecv( + const std::string& key, + std::chrono::milliseconds timeout = 5min) override; + + std::vector> allGather( + const std::string& key, + const std::vector& data, + std::chrono::milliseconds timeout = 5min) override; + + int64_t allSum( + const 
std::string& key, + int64_t data, + std::chrono::milliseconds timeout = 5min) override; + + private: + void enforceUnique(const std::string& key); + + private: + c10::intrusive_ptr store_; + int rank_; + int worldSize_; + + c10::FastSet seenKeys_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp new file mode 100644 index 0000000000000000000000000000000000000000..bf79294f23d5515edbe5c98dc20b6b1ae16fdb5a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/Handlers.hpp @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace c10d::control_plane { + +// Request represents a request to the handler. This conceptually maps to an +// HTTP request but could be called via other transports. +class TORCH_API Request { + public: + virtual ~Request() = default; + + virtual const std::string& body() const = 0; + + virtual const std::multimap& params() const = 0; + + std::string getParam(const std::string& key) const { + auto it = params().find(key); + if (it != params().end()) { + return it->second; + } + return ""; + } +}; + +// Response represents a response to the handler. This conceptually maps to an +// HTTP response but could be called via other transports. +class TORCH_API Response { + public: + virtual ~Response() = default; + + // Set the response body to the provided string. 
+ // TODO: add support for chunked responses + virtual void setContent( + std::string&& content, + const std::string& content_type) = 0; + + // Set the response status code. + // These should match standard HTTP status codes. + virtual void setStatus(int status) = 0; +}; + +using HandlerFunc = std::function; + +// Registers a handler. The name needs to be unique and can be called by using +// getHandler directly or via WorkerServer for remote requests. +// These handlers are called from a background C++ thread concurrently with the +// main thread. These handlers need to be thread safe and not cause issues +// during Python training. +TORCH_API void registerHandler(const std::string& name, HandlerFunc f); + +// Fetches a handler by name. +TORCH_API HandlerFunc getHandler(const std::string& name); + +TORCH_API std::vector getHandlerNames(); + +// Registers a handler statically. +// See registerHandler for more details. +class TORCH_API RegisterHandler { + public: + RegisterHandler(const std::string& name, HandlerFunc f) { + registerHandler(name, std::move(f)); + } + + // disable move, copy + RegisterHandler(const RegisterHandler&) = delete; + RegisterHandler(RegisterHandler&&) = delete; + RegisterHandler& operator=(const RegisterHandler&) = delete; + RegisterHandler& operator=(RegisterHandler&&) = delete; +}; + +} // namespace c10d::control_plane + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp new file mode 100644 index 0000000000000000000000000000000000000000..b6e73ec065fe5b1b094f4fabd97ad887a9b94b68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WaitCounterHandler.hpp @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { +namespace control_plane { + +// Returns all wait counter values as a JSON string +std::string getWaitCounterValuesJson(); + +// Ensures the wait counter backend is registered +void ensureWaitCounterBackendRegistered(); + +} // namespace control_plane +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9ffaa77726734d694015d8900ae759b762deff6b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/control_plane/WorkerServer.hpp @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wdeprecated-literal-operator") +#include +C10_DIAGNOSTIC_POP() + +namespace c10d::control_plane { + +class TORCH_API WorkerServer : public c10::intrusive_ptr_target { + public: + WorkerServer(const std::string& hostOrFile, int port = -1); + ~WorkerServer() override; + + void shutdown(); + + int port() { + return port_; + } + + private: + httplib::Server server_; + std::thread serverThread_; + int port_; +}; + +} // namespace c10d::control_plane + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp new file mode 100644 index 0000000000000000000000000000000000000000..e289e3d0aff7561ab197ce8c3e18ccf33d2c9ff9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/CUDAEventCache.hpp @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace c10d { + +class TORCH_API CUDAEventCache + : public std::enable_shared_from_this { + public: + CUDAEventCache(); + std::shared_ptr create(bool timing); + static std::shared_ptr get(at::DeviceIndex device); + + private: + std::mutex cacheMutex_; + // NOTE: We intentionally store raw pointers so that + // we do not attempt to destroy the event objects on process exit, + // because cuda may be gone. + std::array, 2> + eventsArray_; // 0 for timing=false, 1 for timing=true +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp new file mode 100644 index 0000000000000000000000000000000000000000..8195a6faa33d4f86c771ff08b254022e43b3cacd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/StreamBlock.hpp @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace c10d::cuda { + +enum StreamBlockStatus : int32_t { + UNKNOWN = 0, + RUNNING = 1, + TIMED_OUT = 2, + ABORTED = 3, +}; + +/* +StreamBlock implements a baton that will block a the active CUDA stream +until aborted by the main process. +*/ +class TORCH_API StreamBlock { + public: + virtual ~StreamBlock() = default; + virtual void abort() = 0; + virtual StreamBlockStatus status() = 0; +}; + +std::unique_ptr block_stream(std::chrono::milliseconds timeout); + +// Declare a registry so we can call the CUDA StreamBlock API from CPU only code +// (i.e. ProcessGroup/Work objects in libtorch_cpu). +// The implementation lives defined in StreamBlock.cu. +TORCH_DECLARE_REGISTRY( + StreamBlockRegistry, + StreamBlock, + std::chrono::milliseconds); + +} // namespace c10d::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..d8a2520f4fd00d62e506f65e8645c1b035fee2c7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/cuda/utils.hpp @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// This file contains utility functions common for CUDA, which can be used by +// ProcessGroupNCCL or SymmetricMemory. + +namespace c10d::cuda { + +bool deviceSupportsMulticast(int device_idx); + +} // namespace c10d::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h new file mode 100644 index 0000000000000000000000000000000000000000..cf343d0eef5a8b1a06ab96d0fbf0a19aaa61f87f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/debug.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#pragma once + +#include + +namespace c10d { + +enum class DebugLevel { Off = 0, Info = 1, Detail = 2 }; + +TORCH_API void setDebugLevel(DebugLevel level); + +// Sets the debug level based on the value of the `TORCH_DISTRIBUTED_DEBUG` +// environment variable. +TORCH_API void setDebugLevelFromEnvironment(); + +TORCH_API DebugLevel debug_level() noexcept; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp new file mode 100644 index 0000000000000000000000000000000000000000..c56d6097a4d48dfe170ebab6b5e49d7f8504e83e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/default_comm_hooks.hpp @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d { + +enum class BuiltinCommHookType : uint8_t { + ALLREDUCE = 1, + FP16_COMPRESS = 2, +}; + +class AllReduceCommHook + : public CppCommHookInterface> { + public: + explicit AllReduceCommHook(const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~AllReduceCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +class FP16CompressCommHook + : public CppCommHookInterface> { + public: + explicit FP16CompressCommHook(const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~FP16CompressCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +// Almost same as AllReduceCommHook, but without division inside the hook. 
+// This enables the optimization of fusing copy and division and saves one scan +// over all the input parameters, when no communication hook is provided by the +// user. Only used internally and not released as a public built-in +// communication hook. +class _AllReduceBySumCommHook + : public CppCommHookInterface> { + public: + explicit _AllReduceBySumCommHook( + const c10::intrusive_ptr& state) + : CppCommHookInterface>(state) {} + + ~_AllReduceBySumCommHook() override = default; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h new file mode 100644 index 0000000000000000000000000000000000000000..e4e1a3e2d2be49904528720168ffc383b4332f9e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/error.h @@ -0,0 +1,58 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Facebook, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#pragma once + +#include +#include + +#include + +namespace fmt { + +template <> +struct formatter { + constexpr auto parse(format_parse_context& ctx) const { + return ctx.begin(); + } + + template + auto format(const std::error_category& cat, FormatContext& ctx) const { + if (std::strcmp(cat.name(), "generic") == 0) { + return fmt::format_to(ctx.out(), "errno"); + } else { + return fmt::format_to(ctx.out(), "{} error", cat.name()); + } + } +}; + +template <> +struct formatter { + constexpr auto parse(format_parse_context& ctx) const { + return ctx.begin(); + } + + template + auto format(const std::error_code& err, FormatContext& ctx) const { + return fmt::format_to( + ctx.out(), "({}: {} - {})", err.category(), err.value(), err.message()); + } +}; + +} // namespace fmt + +namespace c10d::detail { + +inline std::error_code lastError() noexcept { + return std::error_code{errno, std::generic_category()}; +} + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h new file mode 100644 index 0000000000000000000000000000000000000000..b5e5c995331893cfd6f1f5a5666deeb2dd316bd3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/exception.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Facebook, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#pragma once + +#include +#include + +// Utility macro similar to C10_THROW_ERROR, the major difference is that this +// macro handles exception types defined in the c10d namespace, whereas +// C10_THROW_ERROR requires an exception to be defined in the c10 namespace. +#define C10D_THROW_ERROR(err_type, ...) \ + throw ::c10d::err_type( \ + {__func__, __FILE__, static_cast(__LINE__)}, \ + c10::str(__VA_ARGS__)) + +#define C10D_CHECK_WITH(error_t, cond, ...) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + C10D_THROW_ERROR( \ + error_t, TORCH_CHECK_MSG(cond, "", c10::str(__VA_ARGS__))); \ + } + +namespace c10d { + +using c10::DistNetworkError; +using c10::DistStoreError; + +class TORCH_API SocketError : public DistNetworkError { + using DistNetworkError::DistNetworkError; +}; + +class TORCH_API TimeoutError : public DistNetworkError { + using DistNetworkError::DistNetworkError; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp new file mode 100644 index 0000000000000000000000000000000000000000..9cb5cf16044d0e6fc6b6a8fdd8774a17c7cfcaf4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logger.hpp @@ -0,0 +1,176 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace c10d { + +// A struct to hold the latest status of the process group. 
+struct ProcessGroupStatus { + // the sequential number of the last collective enqueued into workMetaList_ + // This is useful for identifying a rank that has not join a collective + // initialized to be -1 to indicate no collective has been enqueued + int64_t lastEnqueuedSeq{-1}; + // the sequential number of the last collective started as the kernel + int64_t lastStartedSeq{-1}; + // the sequential number of the last collective completed marked by + // the watchdog thread + // initialized to be -1 to indicate no collective has been completed + int64_t lastCompletedSeq{-1}; + + // the name of the last collective enqueued into workMetaList_ + std::string lastEnqueuedWorkName; + // the name of the last collective started as the kernel + std::string lastStartedWorkName; + // the name of the last collective completed + std::string lastCompletedWorkName; + + // the sizes of the last work enqueued + size_t lastEnqueuedNumelIn; + size_t lastEnqueuedNumelOut; + // the sizes of the last work completed + size_t lastCompletedNumelIn; + size_t lastCompletedNumelOut; + // the sizes of the last work started + size_t lastStartedNumelIn; + size_t lastStartedNumelOut; +}; + +class TORCH_API Logger { + public: + explicit Logger(std::shared_ptr reducer); + // Set logging data that can be got during DistributedDataParallel + // construction time. + void set_construction_data_and_log( + const std::string& module_name, + const std::vector& device_ids, + int output_device, + bool broadcast_buffers, + bool has_sync_bn, + bool static_graph); + + void set_static_graph(); + + // An interface for users to get DDPLoggingData and log them + // in the applications. Explanation of logging fields are in + // "struct DDPLoggingData" of "torch/c10/util/Logging.h". + at::DDPLoggingData get_ddp_logging_data(); + + // Stream insertion operator for logging data to stream under + // TORCH_DISTRIBUTED_DEBUG. 
+ friend std::ostream& operator<<(std::ostream& output, const Logger& logger); + + ~Logger() noexcept(false) { + // Log if DDP graph is static in Logger dtor instead of Reducer dtor since + // Logger is deleted before Reducer. + log_if_graph_static(reducer_->ddp_graph_static()); + } + + // Set environment variables. + void set_env_variables(); + // Set parameters stats. + void set_parameter_stats(); + // Get size of each bucket (Bytes). + std::vector get_bucket_sizes(); + // Get variable indices for each bucket. + std::vector> get_per_bucket_variable_indices(); + // Set comm. hook, if used + void set_comm_hook(const std::string& hook); + // Set running with uneven input detection (model.join() context manager) + void set_uneven_input_join(); + + // Reset performance stats at current iteration + void reset_performance_stats(); + + // Calculate avg stats using cpu timer and gpu timer + // that has been recorded in reducer. + void calculate_avg_time( + int64_t& avg_time, + int64_t& time_duration, + Timer& timer, + Timer::Event start_event, + Timer::Event end_event); + + // Set the absolute time of the event that has been recorded in reducer. + void set_event_time(int64_t& event_time, Timer& timer, Timer::Event event); + // Set stats that can be collected only during + // training loop. It is called at the beginning of forward call + // to record the run time stats of sampled iterations that previously ran. + // GPU performance stats are collected only for single process + // single device program and single device module right now. + // TODO to support single process multiple devices and multi device modules, + // events need to be created and recorded on multiple devices. + void set_runtime_stats_and_log(); + + // Called when DDP/reducer is failing with an error. 
The + // logging data structure will have two fields filled: "has_error" indicating + // that this iteration encountered an error and other fields are not valid, + // and "error", a string which contains the error message that DDP failed + // with. + template + void set_error_and_log(const std::string& ddp_error, const Args&... args) { + ddp_logging_data_->ints_map["has_error"] = 1; + auto err = c10::str(ddp_error, args...); + ddp_logging_data_->strs_map["error"] = err; + // Report the iteration we are erroring at so user knows how many examples + // successfully processed before this error was hit. + ddp_logging_data_->ints_map["iteration"] = reducer_->num_iterations_; + at::LogPyTorchDDPUsage(*ddp_logging_data_); + } + + // When running without static graph, called when reducer is destroyed to log + // if graph was actually static and is a candidate for static graph + // optimization. + void log_if_graph_static(bool is_static); + + private: + // ddp_logging_data_ is used to hold all the ddp related logging + // data fields. + std::unique_ptr ddp_logging_data_; + std::shared_ptr reducer_; + // track the number of iterations when runtime stats are collected so far. + long num_iterations_stats_recorded_ = 0; +}; + +// a generic logging data struct that holds different types of logging data. +// starting with key value pairs of strings and integers, +// It can be extended to more types as needed. +struct C10dLoggingData { + // logging fields that are string types. + std::map strings; + // logging fields that are int64_t types. 
+ std::map integers; +}; + +class TORCH_API C10dLogger { + public: + C10dLogger(const C10dLogger&) = default; + C10dLogger(C10dLogger&&) = delete; + C10dLogger& operator=(const C10dLogger&) = default; + C10dLogger& operator=(C10dLogger&&) = delete; + virtual ~C10dLogger() = default; + virtual void log(const C10dLoggingData& data); + static C10dLogger* getLogger(); + static void registerLogger(std::unique_ptr /*logger*/); + + protected: + // singletion, hide constructor from the public + C10dLogger(std::string logDestination) + : logDestination_(std::move(logDestination)) {} + + // the name of the destination this logger should log to + std::string logDestination_; + + private: + static std::unique_ptr logger_; + static std::atomic registered_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..596e4686212800c6ac419ac9fd77519d90257160 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/logging.h @@ -0,0 +1,52 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. 
+ +#pragma once + +#include + +#include +#include +#include + +namespace c10d::detail { + +enum class LogLevel { Trace, Debug, Info, Warning, Error }; + +TORCH_API bool isLogLevelEnabled(LogLevel level) noexcept; + +template +// NOLINTNEXTLINE(cppcoreguidelines-missing-std-forward) +std::string formatLogMessage(fmt::string_view fmt, T&&... args) { + return fmt::vformat(fmt, fmt::make_format_args(args...)); +} + +} // namespace c10d::detail + +#define C10D_ERROR(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Error)) \ + LOG(ERROR) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_WARNING(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Warning)) \ + LOG(WARNING) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_INFO(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Info)) \ + LOG(INFO) << "[c10d] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_DEBUG(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Debug)) \ + LOG(INFO) << "[c10d - debug] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#define C10D_TRACE(...) \ + if (c10d::detail::isLogLevelEnabled(c10d::detail::LogLevel::Trace)) \ + LOG(INFO) << "[c10d - trace] " << c10d::detail::formatLogMessage(__VA_ARGS__) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp new file mode 100644 index 0000000000000000000000000000000000000000..27c25dc8235c65258a8fa05d88eca27ceb87416a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_callback_work.hpp @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { + +// PythonCallbackWork is a subclass of Work that wraps a Python callback +// function that implements wait(). This allows asynchronous work to +// be integrated with Python code, enabling custom completion logic or +// post-processing in Python. +class PythonCallbackWork : public Work { + public: + explicit PythonCallbackWork(py::function callback); + + ~PythonCallbackWork() override; + + bool wait(std::chrono::milliseconds timeout) override; + + c10::intrusive_ptr getFuture() override; + + private: + py::function callback_; + c10::intrusive_ptr future_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h new file mode 100644 index 0000000000000000000000000000000000000000..e6e985f87ced478c67147c8405303e682aac7403 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/python_comm_hook.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include + +namespace c10d { + +class TORCH_PYTHON_API PythonCommHook : public CommHookInterface { + public: + // Takes a state and a callable hook. The inputs are Python objects. + // The state is passed to the hook in runHook method, and it can be used to + // maintain and update any state information during the execution of the hook. + // The hook performs user-specified processing and returns a future indicating + // asynchronous communication of gradients. + PythonCommHook(py::object state, py::object hook) + : state_(std::move(state)), hook_(std::move(hook)) {} + + ~PythonCommHook() override; + + c10::intrusive_ptr runHook(GradBucket& bucket) override; + + at::Tensor parseHookResult(const c10::IValue& result) override; + + private: + // Only needed for stateful communication. + py::object state_; + py::object hook_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h new file mode 100644 index 0000000000000000000000000000000000000000..08e8f2948af7ec89093fb8c7701bf7e2fe21a696 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +namespace torch::distributed::c10d::quantization { + +at::Tensor _float_to_bfloat16_cpu(const at::Tensor& input); +at::Tensor _bfloat16_to_float_cpu(const at::Tensor& input); + +} // namespace torch::distributed::c10d::quantization + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h new file mode 100644 index 0000000000000000000000000000000000000000..9c21a8f5a07cfab89ce47f08d201abb1f7e1bb32 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_gpu.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +namespace torch::distributed::c10d::quantization { + +at::Tensor _float_to_bfloat16_cuda(const at::Tensor& input); +at::Tensor _bfloat16_to_float_cuda(const at::Tensor& input); + +} // namespace torch::distributed::c10d::quantization + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..a2a6a171ba92a0741a7cc6f1990987917ff1b657 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/quantization/quantization_utils.h @@ -0,0 +1,39 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include + +#include + +inline std::string torch_tensor_device_name(const at::Tensor& ten) { + return c10::DeviceTypeName(ten.device().type()); +} + +#define TENSOR_NDIM_EQUALS(ten, dims) \ + TORCH_CHECK( \ + (ten).ndimension() == (dims), \ + "Tensor '" #ten "' must have " #dims \ + " dimension(s). " \ + "Found ", \ + (ten).ndimension()) + +#define TENSOR_ON_CPU(x) \ + TORCH_CHECK( \ + !x.is_cuda(), \ + #x " must be a CPU tensor; it is currently on device ", \ + torch_tensor_device_name(x)) + +#define TENSOR_ON_CUDA_GPU(x) \ + TORCH_CHECK( \ + x.is_cuda(), \ + #x " must be a CUDA tensor; it is currently on device ", \ + torch_tensor_device_name(x)) + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..a8a5b08a4d2166a883ff047149a35abe0840a118 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer.hpp @@ -0,0 +1,607 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifndef _WIN32 +#include +#endif + +namespace c10d { + +constexpr int kDefaultFirstBucketBytes = 1024 * 1024; +constexpr int kDefaultBucketBytesCap = 25 * 1024 * 1024; +// Collect runtime stats once for every kDDPRuntimeLoggingSampleRate iterations. +constexpr int kDDPRuntimeLoggingSampleRate = 100; + +// Forward declaration +class Logger; + +// Local accumulator type for a single bucket. +struct BucketAccumulator { + std::vector indices; + size_t size = 0; + size_t size_limit = 0; +}; + +class TORCH_API Reducer { + public: + // The constructor takes a list of variables (i.e. parameters) for this + // process's single model replica (as DDP assumes single-process + // single-device). The bucket assignment for this reducer, `bucket_indices`, + // is specified as a list of buckets, each of which is specified as a list of + // indices into the bucket's `variables` list. 
+ explicit Reducer( + std::vector params, + std::vector> bucket_indices, + c10::intrusive_ptr process_group, + std::vector expect_sparse_gradients, + int64_t bucket_bytes_cap, + bool find_unused_parameters, + bool gradient_as_bucket_view, + std::unordered_map param_names, + int64_t first_bucket_bytes_cap, + bool skip_all_reduce_unused_params, + bool use_python_reducer); + + ~Reducer() noexcept(false); + + // To (re-)initialize bucket assignment, pass a list of buckets, each of + // which is specified by a list of indices in the bucket's `variables` list. + // This function performs validation that the variables within a bucket + // all live on the same device and have the same dimensionality. + void initialize_buckets(std::vector> bucket_indices); + + void autograd_hook(size_t index); + + // This function is called when the forward function has produced an output, + // and the user wishes to reduce gradients in the backwards pass. + // If they don't, and wish to accumulate gradients before reducing them, + // a call to this function can simply be omitted. + void prepare_for_backward(const std::vector& outputs); + + // Called at the beginning of forward() inside DistributedDataParallel, + // right now it captures the starting time of forward in each iteration. + void prepare_for_forward(); + + // Returns the relative time in nanoseconds when gradients were ready, + // with respect to the time `prepare_for_backward` was called. The + // vector is for parameters for a single model replica. + std::vector get_backward_stats() const { + return backward_stats_; + } + + // Registers a hook to the reducer. The hook is `CommHookInterface` + // type to allow both Python and CPP hooks. This function can only + // be called once before calling backward. + // Cannot combine with the call of `register_builtin_comm_hook`. + void register_comm_hook(std::unique_ptr iface); + + // Registers a built-in C++ comm hook to the reducer. 
This function can only + // be called once before calling backward. + // Cannot combine with the call of `register_comm_hook`. + void register_builtin_comm_hook(c10d::BuiltinCommHookType comm_hook_type); + + // Informs reducer that optimizer is running in backward, so gradients + // don't need to be copied from buckets as the optimizer would've already + // been applied. + void set_optimizer_in_backward() { + optim_in_backward_ = true; + } + + // Runs allreduce or installed communication hook given GradBucket instance. + c10::intrusive_ptr run_comm_hook( + GradBucket& grad_bucket); + + // Runs default allreduce hook. + c10::intrusive_ptr run_allreduce_hook( + GradBucket& grad_bucket); + + // Returns gradient buckets in sequential order of buckets_. This is the order + // in which buckets are reduced across processes. If return_zero_tensors=true, + // will return zero tensors of the same shape instead of the true tensors. + std::vector get_grad_buckets( + bool return_zero_tensors = true) const; + + // Rebuild buckets based on rebuilt_params_ and rebuilt_param_indices_ + // according to when tensors received grads in the backward pass. + // TODO this function makes broadcast communication call and + // could be overlapped with next forward() call, thus + // it could be async. Will make it async when rebuilding buckets for + // find_unused_parameters = true case, as we could rebuild buckets more than + // once for find_unused_parameters = true case, where subgraphs are trained + // and parameter indices order may change more frequently. + // For find_unused_parameters = false case, buckets are only rebuilt once, + // the performance cost is negligible. Returns true if the buckets were + // rebuilt. + bool rebuild_buckets(); + + void setSparseMetadata(std::map& metadata); + + // Install futures that should be awaited at end of backwards. 
Currently these + // are only used by user-defined custom buffer reduction hooks, but can be + // generalized to any user-originating futures that need to be awaited. + void install_futures( + const c10::List>& futs); + + // Returns true if we should rebuild buckets, else false. We only rebuild + // buckets once after the first iteration and never rebuild them if + // find_unused_parameters_. + inline bool should_rebuild_buckets() const { + return (static_graph_ || !find_unused_parameters_) && !has_rebuilt_bucket_; + } + + // Pushes all parameters to be rebuilt. + void push_rebuilt_params_for_all_indices(); + + // Creates and sets ForwardPassWorkHandle given a Work and the + // corresponding tensor being reduced. + void set_forward_pass_work_handle( + c10::intrusive_ptr forwardPassWorkHandle, + bool useStaticWorldSize); + + // Retrieve on-device tensors used to track locally unused parameters. It is + // a tensor where index i = 1 if the Variable with that index has been used. + at::Tensor get_local_used_map_on_device() const; + + // An function for users to set sample_rate of collecting + // runtime stats. The time stats will be recorded for the + // first 10 iterations, after 10 iterations time stats will be + // recorded once every "sample_rate" training iterations. + void set_ddp_runtime_logging_sample_rate(int sample_rate); + + // Specify the training graph is static. + void set_static_graph(); + + // Delay all reduce to be after all gradients' calculation is complete. + void delay_all_reduce(); + + void set_mixed_precision_param_dtype(c10::ScalarType dtype); + + // Weak reference to associated DDP logger. The reference is weak to avoid + // refcycle between reducer and logger. + void set_logger(std::weak_ptr logger); + + // When graph is not explicitly set by user as static and has unused + // parameters, this will return whether the graph has been static until the + // current iteration, which means unused params set has not changed. 
+ bool ddp_graph_static(); + + // Removes autograd hooks registered by the Reducer on the model parameters. + void remove_autograd_hooks(); + + // Checks whether or not the reducer has finalized the current backward + // iteration. + void check_finalized(); + + // Updates the underlying process group used by DDP with the new process + // group. + void update_process_group( + c10::intrusive_ptr new_process_group); + + // Resets reducer state. + void reset_state(); + + protected: + // Forward declaration. + struct Bucket; + + void push_rebuilt_params(const size_t& index); + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + mutable std::mutex mutex_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector params_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + c10::intrusive_ptr<::c10d::ProcessGroup> process_group_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector expect_sparse_gradients_; + + std::vector> + grad_accumulators_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes) + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unordered_map gradAccToVariableMap_; + std::vector>> + hooks_; // NOLINT(cppcoreguidelines-non-private-member-variables-in-classes) + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool expect_autograd_hooks_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool require_finalize_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + size_t next_bucket_; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool has_marked_unused_parameters_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const bool find_unused_parameters_; + // 
NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const bool gradient_as_bucket_view_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector unused_parameters_; + // Previous iteration's unused params, used for checking if unused parameters + // change between iterations. Only filled during the first backwards call. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector prev_iteration_unused_parameters_; + // Whether graph is static or not. When user does not explicitly set static + // graph, the only possible dynamism is set of unused parameters changing + // between iterations which is tracked by this flag. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + bool ddp_graph_static_{true}; + // Locally used parameter maps indicating if parameters are used locally + // during the current iteration or no_sync session if no_sync is on. + // Each map is a one-dim int32 tensor of number of parameters. These tensors + // are marked in autograd_hook to indicate the corresponding param has been + // used, and get allreduced in the end of backward step of current iteration + // or no_sync session for figuring out the globally unused parameters. + // + // local_used_map_: CPU tensor for bookkeeping locally used params + // local_used_map_dev_: dev tensor for reducing globally unused params + at::Tensor local_used_map_; + at::Tensor local_used_map_dev_; + // Indicate that reduction is done and D2H copy is done as well. + bool local_used_map_reduced_; + + // Weak pointer to associated DDP logger. + std::weak_ptr logger_; + // List of futures installed by Reducer::install_futures that should be + // awaited at the end of backwards pass. + std::optional>> + installed_futures_{std::nullopt}; + // Mixed precision parameter dtype for bucket type checking. 
+ std::optional mixed_precision_param_dtype_{std::nullopt}; + + // Work handle for allreduce on local_used_map_ + c10::intrusive_ptr local_used_work_; + + void mark_variable_ready_dense(size_t variable_index); + + void mark_variable_ready_sparse(size_t variable_index); + + void mark_variable_ready(size_t variable_index); + + void mark_bucket_ready(size_t bucket_index); + + void finalize_bucket_dense(Bucket& bucket); + + void finalize_backward(); + + // Returns list of model parameters corresponding to the given bucket. + // bucket_index is a key to cache after buckets are rebuilt, after which this + // mapping never changes. + std::vector get_variables_for_bucket( + size_t bucket_index, + const Bucket& bucket) const; + + // Asserts that the reduction for the previous iteration has finished before + // rebuilding buckets or kicking off the next one. + void ensure_prior_reduction_finished(); + + // Broadcast rebuilt buckets from rank 0 to other ranks before initializing + // the buckets + void sync_bucket_indices(std::vector>& bucket_indices); + + // We'd like to use DistAutogradContext::GradCallback here but dist autograd + // doesn't exist under Windows. So we just directly use the concrete type but + // to preserve and enforce our original intent we do a static assert when dist + // autograd is available. + using GradCallback = std::function; +#ifndef _WIN32 + static_assert( + std::is_same_v< + GradCallback, + torch::distributed::autograd::DistAutogradContext::GradCallback>); +#endif + void runGradCallbackForVariable(at::Tensor& variable, const GradCallback& cb); + + // This function is called inside `initialize_buckets()`. It initializes both + // `bucket_views_in` and `bucket_views_out` with views for each variable's + // gradient into the bucket's flattened `gradients` tensor. Views serve as + // entry points to `copy_()` each grad's data in/out of the flattened + // `gradients` tensor. 
+ void initialize_bucket_views(Bucket& bucket); + + // This function is called inside `finalize_backward`, it happens only if + // DDP communication hook was registered to recreate just bucket_views_out + // with the result of `future_work`. + void populate_bucket_views_out(Bucket& bucket, at::Tensor& tensor); + + // If gradient_as_bucket_view_ is false, after allreduce buckets, + // copy bucket results back to grads. + void copy_bucket_to_grad( + at::Tensor& variable, + Reducer::Bucket& bucket, + size_t intra_bucket_index, + bool global_unused); + // Check layout of grad and bucket_view before copying the grad to bucket. + void check_grad_layout(const at::Tensor& grad, const at::Tensor& bucket_view); + + // A bucket contains [1..N] gradients to be reduced, where the gradients + // have the same dtype and device. + // Coalescing gradients together before reducing can result in lower overhead + // and/or faster time to completion. Coalescing requires the constituent + // gradients to have the same dtype and device, and the resulting flattened + // tensor uses that common dtype and device. The flattened tensor is filled + // as the corresponding gradients are computed (triggered by autograd hooks), + // and the buckets are reduced in a predetermined order consistent across + // processes. + struct Bucket { + // Gradients of the bucket flattened into a 1-dimensional tensor + at::Tensor gradients; + + // Views into the `gradients` tensor for each individual gradient + // Each view is created with layout (size and stride) matching the + // gradient's expected layout (see the "Gradient Layout Contract" in + // torch/csrc/autograd/functions/accumulate_grad.h). + // `bucket_views_in[i].copy_(grad)` and `grad.copy_(bucket_views_out[i])` + // provide convenient ways to copy gradient data in/out of `gradients`, + // respectively. 
+ // We keep both `bucket_views_in` and `bucket_views_out` because + // registering a DDP communication hook may re-initialize + // `bucket_views_out` with the value of the hook's `future_work` but we + // still need separate views into the bucket's original flattened gradient + // to copy in gradient data. + std::vector bucket_views_in; + std::vector bucket_views_out; + + // Variables whose gradients are held in this bucket + // We use refcounted tensors here so that we can easily unflatten the + // bucket's flattened `gradients` tensor into the participating variables + // after reduction has completed. + std::vector variables; + + // Per-variable offset/length into the flattened `gradients` tensor and + // the corresponding `GradBucket` instance for communication hooks + std::vector offsets; + std::vector lengths; + + // Per-variable sizes slicing into the bucket's `gradients` tensor + std::vector sizes_vec; + + // Number of gradients left to be computed before the bucket is ready to + // be reduced + size_t pending; + + // Global indices of participating variables in the bucket + std::vector variable_indices; + + // Future work handle for DDP communication hook + // If no hook is registered, a temporary vanilla allreduce hook is used. + c10::intrusive_ptr future_work; + + // if this bucket contains complex parameters + bool is_complex_bucket = false; + + // If this bucket should expect a single sparse gradient + // If `true`, then this implies that `bucket.variables.size() == 1`. + bool expect_sparse_gradient = false; + + // Sparse indices tensor + std::optional sparse_tensor_indices = std::nullopt; + + // TODO(@pietern) + // Memory copies from gradient tensors into the bucket are potentially + // done on different CUDA streams. We record an event for every copy + // so that we can synchronize with them prior to kicking off the reduction. 
+ // std::vector events; + }; + + std::vector buckets_; + + // A variable locator locates a particular variable in the reducer's buckets + struct VariableLocator { + // Index of the bucket containing the variable in the `buckets_` vector + size_t bucket_index; + // Index of the variable in the bucket, which may be used consistently + // across `bucket_views_in`, `bucket_views_out`, `variables`, `offsets`, + // `lengths`, `sizes_vec`, and `variable_indices` in `Bucket` + size_t intra_bucket_index; + + VariableLocator() = default; + + VariableLocator(size_t bucket_index_, size_t intra_bucket_index_) + : bucket_index(bucket_index_), + intra_bucket_index(intra_bucket_index_) {} + }; + + // Map the index of a variable to its location in the bucket structure. + std::vector variable_locators_; + + // track the number of iterations to synchronize grads in training so far. + long num_iterations_; + // track distinct iteration of backward call. This is distinct from + // num_iterations_, for example in the case of multiple forward before + // backward. + long num_bwd_calls_; + // whether the first autograd hook for a distinct backward pass has been + // called. + bool first_autograd_hook_called_; + // track the number of buckets that have been ready for + // communication calls like allReduce or communication hooks. + int num_buckets_ready_; + // track the number of buckets that have been reduced. + int num_buckets_reduced_; + + // Timing information. + int64_t backward_compute_start_time_ = -1; + std::unique_ptr timer_; + + // We collect the relative timestamp of every gradient being ready + // when executing autograd. This can be used to derive a timeline of + // the point in time buckets were ready, or ideal bucket assignment/ordering. 
+ std::vector backward_stats_; + + bool should_collect_runtime_stats(); + void record_forward_compute_start_time(); + void record_backward_compute_start_time(); + void record_backward_compute_end_time(); + void record_backward_comm_start_time(); + void record_backward_comm_end_time(); + + int get_ddp_runtime_logging_sample_rate(); + int ddp_runtime_logging_sample_rate_ = kDDPRuntimeLoggingSampleRate; + + bool is_multi_device_module_ = false; + + // Following variables are to help build dynamic bucket order + bool has_rebuilt_bucket_; + std::vector rebuilt_params_; + std::vector rebuilt_param_indices_; + const int64_t bucket_bytes_cap_; + +#ifndef _WIN32 + struct RpcContext { + using ContextPtr = torch::distributed::autograd::ContextPtr; + // The shared_ptr is to hold the context instance. + ContextPtr context_ptr_holder; + std::atomic context_ptr{nullptr}; + + void set(ContextPtr&& new_context_ptr); + }; + RpcContext rpc_context_; +#endif + + // A struct containing work handle and tensor for allreduce scheduled in + // forward pass, if applicable. + struct ForwardPassAllreduceWork { + c10::intrusive_ptr workHandle; + at::Tensor resultTensor; + // whether we should divide by the initial world_size or the no. of + // remaining DDP ranks. + bool useStaticWorldSize; + }; + + // Handle for the currently scheduled allreduce in the forward pass, if + // applicable. + ForwardPassAllreduceWork forwardPassWorkHandle_; + + // Division factor for reduction of gradients. + // Equal to the process group size, with an exception of handling uneven + // input. + int div_factor_; + + bool static_graph_; + + bool skip_all_reduce_unused_params_; + + // Key: size_t (index), Value: the number of times that a variable's + // autograd_hook() should be triggered before marking this variable's grad as + // ready for communication. Map will not change after 1st iteration. 
+ std::unordered_map numGradHooksTriggeredMap_; + // Key: size_t (index), Value: the number of times that a variable's + // autograd_hook() are left to be triggered before marking this variable's + // grad as ready for communication. Map will change after 1st iteration to + // track a grad is ready for communication or not. + std::unordered_map numGradHooksTriggeredMapPerIteration_; + + private: + // reset counting for buckets before backward starts + void reset_bucket_counting(); + // search unused parameters beore backward starts + void search_unused_parameters( + const std::vector& outputs); + void set_divide_factor(); + // kick off all reduce for the ready bucket + void all_reduce_bucket(Bucket& bucket); + // kick off all reduce to local used map, it can help find global unused + // parameters + void all_reduce_local_used_map(); + // initialize locally used parameter maps + void initialize_local_used_map(); + // get current cuda stream + const c10::Stream get_current_stream(); + bool dynamic_graph_find_unused(); + bool static_graph_first_iteration(); + bool static_graph_after_first_iteration(); + + bool is_unused_bucket(Bucket& bucket); + bool should_skip_all_reduce_bucket(Bucket& bucket); + + // comm_hook_ is used to access the DDP communication hook if registered. + std::unique_ptr comm_hook_; + + // Sparse metadata contains the indices that will be used + // when calling into sparse allreduce. + // This is only used in the sparse allreduce collective calls + std::unique_ptr> sparse_metadata_; + + // Debug level setting. It is parsed once when Reducer is constructed, and + // remains the same across a single invocation of DDP training. + DebugLevel ddp_debug_level_; + // Mapping of variable index to fully qualified name of model to notify users + // about errors when certain parameters do not get gradient. + std::unordered_map param_names_; + // Variable indices stored sequentially in order of when the gradient is ready + // for the current backwards pass. 
+ std::vector grad_ready_order_indices_; + // Bytes capacity of first bucket, can be configured by user + int64_t first_bucket_bytes_cap_; + // Per iteration set of parameter indices that have been marked ready. + std::unordered_set perIterationReadyParams_; + // Retrieves parameter names that have not been marked as ready as part of + // previous iteration. + std::vector getUnmarkedParamsForIteration(); + // Retrieves parameter indices that have not been marked as ready as part of + // previous iteration. + std::vector getUnmarkedParamIndicesForIteration(); + // Raises appropriate error if mark_variable_ready is called on the same + // variable twice, which is unexpected. + void checkAndRaiseMarkedTwiceError(size_t curVariableIndex); + // Retrieves parameter corresponding to the given VariableIndex. + at::Tensor& get_param_from_index(size_t index); + // Python reducer keeps C++ reducer initialized. To remove this flag, + // we need to refactor the DDP wrapper's initialization. + bool use_python_reducer_; + + // Cached bucket index to model parameter mapping. Populated after buckets + // are rebuilt after which this mapping is static. + mutable std::unordered_map> + cached_variables_for_bucket_; + + bool optim_in_backward_{false}; + friend class Logger; +}; + +// This is equivalent to take_tensors but returns indices into the +// tensor list argument for bucket assignment. Also, it is aware +// of device placement and will not allow buckets to span devices. +// The index of tensors[i] assigned to bucket is tensor_indices[i], +// when tensor_indices is empty, the index of tensors[i] assigned to +// bucket is i. 
+TORCH_API std::tuple>, std::vector> +compute_bucket_assignment_by_size( + const std::vector& tensors, + const std::vector& bucket_size, + const std::vector& expect_sparse_gradient = {}, + const std::vector& tensor_indices = {}, + const std::optional>& logger = {}); + +// Verify models across all processes are the same as model on rank 0 with +// respect to no. of params and matching dtype/size/layout. +TORCH_API void verify_params_across_processes( + const c10::intrusive_ptr& process_group, + const std::vector& params, + const std::optional>& logger); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp new file mode 100644 index 0000000000000000000000000000000000000000..7b4320325c5ef027f4ad47a789b67e5168ee7dd7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/reducer_timer.hpp @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace c10d { +constexpr int kUnsetTime = -1; + +inline int64_t current_time_in_nanos() { + return c10::getTime(); +} + +class TORCH_API Timer { + private: + // The timestamp of forward call start time in each iteration. + int64_t forward_start_time = kUnsetTime; + // The timestamp of backward computation start and end time in each + // iteration. + int64_t backward_compute_start_time = kUnsetTime; + int64_t backward_compute_end_time = kUnsetTime; + // The timestamp of first communication call start time in each iteration. 
+ int64_t backward_comm_start_time = kUnsetTime; + // The timestamp of last communication call end time in each iteration. + int64_t backward_comm_end_time = kUnsetTime; + + public: + enum class Event : uint8_t { + kForwardStart, + kBackwardComputeStart, + kBackwardComputeEnd, + kBackwardCommStart, + kBackwardCommEnd, + }; + + // Record the current event, i.e., mark it as having occurred now. Default + // CPU implementation. + virtual void record(Event event) { + getTimeRef(event) = current_time_in_nanos(); + } + + // Return the difference between when two events occurred, in nanoseconds. + // Or nullopt if one of them hasn't been recorded. + virtual std::optional measureDifference(Event start, Event end) = 0; + + virtual ~Timer() = default; + + // Return host-side timestamp, or nullopt if it has not yet been recorded. + std::optional getTimestamp(Event event) { + auto time = getTimeRef(event); + if (time == kUnsetTime) { + return std::nullopt; + } else { + return time; + } + } + + // Return host-side time member variable corresponding to the given event. + int64_t& getTimeRef(Event event) { + switch (event) { + case Event::kForwardStart: + return forward_start_time; + case Event::kBackwardComputeStart: + return backward_compute_start_time; + case Event::kBackwardComputeEnd: + return backward_compute_end_time; + case Event::kBackwardCommStart: + return backward_comm_start_time; + case Event::kBackwardCommEnd: + return backward_comm_end_time; + default: + TORCH_INTERNAL_ASSERT(false); + } + } +}; + +TORCH_DECLARE_TYPED_REGISTRY( + TimerRegistry, + c10::DeviceType, + Timer, + std::unique_ptr, + c10::Device); +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp new file mode 100644 index 0000000000000000000000000000000000000000..fa88322a6fa02adc0285afecf0fa128048f310f4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/sequence_num.hpp @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d { +constexpr int kUnsetSeqNum = 0; + +namespace { +constexpr int kByteOffset = 8; +} // namespace + +// Converts from int to char vec to write in store +template +inline std::vector toVec(uint64_t num, int numBytes) { + std::vector values; + // Read off bytes from right to left, pushing them into + // char array. + for (const auto i : c10::irange(numBytes)) { + uint8_t x = (num >> (kByteOffset * i)) & 0xff; + values.push_back(static_cast(x)); + } + return values; +} + +// Converts from char vec (such as from store read) to int. +template +inline uint64_t fromVec(const std::vector& values) { + uint64_t num = 0; + // Set each byte at the correct location on num + for (const auto i : c10::irange(values.size())) { + uint8_t x = static_cast(values[i]); + num |= (static_cast(x) << (kByteOffset * i)); + } + return num; +} + +class TORCH_API SequenceNum { + public: + SequenceNum(); + explicit SequenceNum(const uint64_t num); + // Retrieve num_. Will throw if not set. + uint64_t get() const; + // Increment num_. Will throw if not set. + void increment(); + // Increment num_ and return the old value. Will throw if not set. 
+ uint64_t getAndIncrement(); + // Sets num_ + void set(const uint64_t num); + // Returns true if this SequenceNum is properly initialized with a value, else + // false. + bool isSet() const; + + SequenceNum& operator=(const SequenceNum& other); + + SequenceNum(const SequenceNum& other); + + private: + std::optional num_; + mutable std::mutex lock_; +}; + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h new file mode 100644 index 0000000000000000000000000000000000000000..8d2ec93016c028f4d7c79410ab5c3a11cfa5264f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket.h @@ -0,0 +1,110 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and its affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace c10d::detail { + +class SocketOptions { + public: + SocketOptions& prefer_ipv6(bool value) noexcept { + prefer_ipv6_ = value; + + return *this; + } + + bool prefer_ipv6() const noexcept { + return prefer_ipv6_; + } + + SocketOptions& connect_timeout(std::chrono::milliseconds value) noexcept { + connect_timeout_ = value; + + return *this; + } + + std::chrono::milliseconds connect_timeout() const noexcept { + return connect_timeout_; + } + + // Sets the backoff policy to use for socket connect ops. 
+ SocketOptions& connect_backoff(std::shared_ptr value) noexcept { + connect_backoff_ = std::move(value); + + return *this; + } + + const std::shared_ptr& connect_backoff() const noexcept { + return connect_backoff_; + } + + private: + bool prefer_ipv6_ = true; + std::chrono::milliseconds connect_timeout_{std::chrono::seconds{30}}; + std::shared_ptr connect_backoff_{ + std::make_shared(std::chrono::milliseconds(1000))}; +}; + +class SocketImpl; + +class Socket { + public: + // This function initializes the underlying socket library and must be called + // before any other socket function. + static void initialize(); + + static Socket listen(std::uint16_t port, const SocketOptions& opts = {}); + + static Socket listenFromFd(int fd, std::uint16_t expected_port); + + static Socket connect( + const std::string& host, + std::uint16_t port, + const SocketOptions& opts = {}); + + Socket() noexcept = default; + + Socket(const Socket& other) = delete; + + Socket& operator=(const Socket& other) = delete; + + Socket(Socket&& other) noexcept; + + Socket& operator=(Socket&& other) noexcept; + + ~Socket(); + + Socket accept() const; + + int handle() const noexcept; + + std::uint16_t port() const; + + bool waitForInput(std::chrono::milliseconds timeout); + + std::string repr() const; + + private: + explicit Socket(std::unique_ptr&& impl) noexcept; + + std::unique_ptr impl_; +}; +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h new file mode 100644 index 0000000000000000000000000000000000000000..f333c8fa12d8ad4f321072b9379c24529c7d3507 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/socket_fmt.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// (c) Meta Platforms, Inc. and affiliates. +// All rights reserved. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once + +/* +This file should not be included from other .h files and only used in cpp files +as it exposes the underlying platform specific socket headers. +*/ + +#include + +#ifdef _WIN32 +#include + +#include +#include +#else +#include +#endif + +namespace c10d::detail { + +// Returns a human-readable representation of the given socket address. +std::string formatSockAddr(const struct ::sockaddr* addr, socklen_t len); + +} // namespace c10d::detail + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h new file mode 100644 index 0000000000000000000000000000000000000000..480803752d9f3026d6b73a09cb1700a7c1678e5c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory-inl.h @@ -0,0 +1,363 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 900) && CUDART_VERSION >= 12010 +#define NVCC_SUPPORTS_MULTICAST 1 +#endif + +#include +#if defined(USE_ROCM) +#include +#endif +#if !defined(USE_ROCM) +#include +#if defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) +#include +#endif +#endif +#include + +namespace c10d::symmetric_memory { + +template +using Vec = at::native::memory::Vec; + +template +inline constexpr bool dependent_false = + at::native::memory::dependent_false; + +using at::native::memory::get_alignment; + +template +__device__ __forceinline__ uint32_t +cas(uint32_t* addr, uint32_t compare, uint32_t val) { +#if !defined(USE_ROCM) && defined(__CUDA_ARCH__) && (__CUDA_ARCH__ >= 600) + ::cuda::atomic_ref ref(*addr); + ref.compare_exchange_strong(compare, val, ::cuda::std::memory_order(Sem)); + return compare; +#elif defined(USE_ROCM) + __atomic_compare_exchange_n( + addr, &compare, val, false, static_cast(Sem), __ATOMIC_RELAXED); + return compare; +#else + CUDA_KERNEL_ASSERT(false); + return 0; +#endif +} + +__device__ __forceinline__ void trap() { +#if defined(USE_ROCM) + // abort() calls trap() under the covers. However, on ROCm, the trap is + // handled differently inside hip runtime. 
It collects a gpu core dump and + // causes linux kernel to create a core dump of the host application. + abort(); +#else + __trap(); +#endif +} + +__device__ __forceinline__ size_t global_timer_ns() { +#if defined(USE_ROCM) + static constexpr double MI300_FREQ_GHZ = 2.1; + return clock64() / MI300_FREQ_GHZ; +#else + size_t val; + asm volatile("mov.u64 %0, %globaltimer;" : "=l"(val) : : "memory"); + return val; +#endif +} + +constexpr size_t ns_per_ms = 1e6; + +template +__device__ __forceinline__ bool try_put_signal( + uint32_t* addr, + size_t timeout_ms) { + size_t deadline = global_timer_ns() + timeout_ms * ns_per_ms; + while (cas(addr, 0, 1) != 0) { + if (timeout_ms != 0 && global_timer_ns() > deadline) { + return false; + } + } + return true; +} + +template +__device__ __forceinline__ bool try_wait_signal( + uint32_t* addr, + size_t timeout_ms) { + size_t deadline = global_timer_ns() + timeout_ms * ns_per_ms; + while (cas(addr, 1, 0) != 1) { + if (timeout_ms != 0 && global_timer_ns() > deadline) { + return false; + } + } + return true; +} + +template +__device__ __forceinline__ void put_signal(uint32_t* addr) { + while (cas(addr, 0, 1) != 0) + ; +} + +template +__device__ __forceinline__ void wait_signal(uint32_t* addr) { + while (cas(addr, 1, 0) != 1) + ; +} + +// Synchronizes blocks with matching blockIdx across participating devices. +// Note: sync_remote_block itself is not a system level barrier/fence. It is a +// building block for expressing different synchronization patterns. 
+// +// Pattern 0: Ensures that all writes to symm_mem buffers from previous +// kernels across all devices are visible to the current kernel: +// +// sync_remote_blocks(...); +// __syncthreads(); +// +// Pattern 1: Ensures that all writes to symm_mem buffers from the current +// block are visible to all remote blocks with matching blockIdx: +// +// __syncthreads(); +// sync_remote_blocks(...); +// __syncthreads(); +// +// Pattern 2: Ensures that symm_mem buffers read by the current kernel are safe +// for writing by subsequent kernels across all devices. +// +// __syncthreads(); +// sync_remote_blocks(...); +template +__device__ __forceinline__ void sync_remote_blocks( + uint32_t** signal_pads, + size_t rank, + size_t world_size) { + if (threadIdx.x < world_size) { + auto target_rank = threadIdx.x; + if constexpr (hasPrevMemAccess) { + put_signal( + signal_pads[target_rank] + blockIdx.x * world_size + rank); + } else { + put_signal( + signal_pads[target_rank] + blockIdx.x * world_size + rank); + } + if constexpr (hasSubsequentMemAccess) { + wait_signal( + signal_pads[rank] + blockIdx.x * world_size + target_rank); + } else { + wait_signal( + signal_pads[rank] + blockIdx.x * world_size + target_rank); + } + } +}; + +template +struct MultimemLdReduce { + template + __device__ __inline__ Vec operator()(T* mc_ptr) { + static_assert(dependent_false); + } +}; + +template +__device__ __inline__ Vec multimem_ld_reduce_add(T* mc_ptr) { + MultimemLdReduce functor; + return functor.template operator()(mc_ptr); +} + +#if defined(USE_ROCM) || !defined(NVCC_SUPPORTS_MULTICAST) +#define SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(type, asm_type, acc_prec) \ + template <> \ + struct MultimemLdReduce { \ + template \ + __device__ __inline__ Vec operator()(type* mc_ptr) { \ + CUDA_KERNEL_ASSERT(false); \ + } \ + }; +#else +#define SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(type, asm_type, acc_prec) \ + template <> \ + struct MultimemLdReduce { \ + template \ + __device__ __inline__ Vec 
operator()(type* mc_ptr) { \ + Vec vec; \ + if constexpr (Alignment == 16) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec \ + ".v4" asm_type " {%0,%1,%2,%3}, [%4];" \ + : "=r"(vec.u32[0]), \ + "=r"(vec.u32[1]), \ + "=r"(vec.u32[2]), \ + "=r"(vec.u32[3]) \ + : "l"(mc_ptr) \ + : "memory"); \ + } else if constexpr (Alignment == 8) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec \ + ".v2" asm_type " {%0,%1}, [%2];" \ + : "=r"(vec.u32[0]), "=r"(vec.u32[1]) \ + : "l"(mc_ptr) \ + : "memory"); \ + } else if constexpr (Alignment == 4) { \ + asm("multimem.ld_reduce.relaxed.sys.global.add" acc_prec asm_type \ + " %0, [%1];" \ + : "=r"(vec.u32) \ + : "l"(mc_ptr) \ + : "memory"); \ + } \ + return vec; \ + } \ + }; +#endif + +SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(at::BFloat16, ".bf16x2", ".acc::f32"); +SPECIALIZE_MULTIMEM_LD_REDUCE_VEC_32(float, ".f32", ""); + +template +__device__ __inline__ void multimem_st(T* mc_ptr, Vec& vec) { +#if defined(USE_ROCM) || !defined(NVCC_SUPPORTS_MULTICAST) + CUDA_KERNEL_ASSERT(false); +#else + if constexpr (Alignment == 16) { + asm("multimem.st.relaxed.sys.global.v4.f32 [%0], {%1,%2,%3,%4};" + : + : "l"(mc_ptr), + "r"(vec.u32[0]), + "r"(vec.u32[1]), + "r"(vec.u32[2]), + "r"(vec.u32[3]) + : "memory"); + } else if constexpr (Alignment == 8) { + asm("multimem.st.relaxed.sys.global.v2.f32 [%0], {%1,%2};" + : + : "l"(mc_ptr), "r"(vec.u32[0]), "r"(vec.u32[1]) + : "memory"); + } else if constexpr (Alignment == 4) { + asm("multimem.st.relaxed.sys.global.f32 [%0], %1;" + : + : "l"(mc_ptr), "r"(vec.u32) + : "memory"); + } else { + static_assert(dependent_false); + } +#endif +} + +template +__device__ __inline__ T add_bf16x2(T a, T b) { + static_assert(sizeof(T) == 4); +#if (defined(__CUDA_ARCH__) && (__CUDA_ARCH__ < 800)) + CUDA_KERNEL_ASSERT(false); + return T{}; +#elif defined(USE_ROCM) + union bf2f { + float f; + __hip_bfloat16 bf[2]; + } _bf2f_a = {.f = 0}, _bf2f_b = {.f = 0}; + + //__hip_bfloat162 is a struct with two 
__hip_bfloat16 elements called x and y + // This typecasts input a and b as bfloat16 and maps to low bits of a float + // and does the addition in float + _bf2f_a.bf[1] = reinterpret_cast<__hip_bfloat162*>(&a)->x; + _bf2f_b.bf[1] = reinterpret_cast<__hip_bfloat162*>(&b)->x; + union f2bf { + float f; + __hip_bfloat16 bf[2]; + } _f2bf_res0, _f2bf_res1; + _f2bf_res0.f = _bf2f_a.f + _bf2f_b.f; + + // Same thing for y elements of __hip_bfloat162 + _bf2f_a.bf[1] = reinterpret_cast<__hip_bfloat162*>(&a)->y; + _bf2f_b.bf[1] = reinterpret_cast<__hip_bfloat162*>(&b)->y; + _f2bf_res1.f = _bf2f_a.f + _bf2f_b.f; + + // Put the two results together + __hip_bfloat162 rtn(_f2bf_res0.bf[1], _f2bf_res1.bf[1]); + return *reinterpret_cast(&rtn); +#else + auto res = __hadd2( + *reinterpret_cast<__nv_bfloat162*>(&a), + *reinterpret_cast<__nv_bfloat162*>(&b)); + return *reinterpret_cast(&res); +#endif +} + +template +__device__ __inline__ Vec add_vec( + const Vec& a, + const Vec& b) { + Vec c{}; + if constexpr (std::is_same_v) { + if constexpr (Alignment == 16) { + c.f32[0] = a.f32[0] + b.f32[0]; + c.f32[1] = a.f32[1] + b.f32[1]; + c.f32[2] = a.f32[2] + b.f32[2]; + c.f32[3] = a.f32[3] + b.f32[3]; + } else if constexpr (Alignment == 8) { + c.f32[0] = a.f32[0] + b.f32[0]; + c.f32[1] = a.f32[1] + b.f32[1]; + } else if constexpr (Alignment == 4) { + c.f32 = a.f32 + b.f32; + } else { + static_assert(dependent_false); + } + } else if constexpr (std::is_same_v) { + if constexpr (Alignment == 16) { + c.u32[0] = add_bf16x2(a.u32[0], b.u32[0]); + c.u32[1] = add_bf16x2(a.u32[1], b.u32[1]); + c.u32[2] = add_bf16x2(a.u32[2], b.u32[2]); + c.u32[3] = add_bf16x2(a.u32[3], b.u32[3]); + } else if constexpr (Alignment == 8) { + c.u32[0] = add_bf16x2(a.u32[0], b.u32[0]); + c.u32[1] = add_bf16x2(a.u32[1], b.u32[1]); + } else if constexpr (Alignment == 4) { + c.u32 = add_bf16x2(a.u32, b.u32); + } else { + static_assert(dependent_false); + } + } else { + static_assert(dependent_false); + } + return c; +} + +// 
With world_size specialization: perform balanced load from all peers before +// performing reduction. +template +__device__ inline std::enable_if_t<(k_world_size > 0), Vec> +load_and_reduce(T** ptrs, size_t rank, size_t world_size, size_t offset) { + Vec vecs[k_world_size]; +#pragma unroll k_world_size + for (size_t step = 0; step < k_world_size; ++step) { + size_t remote_rank = (rank + step) % k_world_size; + vecs[remote_rank] = + at::native::memory::ld_vec(ptrs[remote_rank] + offset); + } + auto acc = vecs[0]; +#pragma unroll k_world_size - 1 + for (size_t r = 1; r < world_size; ++r) { + acc = add_vec(acc, vecs[r]); + } + return acc; +} + +// Without world_size specialization: perform ordered (unbalanced) load and +// accumulate on each load. +template +__device__ inline std::enable_if_t<(k_world_size <= 0), Vec> +load_and_reduce(T** ptrs, size_t rank, size_t world_size, size_t offset) { + Vec acc{}; + for (size_t step = 0; step < world_size; ++step) { + auto vec = at::native::memory::ld_vec(ptrs[step] + offset); + acc = add_vec(acc, vec); + } + return acc; +} + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..caaa52d32408bffd084a772282c94e23af9d0a6b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemory.hpp @@ -0,0 +1,157 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d::symmetric_memory { + +// Resource wrapper that owns a (vaddr, allocation handle) pair. Upon +// destruction, it unmaps the vaddr and releases the allocation handle. +struct AllocationRef : public c10::intrusive_ptr_target { + void* ptr; + HandleType handle; + size_t block_size; + int device_idx; + bool is_multicast; + + AllocationRef( + void* ptr, + HandleType handle, + size_t block_size, + int device_idx, + bool is_multicast = false); + + ~AllocationRef(); +}; + +// Forward declaration of CUDAPeerAllocInfo +class CUDAPeerAllocInfo; + +class CUDASymmetricMemory : public SymmetricMemory { + public: + // This is mostly a shallow copy that shares the pointer to + // `CUDAPeerAllocInfo` which corresponds to the base Block. The + // CUDASymmetricMemory handle is specified by the offset to the base ptr. 
+ CUDASymmetricMemory( + const c10::intrusive_ptr& pai, + size_t offset); + + ~CUDASymmetricMemory() override {}; + + std::vector get_buffer_ptrs() override; + std::vector get_signal_pad_ptrs() override; + void** get_buffer_ptrs_dev() override; + void** get_signal_pad_ptrs_dev() override; + size_t get_buffer_size() override; + size_t get_offset() override; + + bool has_multicast_support() override; + void* get_multicast_ptr() override; + + void barrier(int channel, size_t timeout_ms) override; + void put_signal(int dst_rank, int channel, size_t timeout_ms) override; + void wait_signal(int src_rank, int channel, size_t timeout_ms) override; + + int get_rank() override; + int get_world_size() override; + c10::Device get_device() override; + bool world_within_direct_access() override; + + private: + int local_device_idx_; + int rank_; + int world_size_; + c10::intrusive_ptr pai_; + size_t offset_{0}; // in byte +}; + +// A class to hold the base pointers and signal pad pointers for a group of +// peers. One `CUDAPeerAllocInfo` object can be shared by multiple +// `CUDASymmetricMemory` objects when latter reside on the same allocation +// and rendezvous over the same group. (The `CUDASymmetricMemory` objects may +// have different offsets compared to the base address.) +class CUDAPeerAllocInfo : public c10::intrusive_ptr_target { + public: + CUDAPeerAllocInfo( + std::vector> alloc_refs, + std::vector buffers, + std::vector signal_pads, + HandleType mc_handle, + void* mc_addr, + size_t buffer_size, + int local_device_idx, + int rank, + int world_size); + + private: + std::vector> alloc_refs_; + std::vector buffers_; + std::vector signal_pads_; + HandleType mc_handle_; + void* mc_addr_; + size_t buffer_size_; + int local_device_idx_; + int rank_; + int world_size_; + void** buffers_dev_; + void** signal_pads_dev_; + + friend class CUDASymmetricMemory; +}; + +// Metadata associated with each allocation performed by +// `CUDASymmetricMemoryAllocator`. 
+struct Block : public c10::intrusive_ptr_target { + c10::intrusive_ptr alloc_ref; + int device_idx; + size_t block_size; + size_t buffer_size; + size_t signal_pad_offset; + std::optional default_group_name; + std::map> symm_mems; + + Block( + c10::intrusive_ptr alloc_ref, + int device_idx, + size_t block_size, + size_t buffer_size, + size_t signal_pad_offset, + const std::optional& group_name); +}; + +class CUDASymmetricMemoryAllocator : public SymmetricMemoryAllocator { + public: + void* alloc( + size_t size, + int device_idx, + const std::optional& group_name) override; + + void free(void* ptr) override; + size_t get_alloc_size(void* ptr) override; + c10::intrusive_ptr rendezvous( + void* ptr, + const std::optional& group_name) override; + bool has_multicast_support(int device_idx) override; + c10::DeviceType supported_device_type() override; + std::string name() override; + + private: + c10::intrusive_ptr find_block(void* ptr); + c10::intrusive_ptr find_block_covering(void* ptr, size_t& offset); + + std::shared_mutex mutex_; + std::unordered_map> ptr_to_block_; + c10::cuda::CUDACachingAllocator::Expandable_Segments_Handle_Type + handle_type_ = c10::cuda::CUDACachingAllocator:: + Expandable_Segments_Handle_Type::UNSPECIFIED; +}; + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp new file mode 100644 index 0000000000000000000000000000000000000000..2822011bdb192125ec44e3825ffff1f74aa18e55 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryTypes.hpp @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#if defined(USE_ROCM) +#include +#endif + +namespace c10d::symmetric_memory { + +// Covers NVL72 +constexpr int max_cuda_p2p_domain_size = 72; +// Maximum number of channels +constexpr int symm_max_nblocks = 32; + +// Maximally, a rank will need to sync with all other ranks, over all +// channels. Each signal is 32 bits, which is the minimum unit for atomic cas. +// Default signal pad size, can be overridden via set_signal_pad_size(). +constexpr size_t default_signal_pad_size = + symm_max_nblocks * max_cuda_p2p_domain_size * sizeof(uint32_t); + +#if !defined(USE_ROCM) && defined(PYTORCH_C10_DRIVER_API_SUPPORTED) +using HandleType = CUmemGenericAllocationHandle; +#elif defined(USE_ROCM) +using HandleType = hipMemGenericAllocationHandle_t; +#else +using HandleType = void*; +#endif + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp new file mode 100644 index 0000000000000000000000000000000000000000..0c6f8e725fdaea39eba18fcb303469cf9db1e643 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/CUDASymmetricMemoryUtils.hpp @@ -0,0 +1,120 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10d { +namespace symmetric_memory { + +bool device_has_multicast_support(int device_idx); + +bool allow_overlapping_devices(); + +// Query environment variable to get the backend used for CUDA Symmetric Memory. +std::string getSymmMemBackendCUDA(); + +class IpcChannel { + public: + IpcChannel(); + ~IpcChannel(); + + void send_fd(int dst_pid, int fd); + int recv_fd(); + + std::vector all_gather_fds( + int rank, + const std::vector& pids, + int fd); + + int broadcast_fds( + int rank, + int src_rank, + const std::vector& pids, + int fd); + + private: + static std::string get_socket_name(int pid); + + std::string socket_name_; + int socket_; +}; + +// A set of store-based exchange methods with a preset prefix typically type of +// the SymmetricMemory. Most used as static instances at respective +// SymmetricMemory implementation files. +class StoreExchange { + public: + StoreExchange(const std::string& store_prefix) + : store_prefix_(store_prefix) {} + + // Put template function in header file so that compiler can easily access it. 
+ template + std::vector all_gather( + const c10::intrusive_ptr& store, + int rank, + int world_size, + T val) { + static_assert(std::is_trivially_copyable_v); + + std::vector peer_keys; + peer_keys.reserve(world_size); + for (int r = 0; r < world_size; ++r) { + std::ostringstream oss; + oss << store_prefix_ << '/' << seq_id_ << '/' << r; + peer_keys.push_back(oss.str()); + } + ++seq_id_; + + { + std::vector payload( + reinterpret_cast(&val), + reinterpret_cast(&val) + sizeof(T)); + store->set(peer_keys[rank], payload); + } + + std::vector peer_vals; + peer_vals.reserve(world_size); + for (int r = 0; r < world_size; ++r) { + if (r == rank) { + peer_vals.push_back(val); + continue; + } + store->wait({peer_keys[r]}); + auto payload = store->get(peer_keys[r]); + TORCH_CHECK(payload.size() == sizeof(T)); + T peer_val{}; + std::memcpy(&peer_val, payload.data(), sizeof(T)); + peer_vals.push_back(peer_val); + } + return peer_vals; + } + + void barrier( + const c10::intrusive_ptr& store, + int rank, + int world_size) { + // TODO: implement an efficient one? + all_gather(store, rank, world_size, 0); + } + + private: + const std::string store_prefix_; + size_t seq_id_ = 0; +}; + +// Returns a pointer of virtual address that is mapped to the physical memory +// held by the handle. +void map_block( + void** ptr, + c10d::symmetric_memory::HandleType handle, + size_t size, + int device_idx); + +} // namespace symmetric_memory +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp new file mode 100644 index 0000000000000000000000000000000000000000..1fbf2d774c5543fcf496bb87f2599fbace5c46b7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/DMAConnectivity.hpp @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace c10d { + +struct TORCH_API DMAConnectivity : c10::intrusive_ptr_target { + c10::DeviceType device_type; + std::string connection_type; + + // This is an NxN matrix representing the connectivity between N devices, + // where each element matrix[i][j] indicates the connectivity between device + // i and device j. A value of 0 denotes that there is no connection between + // device i and j. The meaning of non-zero values are specific to the + // connection type (e.g., for NVLink it represents the number of NVLinks). + std::vector> matrix; + + explicit DMAConnectivity( + c10::DeviceType device_type, + std::string connection_type, + std::vector> matrix); +}; + +struct DMAConnectivityDetector : c10::intrusive_ptr_target { + virtual c10::intrusive_ptr detect() = 0; + ~DMAConnectivityDetector() override = default; +}; + +C10_EXPORT void register_dma_connectivity_detector( + c10::DeviceType device_type, + const std::string& connection_type, + c10::intrusive_ptr detector); + +TORCH_API c10::intrusive_ptr detect_dma_connectivity( + c10::DeviceType device_type, + const std::string& connection_type); + +} // namespace c10d + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp new file mode 100644 index 0000000000000000000000000000000000000000..35c12d238f4817ffc6bcf1b52a4be4012ec27c95 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/SymmetricMemory.hpp @@ -0,0 +1,225 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace c10d::symmetric_memory { + +// SymmetricMemory represents symmetric allocations across a group of devices. +// The allocations represented by a SymmetricMemory object are accessible by +// all devices in the group. The class can be used for op-level custom +// communication patterns (via the get_buffer APIs and the synchronization +// primitives), as well as custom communication kernels (via the buffer and +// signal_pad device pointers). +// +// To acquire a SymmetricMemory object, each rank first allocates +// identical-sized memory via SymmetricMemoryAllocator::alloc(), then invokes +// SymmetricMemoryAllocator::rendezvous() on the memory to establish the +// association across peer buffers. The rendezvous is a one-time process, and +// the mapping between a local memory memory and the associated SymmetricMemory +// object is unique. +// +// NOTE [symmetric memory signal pad] +// Signal pads are P2P-accessible memory regions designated for +// synchronization. SymmetricMemory offers built-in synchronization primitives +// such as barriers, put_signal, and wait_signal, which are all based on signal +// pads. 
Users may utilize signal pads for their own synchronization logic, +// provided that the signal pads remain zero-filled following successful +// synchronization. +// +// NOTE [symmetric memory synchronization channel] +// Synchronization channels allow users to use a single SymmetricMemory object +// to perform isolated synchronizations on different streams. For example, +// consider the case in which two barriers are issued on two streams for +// different purposes. Without the concept of channels, we cannot guarantee the +// correctness of the barriers since signals issued from barrier on stream A +// can be received by the barrier on stream B. By specifying different channels +// for these two barriers, they can operate correctly in parallel. +class TORCH_API SymmetricMemory : public c10::intrusive_ptr_target { + public: + ~SymmetricMemory() override = default; + + virtual std::vector get_buffer_ptrs() = 0; + virtual std::vector get_signal_pad_ptrs() = 0; + + // get_buffer_ptrs_dev() and get_signal_pad_ptrs_dev() each return a pointer + // to a device array of size world_size, containing buffer pointers and + // signal pad pointers, respectively. 
+ virtual void** get_buffer_ptrs_dev() = 0; + virtual void** get_signal_pad_ptrs_dev() = 0; + virtual size_t get_buffer_size() = 0; + size_t get_signal_pad_size(); + + virtual size_t get_offset() { + TORCH_CHECK(false, "NYI"); + } + + virtual bool has_multicast_support() = 0; + virtual void* get_multicast_ptr() = 0; + + at::Tensor get_buffer( + int rank, + c10::IntArrayRef sizes, + c10::ScalarType dtype, + int64_t storage_offset); + + at::Tensor get_signal_pad( + int rank, + c10::IntArrayRef sizes, + std::optional dtype = std::nullopt, + int64_t storage_offset = 0); + + at::Tensor get_remote_tensor( + int peer, + c10::IntArrayRef sizes, + c10::ScalarType dtype); + + virtual void barrier(int channel, size_t timeout_ms) = 0; + virtual void put_signal(int dst_rank, int channel, size_t timeout_ms) = 0; + virtual void wait_signal(int src_rank, int channel, size_t timeout_ms) = 0; + + virtual int get_rank() = 0; + virtual int get_world_size() = 0; + virtual c10::Device get_device() = 0; + + virtual const std::vector& get_rank_to_global_rank() { + TORCH_CHECK(false, "NYI"); + } + + virtual int* get_rank_to_global_rank_dev() { + TORCH_CHECK(false, "NYI"); + } + + // Returns true if *all* peers within the group are accessible via direct + // memory load and store. 
+ virtual bool world_within_direct_access() { + TORCH_CHECK(false, "NYI"); + } +}; + +class SymmetricMemoryAllocator : public c10::intrusive_ptr_target { + public: + ~SymmetricMemoryAllocator() override = default; + + virtual void* alloc( + size_t size, + int device_idx, + const std::optional& group_name) = 0; + + virtual void free(void* ptr) = 0; + virtual size_t get_alloc_size(void* ptr) = 0; + virtual c10::intrusive_ptr rendezvous( + void* ptr, + const std::optional& group_name) = 0; + virtual bool has_multicast_support(int device_idx) = 0; + virtual c10::DeviceType supported_device_type() = 0; + virtual std::string name() = 0; +}; + +C10_EXPORT bool is_finalizing(); + +C10_EXPORT void register_allocator( + c10::DeviceType device_type, + c10::intrusive_ptr allocator); + +C10_EXPORT void register_availability( + const std::string& name, + c10::intrusive_ptr allocator); + +C10_EXPORT bool has_allocator(c10::DeviceType device_type); + +C10_EXPORT c10::intrusive_ptr get_allocator( + c10::DeviceType device_type); + +// Set a store for rendezvousing symmetric allocations on a group of devices +// identified by `group_name`. The concept of groups is logical; users can +// utilize predefined groups (e.g., a group of device identified by a +// ProcessGroup) or create custom ones. Note that a SymmetricMemoryAllocator +// backends might employ a more efficient communication channel for the actual +// rendezvous process and only use the store for bootstrapping purposes. +TORCH_API void set_group_info( + const std::string& group_name, + int rank, + int world_size, + c10::intrusive_ptr store); + +struct GroupInfo { + int rank; + int world_size; + c10::intrusive_ptr store; + // Note this field is not automatically populated by set_group_info(). If a + // SymmetricMemory implementation needs to use it, it must be populated by a + // call to exchange_global_ranks() first. 
+ std::vector rank_to_global_rank; +}; + +C10_EXPORT GroupInfo& get_group_info(const std::string& group_name); + +// Identical to empty_strided, but allows symmetric memory access to be +// established for the allocated tensor via SymmetricMemory::rendezvous(). This +// function itself is not a collective operation. It invokes +// SymmetricMemoryAllocator::alloc() for the requested device under the hood. +// +// NOTE [symmetric memory persistent allocation] +// If an `alloc_id` is supplied, empty_strided_p2p will perform persistent +// allocation. This makes the function cache allocated memory and ensure that +// invocations with the same `alloc_id` receive tensors backed by the same +// memory address. For safety, if a previous persistent allocation is still +// active (i.e., the storage of the returned tensor is still alive), persistent +// allocations with the same `alloc_id` will fail. This determinism coupled +// with memory planning of communication buffers (e.g., by Inductor) allows +// communication algorithms to reliably reuse previously established remote +// memory access. +TORCH_API at::Tensor empty_strided_p2p( + c10::IntArrayRef size, + c10::IntArrayRef stride, + c10::ScalarType dtype, + c10::Device device, + const std::optional& group_name, + std::optional alloc_id); + +// Establishes symmetric memory access on tensors allocated via +// empty_strided_p2p() and empty_strided_p2p_persistent(). rendezvous() is a +// one-time process, and the mapping between a local memory region and the +// associated SymmetricMemory object is unique. Subsequent calls to +// rendezvous() with the same tensor, or tensors allocated with +// empty_strided_p2p_persistent() using the same alloc_id, will receive the +// cached SymmetricMemory object. +// +// The function has a collective semantic and must be invoked simultaneously +// from all rendezvous participants. 
+TORCH_API c10::intrusive_ptr rendezvous( + const at::Tensor& tensor, + const std::optional& group_name = std::nullopt); + +TORCH_API bool has_multicast_support( + c10::DeviceType device_type, + int device_idx); + +TORCH_API void set_backend(const std::string& name); + +TORCH_API std::optional get_backend(c10::Device device); + +// Get the current signal pad size for symmetric memory allocations. +// Returns the user-configured size if set, otherwise returns the default size. +TORCH_API size_t get_signal_pad_size(); + +// Set the signal pad size for future symmetric memory allocations. +// This must be called before any symmetric memory allocations are made. +// The size should be proportional to the number of blocks the user launches +// and the world size. +TORCH_API void set_signal_pad_size(size_t size); + +C10_EXPORT void register_mempool_allocator( + c10::DeviceType device_type, + std::shared_ptr allocator); + +TORCH_API std::shared_ptr get_mempool_allocator( + c10::Device device); + +} // namespace c10d::symmetric_memory + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp new file mode 100644 index 0000000000000000000000000000000000000000..ec10b3d7f4a84d66f1bb54ce582c56155d7f9257 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/env.hpp @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace c10d::symmetric_memory { + +static int getenv_nblocks() { + static int num_blocks = -1; // Uninitialized + if (num_blocks == -1) { + auto str = c10::utils::get_env("TORCH_SYMMMEM_NBLOCKS"); + if (str.has_value()) { + num_blocks = std::stoi(str.value()); + } else { + num_blocks = -2; // Not set + } + } + return num_blocks; +} + +} // namespace c10d::symmetric_memory +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp new file mode 100644 index 0000000000000000000000000000000000000000..28a07ea5936325b11d5f9295d3169e68e11e284f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/intra_node_comm.hpp @@ -0,0 +1,96 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace c10d::intra_node_comm { + +using namespace c10d::symmetric_memory; + +constexpr size_t kMaxDevices = 8; +constexpr size_t kDefaultBufferSize = 10ull * 1024 * 1024; + +using NvlMesh = std::array, kMaxDevices>; + +enum class Topology : uint8_t { + UNKNOWN = 0, + FULLY_CONNECTED = 1, +}; + +enum class AllReduceAlgo : uint8_t { + NONE = 0, + ONE_SHOT = 1, + TWO_SHOT = 2, +}; + +// NOTE: this class will be be removed soon in favor of SymmetricMemory +class TORCH_API IntraNodeComm : public c10::intrusive_ptr_target { + public: + IntraNodeComm( + c10::intrusive_ptr store, + size_t rank, + size_t worldSize, + std::optional bufferSize = std::nullopt); + + ~IntraNodeComm() override; + + static bool isEnabled(); + + /** + * Performs rendezvous. + * If rendezvous fails, the IntraNodeComm object will be in an invalid + * state and it is the caller's responsibility to dispose it. + */ + bool rendezvous(); + + /** + * Selects a AllReduceAlgo that we think will outperform nccl. + * Returns AllReduceAlgo::NONE if we don't think we can outperform nccl. 
+ */ + AllReduceAlgo selectAllReduceAlgo(const at::Tensor& input); + + at::Tensor allReduce(const at::Tensor& input, AllReduceAlgo algo); + + private: + at::Tensor oneShotAllReduce( + const at::Tensor& input, + at::cuda::CUDAStream& stream); + + at::Tensor twoShotAllReduce( + const at::Tensor& input, + at::cuda::CUDAStream& stream); + + c10::intrusive_ptr store_; + size_t rank_; + size_t worldSize_; + size_t bufferSize_; + + /** + * Members initialized after rendezvous + */ + bool isInitialized_ = false; + int deviceIdx_{0}; + Topology topology_ = Topology::UNKNOWN; + void* symmetricMemoryPtr_ = nullptr; + c10::intrusive_ptr symmetricMemory_ = nullptr; +}; + +class IntraNodeCommWork : public c10d::Work { + public: + bool wait(std::chrono::milliseconds timeout = kNoTimeout) override { + return true; + } +}; + +TORCH_API int64_t getIntraNodeCommUsageCounter(); + +bool isIntraNodeCommSupported(); +} // namespace c10d::intra_node_comm + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp new file mode 100644 index 0000000000000000000000000000000000000000..6bf1ed77235da338723478a4868a1cb7a4379ba5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/c10d/symm_mem/nvshmem_team_manager.hpp @@ -0,0 +1,174 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +// Starting from NVSHMEM 3.3.9, nvshmem_host.h exists so that we can cleanly +// include only the nvshmem host library headers: +// #include +// It translates into the following two lines: +#include +#include +// For maximum compatibility, we use the "host/" style for now. + +namespace c10d::nvshmem_extension { + +// This corresponds to max nblocks +constexpr int MAX_N_TEAMS = 128; + +// A pool of teams for each group. These are duplicate teams. +using TeamPool = std::vector; + +// Manage all the team business. Singleton. +class TeamManager { + public: + // Constructor + explicit TeamManager(const c10::Device device) : device_(device) {} + + // Get single, global manager. + static TeamManager& get(const c10::Device device) { + static TeamManager manager(device); + TORCH_CHECK( + manager.device_ == device, + "Detected use of TeamManager on multiple devices. This is not supported."); + return manager; + } + + // Get a team for a group. 
+ nvshmem_team_t get_team( + const std::string& group_name, + const std::vector& global_ranks) { + auto [team_pool, pool_updated] = + group_to_team_pool(group_name, global_ranks, 1); + // Return the fist available team + return team_pool[0]; + } + + // Get n teams for a group. + // The first element of the returned pair is the team pool on host side. + // The second element of the returned pair is the team pool on device side. + // This API must be call with a device guard. + std::pair get_n_teams( + const std::string& group_name, + const std::vector& global_ranks, + const int need_n) { + // A device guard is required for malloc and memcpy below + c10::cuda::CUDAGuard guard(device_); + // Get the team pool with the requested number of teams + auto [team_pool, pool_updated] = + group_to_team_pool(group_name, global_ranks, need_n); + // Check if the pool already exists in device memory + nvshmem_team_t* team_pool_dev = nullptr; + constexpr auto pool_bytes = sizeof(nvshmem_team_t) * MAX_N_TEAMS; + auto it = team_pool_devptrs_.find(group_name); + if (it == team_pool_devptrs_.end()) { + // If not, allocate a new pool in device memory + team_pool_dev = reinterpret_cast( + c10::cuda::CUDACachingAllocator::raw_alloc(pool_bytes)); + team_pool_devptrs_[group_name] = team_pool_dev; + } else { + team_pool_dev = it->second; + } + // Update the pool in device memory if host side pool is updated + if (pool_updated) { + TORCH_INTERNAL_ASSERT(team_pool.size() == MAX_N_TEAMS); + auto stream = at::cuda::getCurrentCUDAStream(); + C10_CUDA_CHECK(cudaMemcpyAsync( + team_pool_dev, + team_pool.data(), + pool_bytes, + cudaMemcpyHostToDevice, + stream)); + } + return std::make_pair(std::cref(team_pool), team_pool_dev); + } + + ~TeamManager() noexcept { + // Free the team pools in device memory + // Note that we do it in a best effort manner because the team pool is + // managed by a static TeamManager and the destruction order of static + // objects is undetermined. 
If the destructor is called after the CUDA + // context is destroyed, cudaFree would fail. + try { + // cudaFree generally implies a device synchronization, meaning it will + // block until all preceding CUDA operations on the device have completed + // before freeing the memory. Thus we don't need to worry about freeing + // the memory before CUDA kernels complete. + for (auto& [_, team_pool_dev] : team_pool_devptrs_) { + c10::cuda::CUDACachingAllocator::raw_delete(team_pool_dev); + } + } catch (...) { + // Ignore the error + std::cerr << "Failed to free the team pool in device memory, skipping\n"; + } + } + + private: + // Get the team pool for a group. If the pool doesn't exist, create it. If the + // pool exists but is not large enough, create more teams. + // The first element of the returned pair is the team pool on host side. + // The second element of the returned pair is a boolean indicating if the pool + // is updated. + std::pair group_to_team_pool( + const std::string& group_name, + const std::vector& global_ranks, + const int need_n) { + TORCH_CHECK(need_n < MAX_N_TEAMS, "Too many teams requested"); + // Guarding the NVSHMEM API calls below just to be safe + c10::cuda::CUDAGuard guard(device_); + + // Insert a new team pool if not exists + auto [it, inserted] = group_name_to_team_pool_.emplace( + group_name, TeamPool(MAX_N_TEAMS, NVSHMEM_TEAM_INVALID)); + auto& team_pool = it->second; + bool pool_updated = inserted; + + // Create new teams if what's requested is more than what we have + int stride = 0; // stride in globe, uninitialized + for (int i = 0; i < need_n; ++i) { + if (team_pool[i] != NVSHMEM_TEAM_INVALID) { + continue; + } + // Some checks before we create new teams + if (stride == 0) { // Check only once + TORCH_CHECK(global_ranks.size() > 1); + stride = global_ranks[1] - global_ranks[0]; + for (size_t r = 1; r < global_ranks.size(); ++r) { + TORCH_CHECK(global_ranks[r] - global_ranks[r - 1] == stride); + } + } + nvshmem_team_t team = 
NVSHMEM_TEAM_INVALID; + nvshmem_team_split_strided( + NVSHMEM_TEAM_WORLD, + global_ranks[0], + stride, + global_ranks.size(), + nullptr, + 0, + &team); + TORCH_CHECK(team != NVSHMEM_TEAM_INVALID, "Failed to create a new team"); + team_pool[i] = team; + pool_updated = true; + } + return std::make_pair(std::cref(team_pool), pool_updated); + } + + private: + // Device where the team manager is created + const c10::Device device_; + // A map from group name to team pool for that group. + std::unordered_map group_name_to_team_pool_; + // A map from group name to team pool array in device memory. + std::unordered_map team_pool_devptrs_; +}; + +} // namespace c10d::nvshmem_extension +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..25f915f21a7cc914b352ba4b8208e3ce4617d9a7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/agent_utils.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// All RPC peers should call into this function at the same time. Each peer +// provides its own id and name, and this function uses the given Store to +// gather global name-to-id mapping on all peers. 
+TORCH_API std::unordered_map collectNames( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName, + const int worldSize); + +// Ranks in dynamic RPC groups will initially call into this to establish the +// name-to-id mapping for the current peers in the group. The current rank will +// put its own worker info in the store and discover all the ranks that came +// before it. NOTE: This needs to be called with the Dynamic RPC group +// membership management token held. +TORCH_API std::unordered_map collectCurrentNames( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName); + +// Remove name from Store, used in dynamic RPC groups. +// NOTE: This needs to be called with the Dynamic RPC group +// membership management token held. +TORCH_API void removeCurrentName( + ::c10d::PrefixStore store, + const worker_id_t selfId, + const std::string& selfName); + +// This performs a synchronization of all call counts by using store. +// All RPC peers wait for others to join to exit at the same time. +TORCH_API int syncCallCount( + ::c10d::PrefixStore store, + const int worldSize, + int activeCalls = 0); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h new file mode 100644 index 0000000000000000000000000000000000000000..f6e71490f27f885d1a2e4e897a29fe50a1cb0ea5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/message.h @@ -0,0 +1,198 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// An enum denoting common RPC errors to allow specific error handling for them. +// NOLINTNEXTLINE(performance-enum-size) +enum RPCErrorType { + UNKNOWN_ERROR = 0, /* Indicates that error type could not be parsed */ + TIMEOUT = 1, /* Indicates that the RPC has timed out */ + INTENTIONAL_FAILURE = 2 /* Deliberate failure, such as those injected by + FaultyAgent for testing */ +}; + +// The enum values are bitwise ORed with MessageType +// They are bit flags starting from 0x100 and should have +// value such as 0x100, 0x200, 0x400, 0x800, 0xF00, etc. 
+// NOLINTNEXTLINE(performance-enum-size) +enum MessageTypeFlags { + REQUEST_TYPE = 0x100, + RESPONSE_TYPE = 0x200, +}; + +// Message type ids must have values between 0x00 and 0xff; the enumerators +// below OR a MessageTypeFlags bit on top of the id (UNKNOWN carries no flag). +// NOLINTNEXTLINE(performance-enum-size) +enum MessageType { + // messages for dist.rpc on builtin operators + SCRIPT_CALL = 0x00 | MessageTypeFlags::REQUEST_TYPE, + SCRIPT_RET = 0x01 | MessageTypeFlags::RESPONSE_TYPE, + + // messages for dist.rpc on Python UDF + PYTHON_CALL = 0x02 | MessageTypeFlags::REQUEST_TYPE, + PYTHON_RET = 0x03 | MessageTypeFlags::RESPONSE_TYPE, + + // messages for dist.remote on builtin operators and Python UDF + SCRIPT_REMOTE_CALL = 0x04 | + MessageTypeFlags::REQUEST_TYPE, // A remote call on a builtin operator + PYTHON_REMOTE_CALL = + 0x05 | MessageTypeFlags::REQUEST_TYPE, // A remote call on a Python UDF + REMOTE_RET = + 0x06 | MessageTypeFlags::RESPONSE_TYPE, // Response for remote calls for + // UDF, builtin, or script + + // RRef related internal messages + SCRIPT_RREF_FETCH_CALL = + 0x07 | MessageTypeFlags::REQUEST_TYPE, // A UserRRef fetches value + // from owner + PYTHON_RREF_FETCH_CALL = + 0x08 | MessageTypeFlags::REQUEST_TYPE, // A UserRRef fetches + // value from owner + SCRIPT_RREF_FETCH_RET = 0x09 | + MessageTypeFlags::RESPONSE_TYPE, // An OwnerRRef sends ivalue to user + PYTHON_RREF_FETCH_RET = 0x0a | + MessageTypeFlags::RESPONSE_TYPE, // An OwnerRRef sends py::object to user + RREF_USER_DELETE = 0x0b | + MessageTypeFlags::REQUEST_TYPE, // A UserRRef tells the owner to deref + RREF_FORK_REQUEST = + 0x0c | MessageTypeFlags::REQUEST_TYPE, // A child UserRRef tells the owner + // about itself + RREF_CHILD_ACCEPT = + 0x0d | MessageTypeFlags::REQUEST_TYPE, // A child UserRRef tells parent + // that owner knows it + RREF_ACK = + 0x0e | MessageTypeFlags::RESPONSE_TYPE, // ACK to internal RRef messages + + // Messages with autograd info + FORWARD_AUTOGRAD_REQ = 0x0f | MessageTypeFlags::REQUEST_TYPE, + FORWARD_AUTOGRAD_RESP = 0x10 |
MessageTypeFlags::RESPONSE_TYPE, + + // Messages to propagate gradients on the backward pass. + BACKWARD_AUTOGRAD_REQ = 0x11 | MessageTypeFlags::REQUEST_TYPE, + BACKWARD_AUTOGRAD_RESP = 0x12 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages to tell workers to clean up their autograd context. + CLEANUP_AUTOGRAD_CONTEXT_REQ = 0x13 | MessageTypeFlags::REQUEST_TYPE, + CLEANUP_AUTOGRAD_CONTEXT_RESP = 0x14 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages that tell workers to run requests with profiling enabled. + RUN_WITH_PROFILING_REQ = 0x15 | MessageTypeFlags::REQUEST_TYPE, + RUN_WITH_PROFILING_RESP = 0x16 | MessageTypeFlags::RESPONSE_TYPE, + + // Messages to support RRef.backward(). + RREF_BACKWARD_REQ = 0x17 | MessageTypeFlags::REQUEST_TYPE, + RREF_BACKWARD_RESP = 0x18 | MessageTypeFlags::RESPONSE_TYPE, + + // Other internal message types. Note that UNKNOWN carries neither + // REQUEST_TYPE nor RESPONSE_TYPE. + EXCEPTION = 0x37 | MessageTypeFlags::RESPONSE_TYPE, + UNKNOWN = 0x3c +}; + +// A message to be sent/received by an RpcAgent. +// +// A Message object contains 4 fields: +// payload (std::vector): a binary chunk of data. +// tensors (std::vector): all tensors. Tensor data are not +// included in the payload, and it is up to the RpcAgent implementation +// to determine how to serialize them. This design is helpful for +// communicating super large tensors where serializing all the data at +// once leads to an excessively large memory footprint. An implementation +// can then serialize and send tensors chunk-by-chunk, in a streaming +// fashion. +// type (MessageType): type of the message. +// id (int64_t): message id, this is used to match request and response. +// Other implementations can ignore it if they have their own +// ways to do matching. +// +// Layers above ``RpcAgent`` only convert ScriptCall, ScriptResp, PythonCall, +// and PythonResp into a Message, and it is up to the RpcAgent +// implementation to determine how to serialize a message.
+class TORCH_API Message final : public torch::CustomClassHolder { + private: + // Keep these private in order to force users to go through make_intrusive and + // thus prevent creating a Message that's not held by an intrusive_ptr. + Message(); + + Message( + std::vector&& payload, + std::vector&& tensors, + MessageType type); + + Message( + std::vector&& payload, + std::vector&& tensors, + MessageType type, + int64_t id); + + friend c10::intrusive_ptr; + + public: + // Messages are neither copyable nor movable. + Message(const Message& other) = delete; + Message(Message&& other) = delete; + Message& operator=(Message const& rhs) = delete; + Message& operator=(Message&& rhs) = delete; + ~Message() override = default; + + // Destructively retrieves the payload. + std::vector&& movePayload() &&; + // Destructively retrieves the tensors. + std::vector&& moveTensors() &&; + + // Mutable and read-only accessors for the payload, the tensors and the type. + std::vector& payload(); + const std::vector& payload() const; + std::vector& tensors(); + const std::vector& tensors() const; + MessageType type() const; + + // NOTE(review): presumably isRequest()/isResponse() test the REQUEST_TYPE / + // RESPONSE_TYPE flag bits of type_; the definitions are not in this header. + bool isRequest() const; + bool isResponse() const; + bool isShutdown() const; + + // id is an optional field to match request/response. If an RpcAgent + // implementation is able to do the matching without using this id, it can be + // dropped during message serialization. + int64_t id() const; + void setId(int64_t id); + + std::vector> getStorages() const; + + private: + std::vector payload_; + std::vector tensors_; + // Defaults: a default-constructed Message has type UNKNOWN and id -1. + MessageType type_ = MessageType::UNKNOWN; + int64_t id_ = -1; +}; + +// Create a response Message of type Exception. +// The exception string representation will be used as the message's payload. +// A message ID corresponding to the request that resulted in this response can +// be provided for matching requests/responses. +TORCH_API c10::intrusive_ptr createExceptionResponse( + const std::exception& e, + int64_t id); + +// Create a response Message of type Exception. +// The passed-in string representation will be used as the message's payload.
+// A message ID corresponding to the request that resulted in this response can +// be provided for matching requests/responses. +TORCH_API c10::intrusive_ptr createExceptionResponse( + const std::string& exceptionStr, + int64_t id); + +// Bundles a message with the storages returned by message->getStorages(). +// The storages are collected first, because the message pointer is moved into +// the returned tuple afterwards. +inline std::tuple< + c10::intrusive_ptr, + std::vector>> +withStorages(c10::intrusive_ptr message) { + auto storages = message->getStorages(); + return std::make_tuple(std::move(message), std::move(storages)); +} + +using JitFuture = c10::ivalue::Future; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h new file mode 100644 index 0000000000000000000000000000000000000000..d46a0e9be89a5957fd498fe2fbd17c7e02767e4b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/metrics/RpcMetricsHandler.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace torch::distributed::rpc { +// All metrics are prefixed with the following key. +// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) +constexpr char kRpcMetricsKeyPrefix[] = "torch.distributed.rpc."; +// APIs for logging time-series metrics for RPC-based distributed +// training. Implementations of this class should provide thread safety so that +// metrics can be logged from multiple threads without the user needing to +// coordinate serialization.
+class RpcMetricsHandler { + public: + // Accumulates the metric value specified by the name for purposes of + // computing aggregate statistics over time. + virtual void accumulateMetric(const std::string& name, double value) = 0; + // Increments a count for the metric given by the name. + virtual void incrementMetric(const std::string& name) = 0; + // Virtual destructor: this is a pure-virtual interface. + virtual ~RpcMetricsHandler() = default; +}; + +// Configuration struct for metrics handling. +struct RpcMetricsConfig { + explicit RpcMetricsConfig(std::string handlerName, bool enabled) + : handlerName_(std::move(handlerName)), enabled_(enabled) {} + + // Handler name. NOTE(review): presumably the key under which the handler is + // registered in RpcMetricsHandlerRegistry - confirm against the callers. + std::string handlerName_; + // Whether metrics exporting should be enabled or not. + bool enabled_; +}; + +// A registry for different implementations of RpcMetricsHandler. Classes +// implementing the above interface should use this to register implementations. +TORCH_DECLARE_REGISTRY( + RpcMetricsHandlerRegistry, + torch::distributed::rpc::RpcMetricsHandler); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..ba39ea8a02305f64f642b6927ee43b35459d10b8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/remote_profiler_manager.h @@ -0,0 +1,60 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { +extern const std::string REMOTE_PROFILING_KEY_PREFIX; + +class TORCH_API RemoteProfilerManager { + public: + // Retrieves the lazily-initialized RemoteProfilerManager singleton instance. + static RemoteProfilerManager& getInstance(); + // Sets the current, thread-local profiling key. + void setCurrentKey(std::string key); + // Returns whether the current profiling key is set. + bool isCurrentKeySet() const; + // Unsets the current, thread-local profiling key to allow other RPCs to reset + // it. + void unsetCurrentKey(); + // inserts a pair (globallyUniqueId, key) to an in-memory map. The + // corresponding ID is used in RPC deserialization to prefix remotely profiled + // events with the right key. + void saveRPCKey( + ProfilingId globallyUniqueId, + const std::string& rpcProfilingKey); + // Retrieves the profiling key corresponding to the given globallyUniqueId. + // Throws if it is not found. + std::string retrieveRPCProfilingKey(const ProfilingId& globallyUniqueId); + // Generates the next globally unique ID for profiling. + ProfilingId getNextProfilerId(); + // Retrieves the currently set thread-local profiling key. Throws if it is not + // set. 
+ std::string& getCurrentProfilingKey(); + // erases the globallyUniqueId from the map. This can help save memory in the + // case that many RPCs are being profiled. + void eraseKey(const ProfilingId& globallyUniqueId); + + RemoteProfilerManager(const RemoteProfilerManager& other) = delete; + RemoteProfilerManager operator=(const RemoteProfilerManager& other) = delete; + RemoteProfilerManager(RemoteProfilerManager&&) = delete; + RemoteProfilerManager& operator=(RemoteProfilerManager&&) = delete; + + private: + RemoteProfilerManager(); + ~RemoteProfilerManager() = default; + local_id_t getNextLocalId(); + std::unordered_map + profiledRpcKeys_; + static thread_local std::optional currentThreadLocalKey_; + std::mutex mutex_; + local_id_t currentLocalId_; +}; +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h new file mode 100644 index 0000000000000000000000000000000000000000..f86461f6f895bee8a46b54b9f57dd88b66362ad6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/profiler/server_process_global_profiler.h @@ -0,0 +1,134 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::distributed::rpc::profiler::processglobal { + +using namespace torch::autograd::profiler; + +// Process global profiler state. +// +// This class holds information about a profiling range, from "enable" to +// "disable". 
+// An instance of this ``State`` will be +// pushed into a global stack, so nested profiling ranges are supported. +// +// It has 2 main members (plus a mutex guarding the second one). +// One is ``autograd::profiler::ProfilerConfig``. It's set by user and +// will be copied to thread-local profiler state of RPC threads. +// The other is a container that aggregates recorded +// ``autograd::profiler::Event``s from all thread-local profilers on RPC +// threads. +class State { + public: + explicit State(ProfilerConfig config) : config_(std::move(config)) {} + ~State() = default; + + const ProfilerConfig& config() const { + return config_; + } + + // Appends one result to results_ while holding resultsMutex_. + void pushResult(thread_event_lists result) { + std::unique_lock lock(resultsMutex_); + + // NB: When a thread wants to push an entry into this container, + // main control logic might have exited the process-global profile range. + results_.emplace_back(std::move(result)); + } + + std::vector results(); + + private: + // Each result comes from a profile range. In each profile range, there is a + // "__profiler_start" marker event that all following events calculate time + // relative to it, so it's required to call + // parse_cpu_trace(result) for results of all profile ranges. + std::mutex resultsMutex_; + std::vector results_; + const ProfilerConfig config_ = ProfilerConfig(ProfilerState::Disabled); +}; + +class StateStackEntry; + +#if defined(__MACH__) +// Compiler error: 'shared_timed_mutex' is unavailable: introduced in +// macOS 10.12 +using mutexType = std::mutex; +// Compiler error: 'shared_lock' is unavailable: introduced in +// macOS 10.12 +using rLockType = std::unique_lock; +using wLockType = std::unique_lock; +#else +using mutexType = std::shared_timed_mutex; +using rLockType = std::shared_lock; +using wLockType = std::unique_lock; +#endif + +// This is the global stack of ``State``s.
+TORCH_API extern std::shared_ptr currentStateStackEntryPtr; +TORCH_API extern mutexType currentStateStackEntryMutex; + +// This class is used to implement a stack of ``State``s. +// It has 2 members. +// One is ``prevPtr``, a shared_ptr pointing to the previous element in the +// stack. +// The other is ``statePtr``, a shared_ptr pointing to ``State``. +class StateStackEntry { + public: + StateStackEntry( + std::shared_ptr prevPtr, + std::shared_ptr statePtr) + : prevPtr_(std::move(prevPtr)), statePtr_(std::move(statePtr)) {} + + static void pushRange(std::shared_ptr profilerProcessGlobalStatePtr); + static std::shared_ptr popRange(); + + // Returns the current top of the stack, read under a read lock on + // currentStateStackEntryMutex. + static std::shared_ptr current() { + rLockType rlock(currentStateStackEntryMutex); + + return currentStateStackEntryPtr; + } + + std::shared_ptr prevPtr() const { + return prevPtr_; + } + + std::shared_ptr statePtr() const { + return statePtr_; + } + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::shared_ptr prevPtr_{nullptr}; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::shared_ptr statePtr_{nullptr}; +}; + +// Push the result to ``State``s of current profile range and recursively outer +// profile ranges. +TORCH_API void pushResultRecursive( + std::shared_ptr stateStackEntryPtr, + const thread_event_lists& result); + +// User-facing API. +// +// Enter a server-side process-global profiling range. +// Profiling ranges can be nested, so it's ok to call this API multiple +// times. This enables all RPC threads running server-side request callbacks. +TORCH_API void enableServer(const ProfilerConfig& new_config); +// +// Exit a server-side process-global profiling range. +// Profiling ranges can be nested, so it's possible that profiler is still on +// after calling this API. +// This enables all RPC threads running server-side request callbacks. +// NOTE(review): the sentence above looks copied from enableServer; presumably +// this call disables profiling for the exited range - confirm in the .cpp.
+TORCH_API std::vector disableServer(); + +} // namespace torch::distributed::rpc::profiler::processglobal + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h new file mode 100644 index 0000000000000000000000000000000000000000..c5735e844b6e1f62dbfc779a76db78857e111593 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/py_rref.h @@ -0,0 +1,86 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::rpc { + +// Selects which kind of proxy createRRefProxy() builds. +// NOLINTNEXTLINE(performance-enum-size) +enum RRefProxyType { RPC_SYNC, RPC_ASYNC, REMOTE }; + +// Python wrapper of an RRef intrusive_ptr that supports Python +// pickle and unpickle. +class PYBIND11_EXPORT PyRRef { + public: + // The first ctor can only be called while holding GIL. See its implementation + // for more explanations. + explicit PyRRef(const py::object& value, const py::object& type_hint); + explicit PyRRef(c10::intrusive_ptr rref); + PyRRef(const PyRRef&) = default; + ~PyRRef(); + + bool isOwner() const; + bool confirmedByOwner() const; + WorkerInfo owner() const; + std::string ownerName() const; + py::object toHere( + const float timeoutSeconds = + torch::distributed::rpc::kUnsetRpcTimeout) const; + py::object localValue() const; + std::string str() const; + py::tuple pickle() const; + static PyRRef unpickle(const py::tuple& t); + c10::IValue toIValue() const; + // Future that is associated with the creation of this RRef on the remote end.
+ // This is only used to get the future corresponding to the rref for profiling + // use cases. + c10::intrusive_ptr getFuture() const; + // Keeps track of the future responsible for profiling owner creation + // acknowledgement. + c10::intrusive_ptr getProfilingFuture() const; + // Sets the future responsible for profiling owner creation acknowledgement. + // This future is set from python to be a future that returns when profiling + // callbacks have been run. + void setProfilingFuture(c10::intrusive_ptr profilingFuture); + + // Create a proxy on this RRef, which can be used to launch RPC on the owner + // of this RRef to run functions on the object referenced by this RRef. + py::object createRRefProxy( + const RRefProxyType& mode, + float timeoutSeconds = rpc::kUnsetRpcTimeout) const; + + // Get the type of the data object referenced by this RRef. Timeout argument + // is only used in the first invocation of this function as an argument to the + // RPC to the owner node of the RRef. + py::object getRRefType( + float timeout = rpc::kUnsetRpcTimeout, + bool blocking = true); + + // Run the backward pass with the RRef as the root. + void backward(int64_t autogradContextId, bool retainGraph); + + // Helper static function to run backward on a given rref. + static void backward( + int64_t autogradContextId, + bool retainGraph, + const c10::intrusive_ptr& rref); + + // Specialization of backward if the rref is an OwnerRRef. + static void backwardOwnerRRef( + int64_t autogradContextId, + bool retainGraph, + IValue value); + + private: + c10::intrusive_ptr rref_; + std::optional> profilingFuture_; + std::optional type_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h new file mode 100644 index 0000000000000000000000000000000000000000..ea339cae11b4dedb5a2aefa2b702443bc53708a4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_call.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// RPC command representing a call of a Python function over RPC. +class TORCH_API PythonCall final : public RpcCommandBase { + public: + PythonCall(SerializedPyObj&& serializedPyObj, bool isAsyncExecution); + + // Rvalue-qualified: can only be invoked on an expiring PythonCall. + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage(const Message& message); + + const SerializedPyObj& serializedPyObj() const; + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + private: + SerializedPyObj serializedPyObj_; + // NOTE(review): presumably marks the function for async execution on the + // callee - confirm in python_call.cpp. + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..ba69781bdcb0a200b52d30da80cabeb52ff8608d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_functions.h @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Converts an internal ivalue::Future of Message into a user-facing +// ivalue::Future of py::object type by creating a new ivalue::Future and +// calling its markCompleted from a callback on the given ivalue::Future. +// If hasValue is true, the Message is converted into a py::object and then +// wrapped in an IValue. If hasValue is false, the new ivalue::Future is only +// used for signaling and launching callbacks. In this case, the message is +// discarded and the ivalue::Future is set using an empty IValue or the given +// FutureError if there is an error.
+c10::intrusive_ptr toPyJitFuture( + const c10::intrusive_ptr& messageJitFuture, + bool hasValue = true); + +c10::intrusive_ptr pyRpcBuiltin( + const WorkerInfo& dst, + const std::string& opName, + const py::args& args, + const py::kwargs& kwargs, + const float rpcTimeoutSeconds); + +c10::intrusive_ptr pyRpcPythonUdf( + const WorkerInfo& dst, + std::string& pickledPythonUDF, + std::vector& tensors, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +c10::intrusive_ptr pyRpcTorchscript( + const std::string& dstWorkerName, + const std::string& qualifiedNameStr, + const py::tuple& argsTuple, + const py::dict& kwargsDict, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +PyRRef pyRemoteBuiltin( + const WorkerInfo& dst, + const std::string& opName, + const float rpcTimeoutSeconds, + const py::args& args, + const py::kwargs& kwargs); + +PyRRef pyRemotePythonUdf( + const WorkerInfo& dst, + std::string& pickledPythonUDF, + std::vector& tensors, + const float rpcTimeoutSeconds, + const bool isAsyncExecution); + +PyRRef pyRemoteTorchscript( + const std::string& dstWorkerName, + const std::string& qualifiedNameStr, + const float rpcTimeoutSeconds, + const bool isAsyncExecution, + const py::args& args, + const py::kwargs& kwargs); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..b34cb40349850c4c643e9e5d899b53293f220a2b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_remote_call.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +namespace torch::distributed::rpc { + +// RPC command carrying a serialized Python object together with two IValues, +// retRRefId and retForkId, and an async-execution flag. +// NOTE(review): presumably the ids name the RRef / fork that will hold the +// result of the remote call - confirm in python_remote_call.cpp. +class TORCH_API PythonRemoteCall : public RpcCommandBase { + public: + PythonRemoteCall( + SerializedPyObj&& serializedPyObj, + at::IValue retRRefId, + at::IValue retForkId, + const bool isAsyncExecution); + + // Read-only accessors for the stored fields. + inline const SerializedPyObj& serializedPyObj() const { + return serializedPyObj_; + } + + inline const at::IValue& retRRefId() const { + return retRRefId_; + } + + inline const at::IValue& retForkId() const { + return retForkId_; + } + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + // Rvalue-qualified: can only be invoked on an expiring PythonRemoteCall. + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + SerializedPyObj serializedPyObj_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue retRRefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue retForkId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..cc47fbe631a219bcd16a847d1bb849fa812068cf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_resp.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// RPC command representing the response to a Python UDF call over RPC. It +// wraps a single SerializedPyObj. +class TORCH_API PythonResp final : public RpcCommandBase { + public: + explicit PythonResp(SerializedPyObj&& serializedPyObj); + + // Rvalue-qualified: can only be invoked on an expiring PythonResp. + c10::intrusive_ptr toMessageImpl() && override; + + static std::unique_ptr fromMessage(const Message& message); + + const SerializedPyObj& serializedPyObj() const; + + private: + SerializedPyObj serializedPyObj_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..8de1c7d7592e448f541f2a3f03b1a0f9b9a5f2ec --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/python_rpc_handler.h @@ -0,0 +1,134 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Singleton class that provides an interface to execute Python UDF remote calls +// and deserialize the returned results by running Python functions +// in internal_rpc_utilities. +// The singleton object is first constructed when the RPC agent is +// constructed, where the Python functions in +// torch/distributed/internal_rpc_utils.py are imported only once. +class PYBIND11_EXPORT PythonRpcHandler { + public: + struct RRefProxyFunctions { + py::object rrefProxyCtor_; + py::object rpcSync_; + py::object rpcAsync_; + py::object remote_; + }; + + struct RRefTypeFunctions { + py::object onOwner_; + py::object onUser_; + }; + + static PythonRpcHandler& getInstance(); + + // Run a pickled Python UDF and return the result py::object + py::object runPythonUdf(const py::object& pythonUdf); + + // Serialize a py::object into a SerializedPyObj + SerializedPyObj serialize(const py::object& obj); + + // Deserialize a SerializedPyObj into a py::object + py::object deserialize(const SerializedPyObj& serializedObj); + + // Check if obj is RemoteException, then throw it + void handleException(const py::object& obj); + // Alternative if the caller is already holding the GIL.
+ void handleExceptionGILHeld(const py::object& obj); + // Check if obj is a RemoteException instance. + bool isRemoteException(const py::object& obj); + + // Explicitly clean up py::objects to avoid segmentation faults when + // py::objects with CPython are cleaned up later at program exit + // See similar issues reported https://github.com/pybind/pybind11/issues/1598 + // and https://github.com/pybind/pybind11/issues/1493 + // Our local tests also caught these segmentation faults if py::objects are + // cleaned up at program exit. The explanation is: CPython cleans up most + // critical utilities before cleaning up PythonRpcHandler singleton, so when + // PythonRpcHandler singleton cleans up py::objects and calls dec_ref(), it + // will crash. + // The solution is to clean up py::objects earlier when Rpc agent join(). + // Note that py::objects can not be cleaned up when Rpc agent is destroyed + // either, as Rpc agent is a global variable and it will have the same issue + // as PythonRpcHandler. + void cleanup(); + + std::shared_ptr jitCompilationUnit(); + + // Parse the string to recover the jit_type, this is used for RRef python + // pickling/unpickling type recovery. The type string inference rule is as + // follows: + // 1. first try to parse if this is primitive types. + // i.e. TensorType, IntType, PyObjectType, etc. + // 2. if not primitive type, we query the python_cu to see if it is a + // class type or interface type registered in python + // We use a ScriptTypeParser instance with custom PythonTypeResolver + // to resolve types according to the above rules. + TypePtr parseTypeFromStr(const std::string& typeStr); + + // Return a set of Python functions for RRef helpers. + const RRefProxyFunctions& getRRefProxyFunctions() const; + + // Return a set of Python functions to retrieve the type of the object + // referenced by a given RRef.
+ const RRefTypeFunctions& getRRefTypeFunctions() const; + + PythonRpcHandler(const PythonRpcHandler&) = delete; + PythonRpcHandler& operator=(const PythonRpcHandler&) = delete; + PythonRpcHandler(PythonRpcHandler&&) = delete; + PythonRpcHandler& operator=(PythonRpcHandler&&) = delete; + + private: + void init(); + PythonRpcHandler(); + ~PythonRpcHandler() = default; + + // Ref to `torch.distributed.rpc.internal._run_function`. + py::object pyRunFunction_; + + // Ref to `torch.distributed.rpc.internal.serialize`. + py::object pySerialize_; + + // Ref to `torch.distributed.rpc.internal.deserialize`. + py::object pyDeserialize_; + + // Ref to `torch.distributed.rpc.internal._handle_exception`. + py::object pyHandleException_; + + // Python functions for RRef proxy + RRefProxyFunctions rrefProxyFunctions_; + + // Ref to 'torch.distributed.rpc.api._rref_typeof_on_' + RRefTypeFunctions rrefTypeFunctions_; + + // Shared ptr to python compilation unit in jit, it is constructed in python + // side (see _python_cu = torch._C.CompilationUnit() in jit/__init__.py) + // and imported in C++ (see get_python_cu() in + // csrc/jit/python/pybind_utils.h). We import the compilation unit here only + // once for less cost and thread safety. + std::shared_ptr jitCompilationUnit_; + + // jit type parser to parse type_str back to TypePtr for RRef type + // recovery when pickling and unpickling RRef + std::shared_ptr typeParser_; + + // Indicates whether or not we have properly initialized the handler. + // NOTE(review): no in-class initializer here; presumably set by the + // constructor in the .cpp - confirm. + bool initialized_; + + // Lock to protect initialization. + std::mutex init_lock_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h new file mode 100644 index 0000000000000000000000000000000000000000..f0b0a52371b27e14b530df19a47491337c856d63 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +// Functor which is invoked to process an RPC message. This is an abstract class +// with some common functionality across all request handlers. Users need to +// implement this interface to perform the actual business logic. +class TORCH_API RequestCallback { + public: + // Invoke the callback. + c10::intrusive_ptr operator()( + Message& request, + std::vector streams) const; + + virtual ~RequestCallback() = default; + + protected: + // RpcAgent implementation should invoke ``RequestCallback`` to process + // received requests. There is no restriction on the implementation's + // threading model. This function takes a non-const lvalue reference to the + // Message object. It is expected to return the future to a response message + // or message containing an exception. Different rpc agent implementations are + // expected to ensure delivery of the response/exception based on their + // implementation specific mechanisms. + virtual c10::intrusive_ptr processMessage( + Message& request, + std::vector streams) const = 0; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..7bea64f4b19c05dec4201eead386978cf6a32c64 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_impl.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +class TORCH_API RequestCallbackImpl : public RequestCallbackNoPython { + public: + std::unique_ptr deserializePythonRpcCommand( + std::unique_ptr rpc, + const MessageType& messageType) const override; + + c10::intrusive_ptr processPythonCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processScriptCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processScriptRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processPythonRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const override; + + c10::intrusive_ptr processPythonRRefFetchCall( + RpcCommandBase& rpc) const override; + + void handleRRefDelete(c10::intrusive_ptr& rref) const override; + + c10::intrusive_ptr processRpcWithErrors( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const override; + + bool cudaAvailable() const override; + + c10::intrusive_ptr processRRefBackward( + RpcCommandBase& rpc) const override; + + // Helpers to run user-defined functions, operators and other computations. 
+ + c10::intrusive_ptr runJitFunction( + const c10::QualifiedName& name, + std::vector& stack, + const std::vector& streams, + bool isAsyncExecution) const; + + c10::intrusive_ptr runPythonFunction( + const py::object& function, + const std::vector& streams, + bool isAsyncExecution) const; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h new file mode 100644 index 0000000000000000000000000000000000000000..e8632437b14fefed1c3de5c8ea390059c98c87a1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/request_callback_no_python.h @@ -0,0 +1,120 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// RequestCallback implementation with no Python dependencies. 
+class TORCH_API RequestCallbackNoPython : public RequestCallback { + public: + c10::intrusive_ptr processMessage( + Message& request, + std::vector streams) const override; + + protected: + virtual std::unique_ptr deserializePythonRpcCommand( + std::unique_ptr rpc, + const MessageType& messageType) const; + + virtual c10::intrusive_ptr processScriptCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + virtual c10::intrusive_ptr processPythonCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr assignOwnerRRef( + const RRefId& rrefId, + const RRefId& forkId, + const c10::intrusive_ptr& valueFuture) const; + + virtual c10::intrusive_ptr processScriptRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + virtual c10::intrusive_ptr processPythonRemoteCall( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr retrieveOwnerRRef(const RRefId& rrefId) const; + + c10::intrusive_ptr processScriptRRefFetchCall( + RpcCommandBase& rpc) const; + + virtual c10::intrusive_ptr processPythonRRefFetchCall( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefUserDelete( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefChildAccept( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRRefForkRequest( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processForwardAutogradReq( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr processBackwardAutogradReq( + RpcCommandBase& rpc, + const std::vector& streams) const; + + c10::intrusive_ptr processCleanupAutogradContextReq( + RpcCommandBase& rpc) const; + + c10::intrusive_ptr processRunWithProfilingReq( + RpcCommandBase& rpc) const; + + virtual void handleRRefDelete(c10::intrusive_ptr& rref) const; + + c10::intrusive_ptr processRpc( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const; + + virtual c10::intrusive_ptr 
processRpcWithErrors( + RpcCommandBase& rpc, + const MessageType& messageType, + const std::vector& streams) const; + + c10::intrusive_ptr handleError( + const std::exception& e, + const MessageType messageType, + int64_t messageId) const; + + virtual bool cudaAvailable() const; + + virtual c10::intrusive_ptr processRRefBackward( + RpcCommandBase& rpc) const; + + // Helpers to run user-defined functions, operators and other computations. + + c10::intrusive_ptr runJitOperator( + const jit::Operator& op, + std::vector& stack, + const std::vector& streams) const; + + // Helpers to convert various kinds of objects into already-completed futures. + + c10::intrusive_ptr asFuture(IValue value, TypePtr type) const; + + c10::intrusive_ptr asFuture( + c10::intrusive_ptr message) const; + + c10::intrusive_ptr asFuture(std::exception_ptr err) const; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h new file mode 100644 index 0000000000000000000000000000000000000000..9e0ced778117bcf6dc19a32b2a7fc90749172e88 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..7ce5590994cca2b262f3015e183d36c616f69137 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_agent.h @@ -0,0 +1,345 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using DeviceMap = std::unordered_map; + +// Default RPC timeout +constexpr float kDefaultRpcTimeoutSeconds = 60; +// Unset RPC timeout. This is the value agent::send() will have if user does not +// pass in a specific timeout, and indicates that we must use the default +// timeout for RPCs. +constexpr float kUnsetRpcTimeout = -1; +constexpr auto kDefaultInitMethod = "env://"; +constexpr float kSecToMsConversion = 1000; +constexpr auto kRpcTimeoutErrorStr = + "RPC ran for more than set timeout ({} ms) and will now be marked with an error"; +constexpr auto kDefaultNumWorkerThreads = 16; + +using steady_clock_time_point = + std::chrono::time_point; +// Input is qualified name string, output is JIT StrongTypePtr +// Same as jit::TypeResolver, did not import jit::TypeResolver to here +// because it could introduce cyclic dependencies. 
+using TypeResolver = + std::function; + +struct TORCH_API RpcBackendOptions { + RpcBackendOptions() + : RpcBackendOptions(kDefaultRpcTimeoutSeconds, kDefaultInitMethod) {} + + RpcBackendOptions(float rpcTimeoutSeconds, std::string initMethod) + : rpcTimeoutSeconds(rpcTimeoutSeconds), + initMethod(std::move(initMethod)) { + TORCH_CHECK(rpcTimeoutSeconds >= 0, "RPC Timeout must be non-negative"); + } + + float rpcTimeoutSeconds; + std::string initMethod; +}; + +// A globally unique ID to identify an RpcAgent. Equal iff id_ and name_ match. +struct TORCH_API WorkerInfo : torch::CustomClassHolder { + WorkerInfo(std::string name, int64_t id); + + WorkerInfo(std::string name, worker_id_t id); + + bool operator==(const WorkerInfo& rhs) const { + return (id_ == rhs.id_) && (name_ == rhs.name_); + } + + static constexpr size_t MAX_NAME_LEN = 128; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string name_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t id_; +}; + +struct TORCH_API RegisterWorkerInfoOnce { + RegisterWorkerInfoOnce(); +}; + +TORCH_API std::ostream& operator<<( + std::ostream& os, + const WorkerInfo& workerInfo); + +// Struct for options to configure the RPC Retry protocol. +struct TORCH_API RpcRetryOptions { + // Using a default constructor like all other Options structs in the RPC + // codebase. TORCH_CHECKs for input validation are done in the + // sendWithRetries function. + RpcRetryOptions() = default; + // Maximum number of times we will retry the RPC + int maxRetries{5}; + // Initial duration between consecutive RPC send attempts + std::chrono::milliseconds rpcRetryDuration{std::chrono::milliseconds(1000)}; + // Constant for exponential backoff used while calculating future wait + // durations + float retryBackoff{1.5}; +}; + +// Struct that stores all the metadata needed to retry a given RPC.
+struct TORCH_API RpcRetryInfo { + RpcRetryInfo( + const WorkerInfo& to, + c10::intrusive_ptr message, + c10::intrusive_ptr originalFuture, + int retryCount, + RpcRetryOptions options) + : to_(to), + message_(std::move(message)), + originalFuture_(std::move(originalFuture)), + retryCount_(retryCount), + options_(options) {} + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const WorkerInfo& to_; + c10::intrusive_ptr message_; + // Future that is returned to the caller of sendWithRetries(). + c10::intrusive_ptr originalFuture_; + // Number of send attempts completed so far. + int retryCount_; + RpcRetryOptions options_; +}; + +// ``RpcAgent`` is the base class for sending and receiving RPC messages. It +// provides a unified ``send`` API for both request and response messages, and +// will invoke the given ``RequestCallback`` to process received requests. It +// should immediately become ready to serve request and accept response after +// construction. +class TORCH_API RpcAgent { + public: + // `WorkerInfo` is the globally unique identifier for this RpcAgent instance. + // It contains a ``name_`` field and an ``id_`` field. ``name_`` is the + // globally unique name for this ``RpcAgent``. It is up to the ``RpcAgent`` + // implementation to determine how to resolve names. ``id_`` is the globally + // unique ID for this ``RpcAgent``. This should be determined by the + // ``RpcAgent`` implementation. + // The ``RequestCallback`` will be invoked to handle received requests. This + // ``RpcAgent`` base class makes no assumption on the thread-safeness of the + // ``RequestCallback``. ``RpcAgent`` implementations need to make sure that + // its threading model conform to ``RequestCallback``'s requirement. + // NB: RpcAgent implementations should not start serving requests until + // ``start()`` is called, as there could be other contexts that have not been + // initialized yet at this time. 
+ RpcAgent( + WorkerInfo id, + std::unique_ptr cb, + std::chrono::milliseconds rpcTimeout); + + virtual ~RpcAgent(); + + // Send a message to the ``RpcAgent`` of id ``to`` and returns a + // ``JitFuture`` ptr. The implementation must be asynchronous, i.e., it + // cannot block until it receives the response. + // + // If ``message.isRequest()`` is true, the ``JitFuture`` will be + // completed when the response arrives. For other message types, the Future + // should be ignored by the caller. + virtual c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) = 0; + + // Retries sending the message up to maxRetries times until an ACK is + // received. The duration between consecutive sends is increased over + // time using an exponential backoff algorithm. + // + // Sends ``message`` to the ``RpcAgent`` of id ``to`` and returns a + // ``JitFuture`` ptr, just like send(). Caller can specify the maximum + // number of retries for this RPC (default is 5), initial duration between + // sends (default is 1000ms), and backoff constant (default is 1.5) by + // passing in the RpcRetryOptions struct. This API might end up + // executing a method twice on the remote end (it does not guarantee + // exactly-once semantics). Therefore, the user must ensure their requests + // are idempotent. + c10::intrusive_ptr sendWithRetries( + const WorkerInfo& to, + c10::intrusive_ptr message, + RpcRetryOptions retryOptions = RpcRetryOptions()); + + // Return a reference to the ``WorkerInfo`` of this RpcAgent. + // NB: not using ``std::optional`` here because we might + // need to create a separate RPC API lib and avoid forcing all ``RpcAgent`` + // implementations to depend on libtorch. + const WorkerInfo& getWorkerInfo() const; + + // Return a reference to the ``WorkerInfo`` of the given ``workerName``. 
+ virtual const WorkerInfo& getWorkerInfo( + const std::string& workerName) const = 0; + + virtual const WorkerInfo& getWorkerInfo(worker_id_t id) const = 0; + + virtual std::vector getWorkerInfos() const = 0; + + // Retrieve the timeout for all RPCs. + inline std::chrono::milliseconds getRpcTimeout() const { + return rpcTimeout_.load(); + } + + // Set the timeout for all RPCs. + inline void setRpcTimeout(const std::chrono::milliseconds& rpcTimeout) { + rpcTimeout_.store(rpcTimeout); + } + + // Call sync and join all internal threads. This method should be called + // before every RPC process exits. + virtual void join(bool shutdown = false, float timeout = 0) = 0; + + // Synchronize this process with other ``RpcAgent`` processes. Block until + // all ``RpcAgent``s reach this method and send all pending messages. + virtual void sync() = 0; + + // Sets up backend-agnostic state for accepting requests. Currently, this + // entails setting rpcAgentRunning_ to true, creating the retry thread, and + // calling the backend's startImpl. + void start(); + + // Derived classes must override this function to start accepting requests. + // This is used to initialize any backend-specific state. Users must call + // start, not startImpl, to initialize the RPC Agent. + virtual void startImpl() = 0; + + // Stop accepting requests and shutdown the RPC framework as soon as possible + // by terminating all RPC threads. + void shutdown(); + + // Derived classes must override this function to stop accepting requests. + // This is used to clean up any backend-specific state. Users must call + // shutdown, not shutdownImpl, to shut down the RPC Agent. + virtual void shutdownImpl() = 0; + + // Check if current RPC agent is set. + static bool isCurrentRpcAgentSet(); + + // Retrieve the valid current RPC agent. + static std::shared_ptr getCurrentRpcAgent(); + + // Set the current RPC agent.
+ static void setCurrentRpcAgent(std::shared_ptr rpcAgent); + + // Retrieve metrics as a KV map. + virtual std::unordered_map getMetrics() = 0; + + // Retrieve debug info in addition to metrics as a KV map. + virtual std::unordered_map getDebugInfo(); + + // Set the flag that controls whether GIL wait times + // should be profiled or not. + void enableGILProfiling(bool flag); + + // Retrieve whether we should profile GIL wait times or not. + bool isGILProfilingEnabled(); + + // Set type resolver that will be passed to JIT pickler to resolve type Ptr + // based on type str. + void setTypeResolver(std::shared_ptr typeResolver); + + // Get the type resolver. + std::shared_ptr getTypeResolver(); + + // Retrieves the device map for the provided destination worker. + virtual DeviceMap getDeviceMap(const WorkerInfo& dst) const; + + // Retrieve the (non-CPU) devices that are supported by the agent. + virtual const std::vector& getDevices() const; + + protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const WorkerInfo workerInfo_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::unique_ptr cb_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic rpcTimeout_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic profilingEnabled_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::shared_ptr typeResolver_; + // Atomic boolean indicating whether this agent is running. It controls + // whether several background threads should be running. It is set in + // RpcAgent::start() and unset in the derived class shutdown().
+ // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic rpcAgentRunning_; + + private: + static std::shared_ptr currentRpcAgent_; + // Add GIL wait time data point to metrics + virtual void addGilWaitTime(const std::chrono::microseconds gilWaitTime) = 0; + friend class PythonRpcHandler; + + // Map that stores metadata for RPC's that may need to be re-tried as well as + // the timepoint at which we should re-try them. + std::map< + steady_clock_time_point, + std::unordered_set>> + rpcRetryMap_; + + // Thread that checks for retryable RPC's in the rpcRetryMap_ and sleeps until + // the next unACKed RPC's timeout has expired. + std::thread rpcRetryThread_; + + // Function that rpcRetryThread_ calls in a loop as long as RpcAgent is + // running. + void retryExpiredRpcs(); + + // This is the callback attached to futures corresponding to send retries. + // This handles 3 cases: 1). send was completed, 2). send failed with an + // error and we've done maxRetries failed send attempts, and 3). send + // failed with an error and we have more retries to go. In case 1, we mark + // the original future as complete. In case 2, we mark the future with an + // error and do not retry again. In case 3, we move the RpcRetryInfo struct + // to another time point in the map to schedule the RPC for a future send. + void rpcRetryCallback( + JitFuture& message, + steady_clock_time_point newTime, + std::shared_ptr earliestRpc); + + // Function that uses the exponential backoff algorithm to compute the next + // time point to retry a given RPC. + inline steady_clock_time_point computeNewRpcRetryTime( + RpcRetryOptions& options, + int retryCount) { + // The exponential backoff algorithm being used here is: + // newTime = timeNow + (retryDuration * (backoffConstant ^ retryCount)). 
+ std::chrono::milliseconds timedelta = + std::chrono::duration_cast( + options.rpcRetryDuration * pow(options.retryBackoff, retryCount)); + return std::chrono::time_point_cast( + std::chrono::steady_clock::now() + timedelta); + } + + // Condition Variable to signal when the rpcRetryMap_ has been populated. + std::condition_variable rpcRetryMapCV_; + + // Mutex to protect RpcRetryMap_. + std::mutex rpcRetryMutex_; +}; + +} // namespace torch::distributed::rpc + +namespace std { +template <> +struct hash { + std::size_t operator()( + const torch::distributed::rpc::WorkerInfo& worker_info) const noexcept { + return worker_info.id_; + } +}; +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h new file mode 100644 index 0000000000000000000000000000000000000000..2ea338813b121c913e1cd0d78fdf5dfe22c03bb1 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rpc_command_base.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// Base class for all RPC request and responses. +class RpcCommandBase { + public: + // Need to override this to serialize the RPC. This should destructively + // create a message for the RPC (Hence the &&). 
+ c10::intrusive_ptr toMessage() && { + JitRRefPickleGuard jitPickleGuard; + return std::move(*this).toMessageImpl(); + } + virtual c10::intrusive_ptr toMessageImpl() && = 0; + virtual ~RpcCommandBase() = 0; +}; + +inline RpcCommandBase::~RpcCommandBase() = default; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h new file mode 100644 index 0000000000000000000000000000000000000000..6f1703a51e6f67be61b2591e3728eda4da5d5566 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_context.h @@ -0,0 +1,340 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::distributed::rpc { + +namespace callback { +// It's the callback for RemoteCall. +void TORCH_API +confirmPendingUser(const JitFuture& jitFuture, const ForkId& expectedForkId); + +// It's the callback for finishing creating owner rref, it returned deletedRRef, +// so that the deletedRRef can be handled under GIL in python_functions.cpp if +// deletedRRef contains python object. +c10::intrusive_ptr TORCH_API +finishCreatingOwnerRRef(const JitFuture& jitFuture, const RRefId& rrefId); +} // namespace callback + +// Manages RRef lifetime and keeps track of RRef forks. +class TORCH_API RRefContext { + public: + static RRefContext& getInstance(); + // NB: This method must be called before destructing RRefContext singleton. 
+ // Similar to delForkOfOwner, this method returns a vector of OwnerRRefs that + // hold py::object. The call-site is also responsible for resetting those + // shared_ptr objects with a GIL. See comments at delForkOfOwner() for more + // details. + static std::vector> destroyInstance( + bool ignoreRRefLeak = true); + + static void handleException(const JitFuture& jitFuture); + + // handle exception without throw ::c10::Error again + static void handleExceptionSilent(const JitFuture& jitFuture); + + RRefContext(const RRefContext&) = delete; + RRefContext(RRefContext&& other) = delete; + void operator=(const RRefContext&) = delete; + RRefContext& operator=(RRefContext&& other) = delete; + + ~RRefContext(); + + // get the worker id of the current worker + inline worker_id_t getWorkerId() const { + return agent_->getWorkerInfo().id_; + } + + // get the worker name of the current worker + inline const std::string& getWorkerName() const { + return agent_->getWorkerInfo().name_; + } + + // generate a globally unique ID + inline GloballyUniqueId genGloballyUniqueId() { + return GloballyUniqueId(getWorkerId(), nextLocalId_++); + } + + inline const std::shared_ptr& agent() const { + return agent_; + } + + // create a ``UserRRef`` owned by the worker ``ownerId`` + c10::intrusive_ptr createUserRRef( + worker_id_t ownerId, + const TypePtr& type); + + // Convert an RRefForkData into an RRef. This RRef could be user or owner. + // This RRef could have already existed before, or could be created in this + // method, we pass type here to validate or help the rref creation. + c10::intrusive_ptr getOrCreateRRef( + const RRefForkData& rfd, + const TypePtr& type); + + // Get the ``OwnerRRef`` of id ``rrefId``. If it does not exist, create a new + // one. This function is called in two places: + // 1. when processing ``rpc.remote()``, i.e., ``SCRIPT_REMOTE_CALL`` + // ``PYTHON_REMOTE_CALL``. + // 2. when unpickling ``OwnerRRef``. 
+ // What's common in these two cases are, 1) the RRefId is already generated + // 2) the TypePtr is presented. So it can always create the ``OwnerRRef`` if + // it is not yet available. + c10::intrusive_ptr getOrCreateOwnerRRef( + const RRefId& rrefId, + const TypePtr& type); + + // Create an empty owner rref of type. + // This method is called to first time generate an ``OwnerRRef``, e.g., + // 1) ``rpc.RRef(obj)`` + // 2) create the ``OwnerRRef`` on `rpc.remote()` caller side. + // What's common in these two cases are, 1) the RRefId hasn't been generated + // 2) the TypePtr is presented. + c10::intrusive_ptr createOwnerRRef(const TypePtr& type); + + // Returns a Future of the OwnerRRef, which will be marked completed when + // ``OwnerRRef`` is created. This method is used when the TypePtr is not + // available, e.g., when processing to_here(). The forceCreated flag can be + // used to ensure that the rref is created on the owner, otherwise throw in + // cases where the user of this API expects this to return a completed future. + // Note that the return value is a intrusive_ptr to a c10::ivalue::Future that + // holds the RRef. + c10::intrusive_ptr getOwnerRRef( + const RRefId& rrefId, + bool forceCreated = false); + + // Adding the RRefId of an OwnerRRef into the forks_ map. This is useful when + // making a remote call to self, which as for now, still goes through serde + // and invokes request callback. In this case, the OwnerRRef has already been + // created on the send side, and we need to pass it to the receive side, + // instead of creating a new OwnerRRef. This is done by adding the OwnerRRef + // into owners_. However, that alone is not enough, as it could be deleted + // when all UserRRef die, which would then remove the OwnerRRef from owners_ + // and this could happen before the self remote call finishes. To prevent + // that, this API adds the RRefId as a ForkId, which will then delete the + // ForkId when the self remote is done. 
+ void addSelfAsFork(c10::intrusive_ptr& rref); + + // Register a fork of the ``OwnerRRef``, and inserts a intrusive_ptr of the + // ``OwnerRRef`` in a map to keep it alive. + void addForkOfOwner(const RRefId& rrefId, const ForkId& forkId); + // Performs the same function as addForkOfOwner but ignores duplicate + // requests. This idempotent function is used with RREF_FORK_REQUEST calls, + // whereas all other message types use the non-idempotent variant. + void addForkOfOwnerIfNotPresent(const RRefId& rrefId, const ForkId& forkId); + // Delete a fork of the ``OwnerRRef``. NB: this could trigger deletion on the + // IValue or py::object. For the later, this method will acquire GIL. + // NB: If this fork deletion triggered deleting OwnerRRef, this method will + // return a shared_ptr to the OwnerRRef, which is likely to be the last + // shared_ptr instance for it. Therefore, deleting this shared_ptr + // will also trigger deleting the object it points to. If OwnerRRef holds a + // py::object, deleting it require GIL. The call site should guarded it with + // a GIL and reset the shared_ptr. The GIL-guarded deletion is intentionally + // left out of this function to avoid creating dependency on pybind. + c10::intrusive_ptr delForkOfOwner( + const RRefId& rrefId, + const ForkId& forkId); + + // Invoked when pickling an RRef to setup child/fork properly + RRefForkData prepareChildFork(const c10::intrusive_ptr& rref); + // Invoked when unpickling an RRef to send RREF_FORK_REQUEST to owner and + // send RREF_CHILD_ACCEPT to the parent. + // NB: forkId is necessary here as the rref could be an OwnerRRef + void notifyOwnerAndParentOfFork( + const ForkId& forkId, + worker_id_t parent, + const c10::intrusive_ptr& rref); + + // When a UserRRef is forked to another worker (user or owner), it is added + // into pendingChildren_ to be held alive until it receives RREF_CHILD_ACCEPT + // from the child. + // NB: This is necessary for both user and owner child. 
As we do not have FIFO + // communication between workers, we need this strategy to make sure that all + // previously submitted rpc/remote calls are acked before sending out the + // RREF_USER_DELETE message. Otherwise, the OwnerRRef could be deleted too + // soon. + void addPendingChild( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + void delPendingChild(const ForkId& forkId); + + // When a UserRRef is created, it is added into pendingUsers_ to be held alive + // until it receives RREF_USER_ACCEPT from the owner. + void addPendingUser( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + void delPendingUser(const ForkId& forkId); + void addConfirmedUser( + const ForkId& forkId, + const c10::intrusive_ptr& rref); + + // Retrieve a pending user given the fork ID. Throws if the user has already + // been confirmed (i.e. is no longer in the pendingUsers_ map). + c10::intrusive_ptr getPendingUser(const ForkId& forkId); + + // Start recording new pending UserRRefs. All pending UserRRefs introduced + // after this point will be put into the thread_local userTable_, which will + // then be consumed and cleared in waitForThreadLocalPendingRRefs(). + void recordThreadLocalPendingRRefs(); + // End recording new pending UserRRefs, and clear the thread_local userTable_. + // Returns a Future which will be marked as completed when all pending + // UserRRefs in the current userTable_ are confirmed by their owners. The bool + // value in the Future is unused. + // This method is useful to make sure RRefs in user function arguments are + // confirmed before launching user code. + // NB: Callers of this method does not need to keep the returned Future alive, + // because this Future is already captured in callbacks of the + // PendingUserState. If there is no pending UserRRefs, this method returns a + // completed future. 
+ c10::intrusive_ptr waitForThreadLocalPendingRRefs(); + // Only call this function when there are errors during a recording session, + // and it is likely that waitForThreadLocalPendingRRefs() cannot be invoked + // properly. + // TODO: make this a context guard + void clearRecordedPendingRRefsOnError(); + + void delUser( + const worker_id_t owner, + const RRefId& rrefId, + const ForkId& forkId); + void delAllUsersAndUnforkedOwners(std::chrono::milliseconds timeoutMillis); + + std::unordered_map getDebugInfo(); + + private: + struct PendingUserState { + PendingUserState(c10::intrusive_ptr rref) + : rref_(std::move(rref)), + confirmationFuture_(c10::make_intrusive(BoolType::get())) { + } + + inline void confirm() { + c10::static_intrusive_pointer_cast(rref_)->confirm(); + confirmationFuture_->markCompleted(); + } + + c10::intrusive_ptr rref_; + // Use Future.wait() and Future.markCompleted() to block and unblock user + // functions. The bool value wrapped by the future_ is not used. + c10::intrusive_ptr confirmationFuture_; + }; + + RRefContext(std::shared_ptr /*agent*/); + + c10::intrusive_ptr createUserRRef( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + const TypePtr& type); + + void finishForkRequest(const ForkId& forkId, worker_id_t parent); + + // If there is any leak on any RRef, this method will throw an error. + void checkRRefLeaks(bool ignoreRRefLeak); + + static std::atomic nextLocalId_; + + const std::shared_ptr agent_; + mutable std::mutex mutex_; + // Keep OwnerRRefs alive until there is no living UserRRefs. + std::unordered_map, RRefId::Hash> owners_; + // A map to track OwnerRRefs that are requested but not yet created. This can + // happen if the to_here() message is processed on the owner before the + // corresponding creator rpc.remote() message. 
If this happens, instead of + // to_here() RPC thread to block waiting for the OwnerRRef creation, the + // RRefContext returns a Future, so that the RPC request processing logic can + // attach subsequent code as a callback to that Future. + // NB: the OwnerRRefs in this map must be cleared when the corresponding + // OwnerRRef is created. Note that the values in this map are intrusive_ptrs + // to c10::ivalue::Future that will be marked completed with the owner RRef. + std::unordered_map, RRefId::Hash> + pendingOwners_; + // Tracks known living UserRRefs of an OwnerRRef + std::unordered_map< + RRefId, + std::unordered_set, + RRefId::Hash> + forks_; + + // This cond var is used by deleteAllUsers(), a event notification is sent if + // number of pending UserRRef or UserRRef children is reduced, or + // number of owned OwnerRRef is reduced. + std::condition_variable deleteAllUsersCV_; + // The follow 3 maps keep UserRRefs alive by holding a intrusive_ptr to the + // RRef instances. A UserRRef must be added into this map if any of the + // following two conditions is true: + // + // (1) A UserRRef has not been accepted by owner yet. + // + // It can be used or shared, but cannot be deleted, and hence kept alive + // in this map. A message of type RREF_USER_ACCEPT will move the + // corresponding RRef from pendingUsers_ map to confirmedUsers_ map. + std::unordered_map, ForkId::Hash> + pendingUsers_; + // UserRRefs are added into this map when it is confirmed by the owner. + // When destroying RRefContext this map helps to find local UserRRefs + // and send delete messages if they are still not deleted by Python + // garbage collection. + std::unordered_map, ForkId::Hash> + confirmedUsers_; + + // (2) A UserRRef has forked a child UserRRef which has not been accepted by + // the owner yet. 
+ // + // In this case, this UserRRef cannot send out RREF_USER_DELETE message, + // as it could potentially trigger the OwnerRRef been deleted before the + // owner learns about the forked child. + std::unordered_map, ForkId::Hash> + pendingChildren_; + + // The RRef context performs its operations through async RPC requests, in + // order to not block the user code. Therefore the RRef context's state may be + // lagging a bit behind what it is intended to be, while it waits for these + // requests to complete. To allow syncing when needed, we store the count of + // these pending requests, so that users can wait for it to reach zero. + std::atomic numPendingFutures_{0}; + + std::mutex destroyedMutex_; + bool destroyed_{false}; + + // Thread local states to keep UserRRefs deserialized from user function + // arguments. + static thread_local std::vector> userTable_; + // A flag indicating whether subsequently created UserRRefs should be added to + // the thread_local userTable_. The flag is set to true before serializing + // RPC arguments and then set to false before running the corresponding + // user code. See addPendingUser and delPendingUser for more details. + // NB: The reason for having this flag is because addPendingUser are called in + // two cases, and we only want to track the 2nd case. + // (1) RRef as the return value: when calling rpc.remote, the UserRRef on the + // caller side is added to the context using addPendingUser. + // (2) RRef as an argument: When running an RPC using RRefs as arguments, the + // RRef is forwarded to the callee as new UserRRefs (if the callee is not + // the owner). In this case, we block running the user function until all + // UserRRefs are confirmed by the owner. + // This contract guarantees that no UserRRefs can be used remotely without + // confirmation. Note that, however, the UserRRef created by rpc.remote can + // still be passed to local functions as arguments and used there. 
This is by + // design, because this feature is especially useful when, say a master node + // creates multiple UserRRefs in a loop and then shares them with other nodes. + // Blocking every iteration in the loop until RRefs are confirmed will slow + // this down. This nuance on UserRRef can be interpreted as we only make + // exceptions for UserRRef creators. And using the UserRRef on its creator + // without confirmation is OK, because the creator would either call to_here + // or forward the UserRRef, and both would then require confirmations from the + // owner. + static thread_local bool recording_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..ae01733c5d9dab82ad33d47164120d1037ecb3f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_impl.h @@ -0,0 +1,426 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::distributed::rpc { + +class RRef; +class RRefContext; +class UserRRef; + +constexpr int OWNER_IDX = 0; // index of ownerId in the tuple +constexpr int RREFID_ON_IDX = 1; // index of RRefId.createdOn_ in the tuple +constexpr int RREFID_ID_IDX = 2; // index of RRefId.localId_ in the tuple +constexpr int FORKID_ON_IDX = 3; // index of ForkId.createdOn_ in the tuple +constexpr int FORKID_ID_IDX = 4; // index of ForkId.localId_ in the tuple +constexpr int PARENT_IDX 
= 5; // index of parent in the tuple +constexpr int TYPE_IDX = 6; // index of parent in the tuple + +// NB: if more fields are added, make sure this field is also bumped +constexpr int RFD_TUPLE_SIZE = 7; // number of RRefForkData fields in py::tuple + +// Represents fork of an RRef to be sent over the wire. +struct TORCH_API RRefForkData { + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t ownerId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId forkId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t parent_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string typeStr_; + + RRefForkData( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + worker_id_t parent, + std::string typeStr); +}; + +// Note [RRef Protocol] +// ~~~~~~~~~~~~~~~~~~~~~~~~~~ +// +// [Background] +// +// RRef stands for Remote REFerence. Each RRef is owned by a single worker +// (i.e., owner) and can be used by multiple users. The owner stores the real +// data referenced by its RRefs. RRef needs to support fast and scalable RPC. +// Hence, in the design, we avoid using a single global master to keep RRef +// states, instead owners will keep track of the global reference counts +// for its RRefs. Every RRef can be uniquely identified by a global RRefId, +// which is assigned at the time it is first created either on a user or on the +// owner. +// +// On the owner worker, there is only one OwnerRRef instance, which contains the +// real data, while on user workers, there can be as many UserRRefs as +// necessary, and UserRRef does not hold the data. All usage on the OwnerRRef +// should retrieve the unique OwnerRRef instance using the globally unique +// RRefId. 
//A UserRRef will be created when it is used as an argument or return +// value in dist.rpc or dist.remote call, but RRef forking and reference +// counting (RC) are completely transparent to applications. Every UserRRef will +// also have its globally unique ForkId. +// +// [Assumptions] +// +// 1. Transient Network Failures +// +// TODO: current RRef implementation does not tolerate failures +// +// The RRef design handles transient network failures by retrying +// messages. Node crashes or permanent network partition is beyond the scope. +// When those incidents occur, the application may take down all workers, revert +// to the previous checkpoint, and resume training. +// +// 2. Non-idempotent UDFs +// +// We assume UDFs are not idempotent and therefore cannot be retried. However, +// internal RRef control messages are idempotent and retried upon message +// failure. +// +// TODO: RRef internal messages are not yet idempotent +// +// 3. Out of Order Message Delivery +// +// We do not assume message delivery order between any pair of nodes, because +// both sender and receiver are using multiple threads. There is no guarantee on +// which message will be processed first. +// +// [RRef Lifetime] +// +// The goal of the protocol is to delete an OwnerRRef at an appropriate time. +// The right time to delete an OwnerRRef is when there are no living UserRRefs +// and Python GC also agrees to delete the OwnerRRef instance on the owner. The +// tricky part is to determine if there are any living UserRRefs. +// +// A user can get a UserRRef in three situations: +// +// (1). Receiving a UserRRef from the owner. +// (2). Receiving a UserRRef from another user. +// (3). Creating a new UserRRef owned by another worker. +// +// (1) is the simplest case where the owner initiates the fork, and hence it can +// easily increment local RC. The only requirement is that any UserRRef must +// notify the owner before destruction. Hence, we need the first guarantee: +// +// G1. 
The owner will be notified when any UserRRef is deleted. +// +// As messages might come delayed or out-of-order, we need more one guarantee to +// make sure the delete message is not sent out too soon. Let us first introduce +// a new concept. If A sends an RPC to B that involves an RRef, we call the RRef +// on A the parent RRef and the RRef on B the child RRef. +// +// G2. Parent RRef cannot be deleted until the child RRef is confirmed by the +// owner. +// +// Under (1), where the caller is UserRRef and callee is OwnerRRef, it simply +// means that the user will not send out the delete message until all previous +// messages are ACKed. Note that ACKed does not mean the owner finishes +// executing the function, instead, it only means the owner has retrieved its +// local OwnerRRef and about to pass it to the function, which is sufficient to +// keep the OwnerRRef alive even if the delete message from the user arrives at +// the owner before the function finishes execution. +// +// With (2) and (3), it is possible that the owner only partially knows the RRef +// fork graph or not even knowing it at all. For example, the RRef could be +// constructed on a user, and before the owner receives the RPC call, the +// creator user might have already shared the RRef with other users, and those +// users could further share the RRef. One invariant is that the fork graph of +// any RRef is always a tree rooted at the owner, because forking an RRef always +// creates a new RRef instance, and hence every RRef has a single parent. One +// nasty detail is that when an RRef is created on a user, technically the owner +// is not its parent but we still consider it that way and it does not break the +// argument below. +// +// The owner's view on any node (fork) in the tree has three stages: +// +// 1) unknown -> 2) known -> 3) deleted. +// +// The owner's view on the entire tree keeps changing. 
The owner deletes its +// OwnerRRef instance when it thinks there are no living UserRRefs, i.e., when +// OwnerRRef is deleted, all UserRRefs could be either indeed deleted or +// unknown. The dangerous case is when some forks are unknown and others are +// deleted. +// +// G2 trivially guarantees that no parent UserRRef Y can be deleted before the +// owner knows all of Y's children UserRRefs. +// +// However, it is possible that the child UserRRef Z may be deleted before the +// owner knows its parent Y. More specifically, this can happen when all of Z's +// messages are processed by the owner before all messages from Y, including the +// delete message. Nevertheless, this does not cause any problem. Because, at +// least one of Y's ancestor will be alive, and it will prevent the owner from +// deleting the OwnerRRef. Consider the following example: (NB: this scenario +// will no longer relevant when we block UDF until all RRefs are confirmed by +// the owner) +// +// OwnerRRef -> A -> Y -> Z +// +// OwnerRRef forks to A, then A forks to Y, and Y forks to Z. Z can be deleted +// without OwnerRRef knowing Y. However, the OwnerRRef will at least know A, as +// the owner directly forks the RRef to A. A won't die before the owner knows Y. +// +// Things get a little trickier if the RRef is created on a user: +// +// OwnerRRef +// ^ +// | +// A -> Y -> Z +// +// If Z calls to_here on the UserRRef, the owner at least knows A when Z is +// deleted, because otherwise to_here wouldn't finish. If Z does not call +// to_here, it is possible that the owner receives all messages from Z before +// any message from A and Y. In this case, as the real data of the OwnerRRef has +// not been created yet, there is nothing to be deleted either. It is the same +// as Z does not exist at all Hence, it's still OK. +// +// See #26759 for more details and discussions. 
+// +// TODO: make RRef an IValue, and edit createStackForSchema accordingly +// TODO: make RRef system messages idempotent and retry on failures. +// +// ``RRef`` is the base type for both ``UserRRef`` and ``OwnerRRef``. +// Each ``RRef`` has a globally unique ``RRefId``. +class TORCH_API RRef : public RRefInterface { + public: + // RRef is made NOT copyable NOT movable to prevent messing up reference + // counting. + explicit RRef(const RRef& other) = delete; + explicit RRef(RRef&& other) = delete; + RRef& operator=(RRef&& other) = delete; + + ~RRef() override = default; + + // returns the worker id of the owner + inline worker_id_t owner() const override { + return ownerId_; + } + + // returns the worker name of the owner + inline std::string ownerName() const override { + return RpcAgent::getCurrentRpcAgent()->getWorkerInfo(ownerId_).name_; + } + + // returns the worker info of the owner + inline WorkerInfo ownerWorkerInfo() const { + return RpcAgent::getCurrentRpcAgent()->getWorkerInfo(ownerId_); + } + + // Returns the globally unique RRefId of this RRef + inline const RRefId& rrefId() const { + return rrefId_; + } + + inline bool isPyObj() const { + return type_ == PyObjectType::get(); + } + inline const TypePtr type() const override { + return type_; + } + + // Save the future corresponding to the creation of this RRef on a remote + // node. Note that this is only set when processing requests invoked with + // rpc.remote. This is only used to get the future corresponding to the rref + // for profiling use cases. + inline void registerOwnerCreationFuture(c10::intrusive_ptr fut) { + ownerCreationFuture_ = std::move(fut); + } + + // Get the future corresponding to the creation of this rref. + inline c10::intrusive_ptr getOwnerCreationFuture() const { + return ownerCreationFuture_; + } + + // Check if creation of this RRef on owner node has timed out. 
+ inline bool getTimedOut() const { + return timedOut_.load(); + } + + // Dispatches an error to the correct handler based on its RPCErrorType. + void handleError(RPCErrorType errorType, const JitFuture& JitFuture); + + // Send delete UserRRef request to Owner, + // if the request hasn't been sent yet. + // There are 2 cases to call it, + // 1, Python GC decides end of UserRRef lifetime, calling destructor. + // 2, RPC module graceful shutdown calls it on all UserRRefs tracked + // in the RRefContext. + virtual void tryDel() {} + + protected: + // Indicates that the creation of this RRef on owner node has timed out. + inline void setTimedOut() { + timedOut_ = true; + } + friend class RRefContext; + + RRef(worker_id_t ownerId, const RRefId& rrefId, TypePtr type); + + virtual RRefForkData fork() const; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const worker_id_t ownerId_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::atomic timedOut_{false}; + + // type field to denote the type of the element that the RRef is holding + // it could be any TypePtr that JIT support, including PyObjectType + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const TypePtr type_; + // Future corresponding to request to create RRef on remote node. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + c10::intrusive_ptr ownerCreationFuture_; +}; + +// ``UserRRef`` represents a user of an RRef. Besides the ``RRefId``, each user +// also has a globally unique ``ForkId`` to identify this user. ``UserRRef`` +// never owns the real value, the only way to get the value of the ``RRef`` is +// to call ``to_here()`` and get a copy.. 
+class TORCH_API UserRRef final : public RRef { + public: + UserRRef(const UserRRef& other) = delete; + UserRRef(UserRRef&& other) = delete; + UserRRef& operator=(const UserRRef& other) = delete; + UserRRef& operator=(UserRRef&& other) = delete; + + UserRRef( + worker_id_t ownerId, + const RRefId& rrefId, + const ForkId& forkId, + TypePtr type); + + inline bool isOwner() const override { + return false; + } + + inline bool confirmedByOwner() const override { + return confirmedByOwner_; + } + + // Returns the globally unique ForkId of this RRef + const ForkId& forkId() const; + + // Get of copy of the value from the ``OwnerRRef``. If the value is not ready + // yet, this call will block. + IValue toHere( + const float timeoutSeconds = + torch::distributed::rpc::kUnsetRpcTimeout) const; + + void tryDel() override; + + // Will be called when refcount reaches 0. + // Upon destruction, this ``UserRRef`` will tell the owner to deref. + void release_resources() override; + + // Will be called when both refcount and weakcount reach 0. See + // https://github.com/pytorch/pytorch/blob/9116f02bebf3a5260feef5732d36c54ecb3b4033/c10/util/intrusive_ptr.h#L204 + // This is called on destructing the wrapping intrusive_ptr_target instance + // and it's data members. + ~UserRRef() override; + + private: + friend class RRefContext; + + RRefForkData fork() const override; + inline void confirm() { + confirmedByOwner_ = true; + } + + const ForkId forkId_; + + // Indicates if this user has sent delete message to it's owner. + // Note, thread safety is needed because delete message could be sent by + // either the destructor called by Python garbage collection or RRefContext + // proactive cleanup on RPC graceful shutdown. + std::mutex deletedOnOwnerMutex_; + bool deletedOnOwner_{false}; + // Indicating whether this UserRRef has been confirmed by its owner. 
+ std::atomic confirmedByOwner_; +}; + +// Keep the template only on the derived class because ``RRefContext`` needs to +// erase the type on ``RRef`` and keep them in one map. +class TORCH_API OwnerRRef final : public RRef { + public: + OwnerRRef(const OwnerRRef& other) = delete; + OwnerRRef(OwnerRRef&& other) = delete; + OwnerRRef& operator=(const OwnerRRef& other) = delete; + OwnerRRef& operator=(OwnerRRef&& other) = delete; + + OwnerRRef( + worker_id_t ownerId, + const RRefId& rrefId, + TypePtr type, + std::vector devices); + + OwnerRRef( + worker_id_t ownerId, + const RRefId& rrefId, + TypePtr type, + std::optional value, + std::vector devices); + + inline bool isOwner() const override { + return true; + } + + // OwnerRRef is always confirmed, while UserRRef is only confirmed when the + // owner knows about it. + inline bool confirmedByOwner() const override { + return true; + } + + // Get a constant reference of the real value. This method will block if the + // value is not ready. This method does not need GIL as it does not create + // any new py::object. It will throw if there is an error. + const IValue& getValue() const; + + // Set the value of this ``OwnerRRef``. This method does not need GIL as it + // does not create any new py::object. + void setValue(IValue&& value); + // Sets the value of this ``OwnerRRef`` to contain an exception. + void setError(std::exception_ptr eptr); + + // Has a value or error been set? + bool hasValue() const; + // Gets a future that is satisfied when the value or error is set. 
+ c10::intrusive_ptr getFuture(); + + private: + friend class RRefContext; + + c10::intrusive_ptr future_; +}; + +TORCH_API std::ostream& operator<<(std::ostream& os, const RRef& rref); + +// Helper function that casts from c10::RRefInterface to OwnerRRef +inline TORCH_API c10::intrusive_ptr fromRRefInterface( + const c10::intrusive_ptr& rrefInterface) { + return c10::static_intrusive_pointer_cast(rrefInterface); +} + +// Helper function that casts from OwnerRRef to c10::RRefInterface +inline TORCH_API c10::intrusive_ptr fromOwnerRRef( + const c10::intrusive_ptr& ownerRRef) { + return c10::static_intrusive_pointer_cast(ownerRRef); +} + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h new file mode 100644 index 0000000000000000000000000000000000000000..b96713516e679436aee835898be9d554283e0a3e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/rref_proto.h @@ -0,0 +1,168 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Temporary solution of RRef operations. +// TODO: Remove all these messages and use rpc + registered functions instead. 
+class TORCH_API RRefMessageBase : public RpcCommandBase { + public: + RRefMessageBase(const RRefId& rrefId, MessageType type) + : rrefId_(rrefId), type_(type) {} + + const RRefId& rrefId(); + + protected: + // NOLINTNEXTLINE(cppcoreguidelines*) + const RRefId rrefId_; + // NOLINTNEXTLINE(cppcoreguidelines*) + const MessageType type_; +}; + +class TORCH_API ForkMessageBase : public RRefMessageBase { + public: + ForkMessageBase(const RRefId& rrefId, const ForkId& forkId, MessageType type) + : RRefMessageBase(rrefId, type), forkId_(forkId) {} + + const ForkId& forkId(); + + c10::intrusive_ptr toMessageImpl() && override; + static std::pair fromMessage( + const Message& message, + MessageType type); + + protected: + // NOLINTNEXTLINE(cppcoreguidelines*) + const ForkId forkId_; +}; + +// UserRRef uses this message to fetch the remote RRef value from the owner. +class TORCH_API ScriptRRefFetchCall final : public RRefMessageBase { + public: + ScriptRRefFetchCall(worker_id_t fromWorkerId, const RRefId& rrefId) + : RRefMessageBase(rrefId, MessageType::SCRIPT_RREF_FETCH_CALL), + fromWorkerId_(fromWorkerId) {} + + inline worker_id_t fromWorkerId() const { + return fromWorkerId_; + } + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t fromWorkerId_; +}; + +class TORCH_API PythonRRefFetchCall final : public RRefMessageBase { + public: + PythonRRefFetchCall(worker_id_t fromWorkerId, const RRefId& rrefId) + : RRefMessageBase(rrefId, MessageType::PYTHON_RREF_FETCH_CALL), + fromWorkerId_(fromWorkerId) {} + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage( + const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t fromWorkerId_; +}; + +// OwnerRRef uses this message to send the RRef value to a remote 
UserRRef +class TORCH_API RRefFetchRet : public RpcCommandBase { + public: + RRefFetchRet(std::vector values, MessageType type) + : values_(std::move(values)), type_(type) {} + + const std::vector& values(); + c10::intrusive_ptr toMessageImpl() && override; + + private: + std::vector values_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const MessageType type_; +}; + +class TORCH_API ScriptRRefFetchRet final : public RRefFetchRet { + public: + explicit ScriptRRefFetchRet(std::vector values) + : RRefFetchRet(std::move(values), MessageType::SCRIPT_RREF_FETCH_RET) {} + + static std::unique_ptr fromMessage( + const Message& message); +}; + +class TORCH_API PythonRRefFetchRet final : public RRefFetchRet { + public: + explicit PythonRRefFetchRet(std::vector values) + : RRefFetchRet(std::move(values), MessageType::PYTHON_RREF_FETCH_RET) {} + + static std::unique_ptr fromMessage( + const Message& message); +}; + +// UserRRef (regardless it's the creator or not) uses this message to notify +// OwnerRRef on delete. +class TORCH_API RRefUserDelete final : public ForkMessageBase { + public: + RRefUserDelete(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::RREF_USER_DELETE) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +class TORCH_API RemoteRet final : public ForkMessageBase { + public: + RemoteRet(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::REMOTE_RET) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +// A child RRef uses this message to notify its parent that the child has been +// confirmed by the owner. 
+class TORCH_API RRefChildAccept final : public RpcCommandBase { + public: + explicit RRefChildAccept(const ForkId& forkId) : forkId_(forkId) {} + + const ForkId& forkId() const; + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId forkId_; +}; + +// A child RRef uses this message to send a fork request to the owner. +class TORCH_API RRefForkRequest final : public ForkMessageBase { + public: + RRefForkRequest(const RRefId& rrefId, const ForkId& forkId) + : ForkMessageBase(rrefId, forkId, MessageType::RREF_FORK_REQUEST) {} + + static std::unique_ptr fromMessage(const Message& message); +}; + +class TORCH_API RRefAck final : public RpcCommandBase { + public: + RRefAck() = default; + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h new file mode 100644 index 0000000000000000000000000000000000000000..b4073693ec762921a3816b558a8c76913b940357 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_call.h @@ -0,0 +1,72 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using torch::jit::Operator; + +// A ScriptCall instance represents an invocation of a builtin operator for a +// TorchScript function. If it is a builtin operator, it +// contains a shared ptr to the `Operator` and a list of arguments. +// If it is a TorchScript function, it contains a non empty qualifiedName string +// to the TorchScript function schema name and a list of arguments. +class TORCH_API ScriptCall : public RpcCommandBase { + public: + // Constructor for builtin operator call. + ScriptCall(std::shared_ptr op, std::vector&& stack); + // Constructor for TorchScript function call. 
+ ScriptCall( + const c10::QualifiedName& qualifiedName, + std::vector&& stack, + const bool isAsyncExecution = false); + + bool hasOp() const; + std::shared_ptr op() const; + bool hasQualifiedName() const; + const c10::QualifiedName& qualifiedName() const; + // return the argument stack of this builtin operator + const std::vector& stack() const; + std::vector& stackRef(); + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + ~ScriptCall() override = default; + + protected: + virtual void toIValues(std::vector& ivalues) const; + static std::unique_ptr fromIValues( + std::vector& ivalues); + + private: + // Given an operator symbol and a string schema, return the matched operator. + static std::shared_ptr matchOperator(const std::string& str_schema); + + static const std::string BUILTIN_OP_NAMESPACE_; + static const std::string ATEN_PREFIX_; + + // This field has value if this ScriptCall represents invocation of a builtin + // operator. + std::optional> op_; + // This field has non empty string if this ScriptCall represents invocation of + // an annotated torchscript function defined by users. + std::optional qualifiedName_; + std::vector stack_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..6ae72a328d457a150ae78f30c8c4c1d18c4b2664 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_remote_call.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +using torch::jit::Operator; + +// A ScriptRemoteCall instance represents an invocation of `dist.remote` on a +// builtin operator. Currently, it does not support using RRef as arguments yet. +// Besides the operator and a vector of arguments, ScriptRemoteCall also +// contains the RRefId and the ForkId of the return value RRef. +class TORCH_API ScriptRemoteCall final : public ScriptCall { + public: + // Constructor for builtin operator call. + ScriptRemoteCall( + std::shared_ptr op, + std::vector&& stack, + const RRefId& retRRefId, + const ForkId& retForkId); + + // Constructor for TorchScript function call. 
+ ScriptRemoteCall( + const c10::QualifiedName& qualifiedName, + std::vector&& stack, + const RRefId& retRRefId, + const ForkId& retForkId, + const bool isAsyncExecution); + + inline const RRefId& retRRefId() const { + return retRRefId_; + } + + inline const ForkId& retForkId() const { + return retForkId_; + } + + static std::unique_ptr fromIValues( + std::vector& ivalues); + + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const RRefId retRRefId_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const ForkId retForkId_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h new file mode 100644 index 0000000000000000000000000000000000000000..e4fb8e7ca92d1389f1501908ad7062b0a223a204 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/script_resp.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::distributed::rpc { + +// Return value of a builtin operator or a TorchScript function. 
+class TORCH_API ScriptResp final : public RpcCommandBase { + public: + explicit ScriptResp(at::IValue&& values); + + const at::IValue& value(); + c10::intrusive_ptr toMessageImpl() && override; + static std::unique_ptr fromMessage(const Message& message); + + private: + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const at::IValue value_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..b5f3a788d0cb86657effd94171062c84f0efc0e9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_agent.h @@ -0,0 +1,498 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include +#include + +#include +#include +#include +#include +#include + +// Forward-declare the TensorPipe classes we need, to avoid including its +// headers in PyTorch's ones and thus have it become a public dependency. + +namespace tensorpipe { + +class Context; +class Error; +class Listener; +class Message; +class Pipe; + +namespace transport { +class Context; +} // namespace transport + +namespace channel { +class Context; +} // namespace channel + +} // namespace tensorpipe + +namespace torch::distributed::rpc { + +// These priorities instruct TensorPipe on which transport/channel to pick +// during handshake. Higher priorities will take precedence over lower ones. +// The transport with lowest priority will be the one used to bootstrap pipes. 
+ +constexpr int64_t kShmTransportPriority = 200; +constexpr int64_t kIbvTransportPriority = 100; +// The UV transport just uses TCP and should work everywhere, thus keep it last. +constexpr int64_t kUvTransportPriority = 0; + +constexpr int64_t kCmaChannelPriority = 1200; +constexpr int64_t kMultiplexedUvChannelPriority = 1100; +// The basic channel reuses a transport as a channel, and is thus our fallback. +constexpr int64_t kBasicChannelPriority = 1000; + +// CPU channel have higher priority than CUDA channels, since the latter might +// handle CPU-to-CPU transfers, but will always be less efficient than their +// CPU-only counterparts. +constexpr int64_t kCudaIpcChannelPriority = 300; +constexpr int64_t kCudaGdrChannelPriority = 200; +constexpr int64_t kCudaXthChannelPriority = 400; +constexpr int64_t kCudaBasicChannelPriority = 0; + +using steady_clock_time_point = + std::chrono::time_point; + +struct TORCH_API TransportRegistration { + std::shared_ptr transport; + int64_t priority; + std::string address; +}; + +TORCH_DECLARE_REGISTRY(TensorPipeTransportRegistry, TransportRegistration); + +struct TORCH_API ChannelRegistration { + std::shared_ptr channel; + int64_t priority; +}; + +TORCH_DECLARE_REGISTRY(TensorPipeChannelRegistry, ChannelRegistration); + +struct TORCH_API TensorPipeRpcBackendOptions : public RpcBackendOptions { + TensorPipeRpcBackendOptions( + int numWorkerThreads, + std::optional> transports, + std::optional> channels, + float rpc_timeout, + std::string init_method, + std::unordered_map device_maps = {}, + std::vector devices = {}) + : RpcBackendOptions(rpc_timeout, std::move(init_method)), + numWorkerThreads(numWorkerThreads), + transports(std::move(transports)), + channels(std::move(channels)), + deviceMaps(std::move(device_maps)), + devices(std::move(devices)) { + TORCH_CHECK( + numWorkerThreads > 0, + "num_worker_threads must be positive, got ", + numWorkerThreads); + + if (this->transports.has_value()) { + for (const std::string& 
transportName : this->transports.value()) { + TORCH_CHECK( + TensorPipeTransportRegistry()->Has(transportName), + "Unknown transport: ", + transportName); + } + } + + if (this->channels.has_value()) { + for (const std::string& channelName : this->channels.value()) { + TORCH_CHECK( + TensorPipeChannelRegistry()->Has(channelName), + "Unknown channel: ", + channelName); + } + } + } + + void setDeviceMap(const std::string& workerName, const DeviceMap& deviceMap) { + auto iter = deviceMaps.find(workerName); + if (iter == deviceMaps.end()) { + deviceMaps[workerName] = deviceMap; + } else { + for (auto& entry : deviceMap) { + // c10::Device has no default constructor, hence map[device] doesn't + // work In C++-17 we can use insert_or_assign. + auto entryIter = iter->second.find(entry.first); + if (entryIter == iter->second.end()) { + iter->second.emplace(entry.first, entry.second); + } else { + entryIter->second = entry.second; + } + } + } + } + + int numWorkerThreads; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::optional> transports; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::optional> channels; + std::unordered_map deviceMaps; + std::vector devices; +}; + +// Struct to track the network source metrics +struct TORCH_API NetworkSourceInfo { + worker_id_t srcRank; + std::vector srcMachineAddr; +}; + +// Struct to track aggregated network metrics +struct TORCH_API AggregatedNetworkData { + uint64_t numCalls{0}; + uint64_t totalSentBytes{0}; + uint64_t totalRecvBytes{0}; + uint64_t totalErrors{0}; +}; + +// TensorPipeAgent leverages TensorPipe (https://github.com/pytorch/tensorpipe) +// to transparently move tensors and payloads through the fastest available +// transport or channel. It acts like a hybrid RPC transport, providing shared +// memory (linux) and TCP (linux & mac) support. CUDA support is in progress. 
+class TORCH_API TensorPipeAgent : public RpcAgent { + public: + TensorPipeAgent( + const c10::intrusive_ptr<::c10d::Store>& store, + std::string selfName, + worker_id_t selfId, + std::optional worldSize, + TensorPipeRpcBackendOptions opts, + std::unordered_map reverseDeviceMaps, + std::vector devices, + std::unique_ptr cb); + + TensorPipeAgent(const TensorPipeAgent&) = delete; + TensorPipeAgent& operator=(const TensorPipeAgent&) = delete; + + c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) override; + + // join() and sync() would be deprecated - + // https://github.com/pytorch/pytorch/issues/27647 + void join(bool shutdown = false, float timeout = 0) override; + void sync() override {} + void startImpl() override; + void shutdownImpl() override; + + ~TensorPipeAgent() override; + + const WorkerInfo& getWorkerInfo(const std::string& workerName) const override; + const WorkerInfo& getWorkerInfo(worker_id_t workerId) const override; + std::vector getWorkerInfos() const override; + void updateGroupMembership( + const WorkerInfo& workerInfo, + const std::vector& devices, + const std::unordered_map& reverseDeviceMaps, + bool isJoin); + + std::unordered_map getMetrics() override; + + void addGilWaitTime(const std::chrono::microseconds gilWaitTime) override; + + TensorPipeRpcBackendOptions getBackendOptions() const; + + const c10::intrusive_ptr<::c10d::Store> getStore() const; + + DeviceMap getDeviceMap(const WorkerInfo& dest) const override; + + const std::vector& getDevices() const override; + + using NetworkDataDict = + std::unordered_map; + + // Returns metrics tracked by the NetworkDataDict + NetworkDataDict getNetworkData(); + // Returns NetworkSourceInfo struct + NetworkSourceInfo getNetworkSourceInfo(); + + static const std::string& guessAddress(); + + // For testing purposes. 
+ size_t timeoutMapSize(); + size_t numPendingResponses(); + size_t messageIdToTimeoutMapSize(); + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isStaticGroup_; + + protected: + // TensorPipe write function that could be used to write response + // messages by server, and write request messages by client. This + // is a protected method since it is overwritten by FaultyTensorPipeAgent + virtual void pipeWrite( + const std::shared_ptr& /*pipe*/, + const c10::intrusive_ptr& message, + std::vector&& devices, + std::vector streams, + std::function /*fn*/) noexcept; + + private: + // Removes the given messageId with the given expirationTime from the + // timeoutMap_. + void removeFromTimeoutMap(uint64_t messageId); + + // Populates workerIdToInfo_ and workerNameToInfo_ using addressStore_ + void prepareNames(bool isStaticGroup); + + // Check the static group attribute with the value set in store + void checkAndSetStaticGroup(const c10::intrusive_ptr<::c10d::Store>& store); + + const std::string& findWorkerURL(const WorkerInfo& worker) const; + + // Only use for Dynamic RPC groups, method to have worker leave group + void leaveGroup(); + + // TensorPipe read function that could be used to read response messages + // by client, and read request messages by server. 
+ void pipeRead( + const std::shared_ptr& /*pipe*/, + std::function, + std::vector)> /*fn*/) noexcept; + + // Callback of listener accept() + void onListenerAccepted( + const tensorpipe::Error& error, + std::shared_ptr& pipe); + + // Respond to a call from a peer + void respond(std::shared_ptr& pipe); + + void sendCompletedResponseMessage( + std::shared_ptr& pipe, + JitFuture& futureResponseMessage, + uint64_t messageId, + std::vector stream); + + // Collects metrics from successful RPC calls + void trackNetworkData( + uint64_t requestSize, + uint64_t responseSize, + const std::string& destWorkerName); + + // Collects metrics from failed RPC calls + void trackNetworkError( + uint64_t requestSize, + const std::string& destWorkerName); + + inline std::vector getDevicesForRemote( + const std::string& remoteName, + const Message& message) const; + + // When a request+response completes, we need to mark the future message as + // complete. However, if its timeout has already expired, it already has an + // error set. There is no atomic "test-and-set" way to mark a future complete + // only if it isn't yet. It does exist for errors (setErrorIfNeeded) but, even + // then, it ends up printing a log message, which may worry the user. To solve + // both issues we use a separate atomic flag to know the status of the future. + struct AtomicJitFuture { + explicit AtomicJitFuture(const std::vector& devices) { + jitFuture = c10::make_intrusive( + at::AnyClassType::get(), devices); + } + + std::atomic_flag isComplete = ATOMIC_FLAG_INIT; + c10::intrusive_ptr jitFuture; + }; + + // Maintains state per client pipe to track pending response messages and + // error states. pendingResponseMessage_ should be protected by a mutex since + // it can be raced with user send() call. + // TODO: To achieve better performance we can have a pipe pool per + // client that can be configured using RpcBackendOptions. 
+ struct ClientPipe { + explicit ClientPipe(std::shared_ptr pipe) + : pipe_(std::move(pipe)) {} + std::shared_ptr pipe_; + mutable std::mutex mutex_; + bool inError_{false}; + // Map from Message Request ID's to corresponding futures. + std::unordered_map> + pendingResponseMessage_; + }; + + const c10::intrusive_ptr<::c10d::Store> store_; + + const TensorPipeRpcBackendOptions opts_; + // For dynamic RPC, the reverse device maps are updated whenever a new rank + // joins or leaves the group + std::unordered_map reverseDeviceMaps_; + // Local devices used by this agent. If application didn't specify this + // field, it will be initialized using corresponding local devices in + // opts_.deviceMaps and reverseDeviceMaps_; + std::vector devices_; + + ThreadPool threadPool_; + std::shared_ptr context_; + std::shared_ptr listener_; + + mutable std::mutex connectedPipesMutex_; + std::unordered_map connectedPipes_; + + // Maps keyed on name and id for easy WorkerInfo lookup. + std::unordered_map workerIdToInfo_; + std::unordered_map workerNameToInfo_; + std::unordered_map workerNameToURL_; + + ::c10d::PrefixStore rankToNameStore_; + ::c10d::PrefixStore nameToAddressStore_; + // Store keys that will used to count joined processes and active calls during + // the shutdown process + ::c10d::PrefixStore shutdownStore_; + int worldSize_ = 0; + std::atomic nextMessageID_{0}; + + // Metadata used for tracking of whether certain RPCs have timed out or not. + struct TimeoutMessageMetadata { + TimeoutMessageMetadata( + uint64_t messageId_, + std::shared_ptr responseFuture_, + std::chrono::milliseconds timeout_) + : messageId(messageId_), + responseFuture(std::move(responseFuture_)), + timeout(timeout_) {} + uint64_t messageId; + std::shared_ptr responseFuture; + std::chrono::milliseconds timeout; + }; + + // Map to store the expiration times for each message. + std::map> + timeoutMap_; + + // Map to store the messageId to expiry time. 
+ std::unordered_map messageIdToTimeout_; + + // Thread that will poll the timeoutMap_ for timed out messages and mark them + // with an error accordingly + std::thread timeoutThread_; + + // Function run by the timeoutThread_ to check for timed out RPCs + void pollTimeoutRpcs(); + + // Mutex to guard the timeoutMap_ + std::mutex timeoutMapMutex_; + + // Condition Variable to signal population of the timeoutMap_ + std::condition_variable timeoutThreadCV_; + + // Returns the expiration time for an RPC by adding the current time to the + // passed in timeout. + inline steady_clock_time_point computeRpcMessageExpiryTime( + std::chrono::milliseconds timeout) const { + return std::chrono::time_point_cast( + std::chrono::steady_clock::now() + timeout); + } + + // Handle error on an outgoing pipe + void handleClientError( + ClientPipe& clientPipe, + const tensorpipe::Error& error); + + // This is a generic struct for capturing Time-Series Metrics. It keeps a + // running sum and count of data points (observations), and can return an + // average of the data points seen so far. This is currently only used for + // tracking the GIL Wait Time in RPC Agents, but can be used for other metrics + // as well. + struct TimeSeriesMetricsTracker { + // Running sum of the data points seen so far + uint64_t currentSum_; + // Running count of the data points seen so far + uint64_t currentCount_; + + explicit TimeSeriesMetricsTracker( + uint64_t currentSum = 0, + uint64_t currentCount = 0); + + // Adds a data point (which is basically one observation for the metric + // being tracked) to the running sum and count. + void addData(uint64_t dataPoint); + // Returns the average of all the data points seen so far. 
+ float computeAverage() const; + }; + + // Map of Time-Series metrics tracked by the RPC Agent + std::unordered_map timeSeriesMetrics_; + // Mutex to guard timeSeriesMetrics_ + std::mutex metricsMutex_; + + // Custom lock guard used to check if the RPC group is dynamic and lock the + // mutex if so + struct GroupMembershipLockGuard { + GroupMembershipLockGuard(std::mutex& mutex, bool isStaticGroup) + : ref_(mutex), isStaticGroup_(isStaticGroup) { + if (isStaticGroup_) { + ref_.lock(); + } + } + + ~GroupMembershipLockGuard() { + if (isStaticGroup_) { + ref_.unlock(); + } + } + + GroupMembershipLockGuard(const GroupMembershipLockGuard&) = delete; + + private: + std::mutex& ref_; + bool isStaticGroup_; + }; + // Mutex to guard access to group membership data + // e.g. updates to (workerIdToInfo_, workerNameToInfo_, workerNameToURL_) + mutable std::mutex groupMembershipMutex_; + + // Map to Track Network Data + NetworkDataDict networkData_; + // Mutex to guard networkData_ + std::mutex networkDataMutex_; + + // A mutex and a cv to guard access to the call counts and watch for changes. + std::mutex callCountMutex_; + std::condition_variable callCountCV_; + // Running total of un-processed, un-errored RPC calls sent + int32_t clientActiveCalls_{0}; + // Running total of un-processed RPC requests received + int32_t serverActiveCalls_{0}; + // Running total of RPC requests that will be completed asynchronously + int32_t serverActiveAsyncCalls_{0}; + + // Whether a global graceful shutdown has begun, in which case we'll silence + // error messages due to remote workers closing their pipes. + std::atomic shuttingDown_{false}; + + // Helpers to modify the counts while correctly dealing with the mutex and cv. + void increaseCallCount(int32_t& count); + void decreaseCallCount(int32_t& count); + + // Helpers to set the state of the requests. 
+ void markFutureAsComplete( + std::shared_ptr atomicFuture, + c10::intrusive_ptr message, + std::vector streams); + void markFutureWithError( + std::shared_ptr atomicFuture, + std::string errorMsg); +}; + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..025e143190c2df7c2898f03187e70064619bbf3c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/tensorpipe_utils.h @@ -0,0 +1,124 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include + +namespace tensorpipe { +class Message; +class Allocation; +class Descriptor; +} // namespace tensorpipe + +namespace torch::distributed::rpc { + +TORCH_API const c10::Stream& getStreamForDevice( + const std::vector& streams, + const c10::Device& device); + +// Inspired by c10/core/impl/DeviceGuardImplInterface.h. + +class TensorpipeDeviceTypeConverter { + public: + // Ideally we'd want this to also return a tensorpipe::Message::Tensor object + // but we cannot forward-declare that class (because it's nested), and we + // cannot include the TensorPipe headers because it's a private dependency. + // Thus we bend over backwards and entrust this method with appending that + // object to the `tensors` field of the tensorpipe::Message object we pass. 
+ virtual std::optional> prepareTensorForSending( + const c10::Storage& storage, + const std::vector& streams, + tensorpipe::Message& message) const = 0; + + // Same as above: this method cannot return a tensorpipe::Allocation::Tensor, + // thus it appends it to the `tensors` field of the tensorpipe::Allocation. + virtual at::DataPtr allocateTensorForReceiving( + c10::DeviceIndex deviceIndex, + size_t length, + const std::vector& streams, + tensorpipe::Allocation& allocation) const = 0; + + virtual ~TensorpipeDeviceTypeConverter() = default; +}; + +extern TORCH_API std::array< + std::atomic, + static_cast(DeviceType::COMPILE_TIME_MAX_DEVICE_TYPES)> + device_type_converter_registry; + +class TORCH_API TensorpipeDeviceTypeConverterRegistrar { + public: + TensorpipeDeviceTypeConverterRegistrar( + DeviceType /*type*/, + const TensorpipeDeviceTypeConverter* /*impl*/); +}; + +#define C10_REGISTER_TENSORPIPE_DEVICE_TYPE_CONVERTER( \ + DevType, TensorpipeDeviceTypeConverter) \ + static ::torch::distributed::rpc::TensorpipeDeviceTypeConverterRegistrar \ + C10_ANONYMOUS_VARIABLE(g_##DeviceType)( \ + ::c10::DeviceType::DevType, new TensorpipeDeviceTypeConverter()); + +inline const TensorpipeDeviceTypeConverter* getDeviceTypeConverter( + DeviceType type) { + return device_type_converter_registry[static_cast(type)].load(); +} + +// A struct that holds pointers that keep alive all the memory that will be +// accessed by TensorPipe during a write operation. +struct TensorpipeWriteBuffers { + // Allocate on heap so pointers stay valid as we move the holder. + std::unique_ptr type; + std::unique_ptr id; + std::vector payload; + std::vector pickle; + // This contains the original tensors and the clones of the sparse tensors. + std::vector tensors; + // This contains the copies of the data of the tensors that didn't own their + // memory, e.g., the ones created from torch::from_blob() with no deleter. 
+ std::vector> copiedTensors; +}; + +// A struct that holds pointers that keep alive all the memory that will be +// accessed by TensorPipe during a read operation. +struct TensorpipeReadBuffers { + // Allocate on heap so pointers stay valid as we move the holder. + std::unique_ptr type; + std::unique_ptr id; + std::vector payload; + std::vector pickle; + std::vector tensors; +}; + +// Convert an RPC message into a TensorPipe message, plus a holder to all the +// data that must be kept alive while the write is performed asynchronously. +TORCH_API std::tuple +tensorpipeSerialize( + const c10::intrusive_ptr& rpcMessage, + std::vector devices, + const std::vector& streams); + +// Allocate the buffers that will hold the incoming data. They will be managed +// by the returned holder, which must be kept alive until the asynchronous read +// has finished. Pointers to these buffers will be stored in the returned +// tensorpipe::Allocation struct. +TORCH_API std::pair +tensorpipeAllocate( + const tensorpipe::Descriptor& tpDescriptor, + const std::vector& streams); + +// Convert a TensorPipe message back into an RPC message. This requires the data +// to be available and can thus only be performed once the asynchronous read has +// completed. The holder can be destroyed once this function returns. +TORCH_API c10::intrusive_ptr tensorpipeDeserialize( + const tensorpipe::Descriptor& tpDescriptor, + TensorpipeReadBuffers&& holder); + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h new file mode 100644 index 0000000000000000000000000000000000000000..c0adb349f2095a721970b5bfdd9acd7141abe827 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/faulty_tensorpipe_agent.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef USE_TENSORPIPE + +#include +#include + +namespace torch::distributed::rpc { + +struct TORCH_API FaultyTensorPipeRpcBackendOptions + : public TensorPipeRpcBackendOptions { + FaultyTensorPipeRpcBackendOptions( + int num_worker_threads, + float rpc_timeout, + std::string init_method, + std::vector messages_to_fail, + std::unordered_map messages_to_delay, + int num_fail_sends = 0) + : TensorPipeRpcBackendOptions( + num_worker_threads, + std::optional>(), + std::optional>(), + rpc_timeout, + std::move(init_method)), + messagesToFail(std::move(messages_to_fail)), + messagesToDelay(std::move(messages_to_delay)), + numFailSends(num_fail_sends) { + TORCH_CHECK(numFailSends >= 0, "numFailSends should be non-negative"); + } + + std::vector messagesToFail; + std::unordered_map messagesToDelay; + int numFailSends; +}; + +class TORCH_API FaultyTensorPipeAgent : public TensorPipeAgent { + public: + FaultyTensorPipeAgent( + const c10::intrusive_ptr<::c10d::Store>& store, + std::string selfName, + worker_id_t selfId, + int worldSize, + FaultyTensorPipeRpcBackendOptions opts, + std::unordered_map reverseDeviceMaps, + std::vector devices, + std::unique_ptr callback); + + // Faulty send function for this class. 
+ c10::intrusive_ptr send( + const WorkerInfo& to, + c10::intrusive_ptr message, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const DeviceMap& deviceMap = {}) override; + + // Add delay to writes + void pipeWrite( + const std::shared_ptr& pipe, + const c10::intrusive_ptr& rpcMessage, + std::vector&& devices, + std::vector streams, + std::function fn) noexcept override; + + protected: + // This function checks the messageTypesToFail_ to determine whether to use + // the faulty send or not. + bool shouldFailMessage(MessageType type) const; + + private: + // This function parses the list of strings passed in by the python tests and + // resolves the Message Types that must use the faulty send. + std::vector parseMessagesToFailInput( + const std::vector& messagesToFail) const; + + // Returns amount of time in seconds to delay sending of the given message + // type. + float getDelayForMessage(MessageType type) const; + + // Parse message types that we should inject arbitrary delays for. + std::unordered_map> parseMessagesToDelay( + const std::unordered_map& messageTypesToDelay) const; + + // Number of sends to intentionally fail before allowing one to succeed. + const int numFailSends_; + + // Vector of the MessageTypes that we must use the faulty send for. This is + // parsed based on a list of strings passed in by the python tests. + const std::vector messageTypesToFail_; + + // Mapping of message types to amount we should delay send for in the ::send() + // function. + std::unordered_map> messageTypesToDelay_; + + // Map to track the number of sends we've failed for each RPC. 
+ std::unordered_map failMessageCountMap_; + + // Mutex to guard failMessageCountMap_ + std::mutex failMapMutex_; + + MessageType messageStringToType(const std::string& messageString) const; +}; + +} // namespace torch::distributed::rpc + +#endif // USE_TENSORPIPE + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h new file mode 100644 index 0000000000000000000000000000000000000000..baf94a5397fe21394cf9ab024800a29cbc4fe9d6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/testing/testing.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc::testing { + +PyMethodDef* python_functions(); + +} // namespace torch::distributed::rpc::testing + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..37c6975d05559d13de07463d279ea11cab121f79 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/torchscript_functions.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// This function sends an rpc call to run torchscript function, currently the +// torchscript function could only be a user defined python function with +// "@torch.jit.script" annotation. The torchscript function could not be +// a class constructor, class method, instance method or a script module. 
+// dst: destination worker name +// qualifiedName: torchscript function qualified name string like +// "moduleName::torchscriptFunctionName", e.g, +// "dist_autograd_test::my_py_add" +// stack: a bag of IValue args passed to torchscriptFunctionName +// It returns c10::intrusive_ptr +c10::intrusive_ptr TORCH_API rpcTorchscript( + const std::string& dstWorkerName, + const c10::QualifiedName& qualifiedName, + const c10::FunctionSchema& functionSchema, + std::vector stack, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const bool isAsyncExecution = false); + +c10::intrusive_ptr TORCH_API remoteTorchscript( + const std::string& dstWorkerName, + const c10::QualifiedName& qualifiedName, + const c10::FunctionSchema& functionSchema, + std::vector& stack, + const float rpcTimeoutSeconds = torch::distributed::rpc::kUnsetRpcTimeout, + const bool isAsyncExecution = false); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h new file mode 100644 index 0000000000000000000000000000000000000000..f8ec54b86e986a64fd2f55d2c35e1d5c594f30f4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/types.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::distributed::rpc { + +using worker_id_t = int16_t; +using local_id_t = int64_t; + +bool getAllowJitRRefPickle(); +TORCH_API void enableJitRRefPickle(); +TORCH_API void disableJitRRefPickle(); + +struct TORCH_API JitRRefPickleGuard { + JitRRefPickleGuard(); + JitRRefPickleGuard(JitRRefPickleGuard&& other) = delete; + JitRRefPickleGuard(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(const JitRRefPickleGuard&) = delete; + JitRRefPickleGuard& operator=(JitRRefPickleGuard&&) = delete; + ~JitRRefPickleGuard(); +}; + +struct TORCH_API GloballyUniqueId final { + GloballyUniqueId(worker_id_t createdOn, local_id_t localId); + GloballyUniqueId(const GloballyUniqueId& other) = default; + GloballyUniqueId& operator=(const GloballyUniqueId& other) = delete; + GloballyUniqueId(GloballyUniqueId&& other) = default; + GloballyUniqueId& operator=(GloballyUniqueId&& other) = delete; + ~GloballyUniqueId() = default; + + bool operator==(const GloballyUniqueId& other) const; + bool operator!=(const GloballyUniqueId& other) const; + + at::IValue toIValue() const; + static GloballyUniqueId fromIValue(const at::IValue& /*ivalue*/); + + struct Hash { + size_t operator()(const GloballyUniqueId& key) const { + return (uint64_t(key.createdOn_) << kLocalIdBits) | key.localId_; + } + }; + + static constexpr int 
kLocalIdBits = 48; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const worker_id_t createdOn_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const local_id_t localId_; +}; + +TORCH_API std::ostream& operator<<( + std::ostream& os, + const GloballyUniqueId& globalId); + +using RRefId = GloballyUniqueId; +using ForkId = GloballyUniqueId; +using ProfilingId = GloballyUniqueId; + +struct TORCH_API SerializedPyObj final { + SerializedPyObj(std::string&& payload, std::vector&& tensors) + : payload_(std::move(payload)), tensors_(std::move(tensors)) {} + + std::vector toIValues() &&; + static SerializedPyObj fromIValues(std::vector value); + + std::string payload_; + std::vector tensors_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h new file mode 100644 index 0000000000000000000000000000000000000000..da76292342019059afd7d1c868c7cac1e375fd88 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_call.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::distributed::rpc { + +// This class converts the content in a PythonCall into py::object. 
This is a +// helper class to make sure that all arguments deserialization is done before +// entering RequestCallbackImpl::processRpc(...), so that the deserialization +// related logic can be carried out in one spot instead of scattered in multiple +// places for different message types. +// NB: The reason for not consolidating class into PythonCall is because +// PythonCall is a libtorch type which should not depend on Python types. +class TORCH_API UnpickledPythonCall : public RpcCommandBase { + public: + UnpickledPythonCall( + const SerializedPyObj& serializedPyObj, + bool isAsyncExecution); + ~UnpickledPythonCall() override; + + // toMessage() method is not implemented, as objects of this class should + // never be directly converted into a Message object. + c10::intrusive_ptr toMessageImpl() && override; + const py::object& pythonUdf() const; + + inline bool isAsyncExecution() const { + return isAsyncExecution_; + } + + private: + py::object pythonUdf_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const bool isAsyncExecution_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h new file mode 100644 index 0000000000000000000000000000000000000000..afe8a977a615e59b2f77e180275e9fc0e6adc92b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/unpickled_python_remote_call.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// This class converts the content in a PythonRemoteCall into py::object. This +// is a helper class to make sure that all arguments deserialization is done +// before entering RequestCallbackImpl::processRpc(...), so that the +// deserialization related logic can be carried out in one spot instead of +// scattered in multiple places for different message types. +// NB: The reason for not consolidating class into PythonRemoteCall is because +// PythonRemoteCall is a libtorch type which should not depend on Python types. +class TORCH_API UnpickledPythonRemoteCall final : public UnpickledPythonCall { + public: + explicit UnpickledPythonRemoteCall( + const SerializedPyObj& serializedPyObj, + const at::IValue& retRRefId, + const at::IValue& retForkId, + const bool isAsyncExecution); + + const RRefId& rrefId() const; + const ForkId& forkId() const; + + private: + RRefId rrefId_; + ForkId forkId_; +}; + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..324d76b2e4dc48b6b12b593fa58c198daa723ad9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/distributed/rpc/utils.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::distributed::rpc { + +// Parse error message and return RPCErrorType based on the message. +TORCH_API RPCErrorType getRPCErrorType(const JitFuture& jitFuture); +// Create an error string given the error description and error type +TORCH_API std::string makeRPCError( + const std::string& rpcErrorStr, + RPCErrorType errorType); + +// Given an RPC message received as a request over the wire, deserialize it into +// the appropriate 'RpcCommandBase' type. +TORCH_API std::unique_ptr deserializeRequest( + const Message& request); + +// Given an RPC message received as a response over the wire, deserialize it +// into the appropriate 'RpcCommandBase' type, if the response is +// FORWARD_AUTOGRAD_RESP type, unwrap it, attach recvBackward() functions +// to received tensors and set the wrappedMsgType to its wrapped message type. +TORCH_API std::unique_ptr deserializeResponse( + const Message& response, + MessageType& wrappedMsgType); + +// Given an RPC message received as a response over the wire, deserialize it +// into the valid IValue if the message is for a script rpc result, +// otherwise deserialize it into dummy none ivalue that will never be used. +// In this deserialization, we also attach recv rpc backward functions if +// needed. 
+IValue deserializeResptoIValueInternal( + RpcCommandBase& rpc, + MessageType messageType); +TORCH_API IValue deserializeRespToIValue(const Message& message); + +// Note: format is subject to change and intended for RPCs. +// For saving persistently to disk, use torch::save(). +TORCH_API std::string wireSerialize( + const std::vector& payload, + const std::vector& tensors); + +TORCH_API std::pair, std::vector> wireDeserialize( + const void* data, + size_t data_size); + +// We use vector as the type of blobs because it's what rpc::Message uses +// for its payload, even though it has the disadvantage that it cannot be +// allocated with uninitialized memory: it is always zeroed out. + +// Some Tensors are effectively views of larger Tensors, where only a small +// subset of the Storage data is referenced. This normally is good and avoids +// copies when kept locally, but if we naively push the whole Storage over the +// wire, we'll end up with excess network traffic. This change clones tensors if +// we'd save at least half the data, and over a minimum hurdle. +TORCH_API c10::List cloneSparseTensors( + const std::vector& tensors); + +// Combines an original payload and wrapped payload into the original payload. +// Used to generate the overall payload for the wrapped RPC. +TORCH_API void writeWrappedPayload( + std::vector& originalPayload, + std::vector& additionalPayload); + +// Reads the additional, wrapped payload from a wrapped RPC off of the input +// payload. After this, payload will contain the payload of the original, +// un-wrapped RPC. +TORCH_API std::vector readWrappedPayload( + std::vector& payload, + const rpc::Message& message); + +// Takes a list of events from autograd profiler and populates them into +// profiledEvents to be carried over RPC. 
+TORCH_API void populateRemoteProfiledEvents( + std::vector& profiledEvents, + const torch::autograd::profiler::ProfilerConfig& profilerConfig, + const std::vector>& + eventLists); + +} // namespace torch::distributed::rpc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/compilation_unit.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/compilation_unit.h new file mode 100644 index 0000000000000000000000000000000000000000..e6264f6f992a61123df6647a90176397261bc47f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/compilation_unit.h @@ -0,0 +1,356 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct Def; +struct Property; +struct ClassDef; +struct SugaredValue; +struct Resolver; + +using ResolverPtr = std::shared_ptr; +struct Self { + virtual ~Self() = default; + virtual std::shared_ptr makeSugared(Value* v) const = 0; + virtual ClassTypePtr getClassType() const = 0; +}; + +// A CompilationUnit is a list of named Functions +// with helper methods to iterate the list or invoke the function. 
+// Classes have a CompilationUnit holding the class methods, +// and Modules have a CompilationUnit holding the Functions that +// are used to implement their Methods + +struct TORCH_API CompilationUnit { + enum class FunctionType { Method, Hook, PreHook }; + // constructor that takes a set of functions to compile using the native + // resolver + explicit CompilationUnit(const std::string& source); + CompilationUnit() = default; + + CompilationUnit& operator=(CompilationUnit&&) = default; + CompilationUnit(CompilationUnit&&) = default; + CompilationUnit& operator=(const CompilationUnit&) = delete; + CompilationUnit(const CompilationUnit&) = delete; + + Function* find_function(const c10::QualifiedName& name) const { + auto it = dict_.find(name); + if (it == dict_.end()) { + return nullptr; + } + return functions_[it->second].get(); + } + + Function& get_function(const c10::QualifiedName& name) const { + if (auto r = find_function(name)) { + return *r; + } + TORCH_CHECK(false, "attempted to get undefined function ", name.name()); + } + + void set_optimized(bool o) { + TORCH_WARN( + "CompilationUnit::set_optimized() is deprecated and has no effect. " + "Please use setGraphExecutorOptimize()"); + } + + bool is_optimized() const { + TORCH_WARN( + "CompilationUnit::is_optimized() is deprecated and always returns true. " + "Please use getGraphExecutorOptimize()"); + return true; + } + + // for historic reasons, these are defined in ir_emitter.cpp + // Returns the list of Functions just defined. 
+ std::vector define( + const std::optional& prefix, + const std::vector& properties, + const std::vector& propResolvers, + const std::vector& definitions, + const std::vector& + defResolvers, /* determines how we handle free + variables in each definition*/ + // if non-null, the first argument to each def, is bound to this value + const Self* self, + // see [name mangling] + bool shouldMangle = false, + std::optional operator_set_version = std::nullopt); + + void define_hooks( + const std::optional& prefix, + const std::vector& hookDefs, + const std::vector& hookResolvers, + const std::vector& preHookDefs, + const std::vector& preHookResolvers, + const Self* self, + bool shouldMangle = false); + + // same as above but parse the definitions from source + // Returns the list of Functions just defined. + std::vector define( + // prefix namespace to put all the defined functions into + const std::optional& prefix, + const std::string& source, + const ResolverPtr& resolver, + const Self* self); + + void define_interface( + const c10::QualifiedName& qualifiedName, + const ClassDef& classDef, + ResolverPtr rcb, + bool is_module = false); + + Function* create_function( + c10::QualifiedName name, + std::shared_ptr graph, + bool shouldMangle = false) { + if (shouldMangle) { + name = mangle(name); + } + auto fn = std::make_unique( + std::move(name), std::move(graph), nullptr); + auto ret = fn.get(); + register_function(std::move(fn)); + return ret; + } + + std::vector get_functions() const { + return fmap(functions_, [](const std::unique_ptr& fn) { + return fn.get(); + }); + } + + /// Run a method from this compilation. 
+ /// + /// For example: + /// @code + /// IValue output = module->run("relu_script", a, b); + /// @endcode + /// + /// To get a compile a module from a source string, see torch::jit::compile + /// + /// @param method_name The name of the method to run + /// @param args Arguments to be passed to the method + /// @return An IValue containing the return value (or values if it is a tuple) + /// from the method + template + IValue run_method(const c10::QualifiedName& method_name, Types&&... args) { + return get_function(method_name)({IValue(std::forward(args))...}); + } + + void drop_all_functions() { + dict_.clear(); + functions_.clear(); + } + + /** + * Register a class as being owned by this compilation unit. + */ + void register_type(c10::NamedTypePtr namedType) { + // TODO: class types cannot be redefined because we have no way right now + // of invalidating their methods. NamedTuples are fine though, since they + // don't have methods. + TORCH_CHECK( + 0 == classDict_.count(*namedType->name()), + "class '", + namedType->name()->qualifiedName(), + "' already defined."); + classes_.push_back(std::move(namedType)); + classDict_[*classes_.back()->name()] = classes_.size() - 1; + } + + c10::ClassTypePtr get_class(const c10::QualifiedName& name) const { + auto type = get_type(name); + if (!type) { + return nullptr; + } + return type->cast(); + } + + c10::InterfaceTypePtr get_interface(const c10::QualifiedName& name) const { + auto type = get_type(name); + if (!type) { + return nullptr; + } + return type->cast(); + } + + c10::TupleTypePtr get_named_tuple(const c10::QualifiedName& name) const { + for (const auto& cls : classes_) { + if (cls->name()->qualifiedName() == name.qualifiedName()) { + return cls->expect(); + } + } + return nullptr; + } + + c10::NamedTypePtr get_type(const c10::QualifiedName& name) const { + auto it = classDict_.find(name); + if (it == classDict_.end()) { + return nullptr; + } + return classes_[it->second]; + } + + // For testing: clear all 
Python-defined classes to ensure that unit tests + // have isolation. + void _clear_python_cu() { + // Delete all the associated class methods + for (const auto& type : classes_) { + if (auto cls = type->cast()) { + for (auto method : cls->methods()) { + // Tombstone the method in the compilation unit. + // Don't erase because the dict_ + auto it = dict_.find(method->qualname()); + if (it != dict_.end()) { + functions_[it->second] = nullptr; + // Erase in our big lookup table + dict_.erase(it); + } + } + // Classes can have multiple pointers to the same hook, + // need to make sure to not delete it twice + std::unordered_set hooks_to_delete; + for (const auto& hook : cls->getForwardHooks()) { + hooks_to_delete.insert(hook); + } + for (const auto& pre_hook : cls->getForwardPreHooks()) { + hooks_to_delete.insert(pre_hook); + } + for (const auto& hook : hooks_to_delete) { + // Tombstone the hook in the compilation unit. + auto it = dict_.find(hook->qualname()); + if (it != dict_.end()) { + functions_[it->second] = nullptr; + // Erase in our big lookup table + dict_.erase(it); + } + } + } + } + classes_.clear(); + classDict_.clear(); + } + + // [Internal Only] Remove method. + // Note Used for freezing. + void unsafeRemoveMethod(const c10::QualifiedName& method_name) { + auto it = dict_.find(method_name); + TORCH_CHECK( + it != dict_.end(), + "method '", + method_name.qualifiedName(), + "' does not exist."); + functions_[it->second] = nullptr; + dict_.erase(it); + } + + // [name mangling] All code objects must have a unique qualified name in a + // CompilationUnit. In Python, sometimes functions won't have unique qualified + // name (for example, nested functions). So we mangle Python functions to + // ensure that they are uniquely named. + // + // We also use mangling to distinguish different Module instances. Since each + // Module is a singleton class instance, different instances of the same + // Python Module will have different types but the same qualified name. 
+ c10::QualifiedName mangle(const c10::QualifiedName& name) const { + auto mangled = name; + while (get_type(mangled) || find_function(mangled)) { + mangled = mangler_.mangle(mangled); + } + return mangled; + } + + private: + std::unique_ptr define( + const std::optional& prefix, + const Def& def, + const ResolverPtr& resolver, + const Self* self, + const std::unordered_map& function_table, + bool shouldMangle = false, + FunctionType type = FunctionType::Method, + std::optional version = std::nullopt) const; + + // Define a property on \p self. + struct PropertyPair; + PropertyPair define_property( + const std::optional& prefix, + const Property& prop, + const ResolverPtr& resolver, + const Self* self, + const std::unordered_map& function_table, + bool shouldMangle = false) const; + + Function& register_function(std::unique_ptr fn) { + TORCH_CHECK( + 0 == dict_.count(fn->qualname().qualifiedName()), + "method '", + fn->qualname().qualifiedName(), + "' already defined."); + functions_.emplace_back(std::move(fn)); + dict_[functions_.back()->qualname()] = functions_.size() - 1; + return *functions_.back(); + } + std::vector> functions_; + // for fast lookup + std::unordered_map dict_; + std::unordered_map classDict_; + + // [class ownership] Right now there are two relationships between classes + // and compilation units: + // 1. Classes have compilation units internally that hold their methods. + // 2. On load, the TypePtrs of any imported classes are owned by the main + // module's compilation unit. + std::vector classes_; + + mutable NameMangler mangler_; +}; + +// An owning pointer to a Function. Just a pair of a raw Function ptr and it's +// owning CU. We need this because pybind requires a ref-counted way to refer to +// Functions. 
+struct StrongFunctionPtr { + StrongFunctionPtr(std::shared_ptr cu, Function* function) + : cu_(std::move(cu)), function_(function) { + TORCH_INTERNAL_ASSERT(cu_); + TORCH_INTERNAL_ASSERT(function_); + } + std::shared_ptr cu_; + Function* function_; +}; + +namespace script { +// We once had a `script::` namespace that was deleted. This is for backcompat +// of the public API; new code should not use this type alias. +using CompilationUnit = ::torch::jit::CompilationUnit; +} // namespace script +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/function_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/function_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..e311563890e13e39c5382b582953433131ab62cd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/function_impl.h @@ -0,0 +1,185 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +struct TORCH_API GraphFunction : public Function { + GraphFunction( + c10::QualifiedName name, + std::shared_ptr graph, + std::function function_creator, + std::optional executor_execution_mode = + std::nullopt) + : name_(std::move(name)), + graph_(std::move(graph)), + executor_execution_mode_(executor_execution_mode), + function_creator_(std::move(function_creator)) {} + + bool isGraphFunction() const override { + return true; + } + + void run(Stack& stack) override; + + std::function function_creator() const { + return function_creator_; + } + + c10::intrusive_ptr runAsync( + Stack& stack, + TaskLauncher taskLauncher = at::launch) override; + 
+ std::shared_ptr graph() const { + return graph_; + } + + std::shared_ptr optimized_graph() const; + + const c10::QualifiedName& qualname() const override { + return name_; + } + + // private/unstable api. sets the initial execution mode + // will not affect executor if there is an existing executor + // created for this function + void _set_initial_executor_execution_mode(ExecutorExecutionMode mode) { + executor_execution_mode_ = mode; + } + // private/unstable api. sets flag of whether or not to ignore amp. + // will not affect executor if there is an existing executor + // created for this function + void _set_ignore_amp(bool ignore_amp) { + force_no_amp_ = ignore_amp; + } + + // if this isn't yet defined, run its method_creator function + void ensure_defined() override; + + size_t num_inputs() const override { + return graph()->inputs().size(); + } + + Function& setSchema(FunctionSchema schema) override { + schema_ = std::make_unique(std::move(schema)); + return *this; + } + + const FunctionSchema& getSchema() const override; + + GraphExecutorState getDebugState() { + return get_executor().getDebugState(); + } + + bool is_optimized() const { + TORCH_WARN( + "GraphFunction::is_optimized() is deprecated and always returns true. " + "Please use getGraphExecutorOptimize()"); + return true; + } + + void check_single_output() { + TORCH_CHECK( + graph()->outputs().size() == 1, + "Method (but not graphs in general) require a single output. 
Use None/Tuple for 0 or 2+ outputs"); + } + + GraphExecutor& get_executor() { + ensure_defined(); + std::lock_guard lock(compile_mutex); + auto& executor = executors_[currentSpecialization()]; + if (executor) { + return *executor; + } + check_single_output(); + const std::string& name = name_.name(); + std::shared_ptr opt_graph = optimized_graph(); + if (!executor_execution_mode_) { + executor = GraphExecutor(opt_graph, name); + } else { + executor = GraphExecutor(opt_graph, name, *executor_execution_mode_); + } + return *executor; + } + + using Function::call; + bool call( + Stack& stack, + std::optional bailOut, + c10::function_ref f) override { + f(get_executor().getPlanFor(stack, bailOut).code); + return true; + } + + void clear_optimized_graphs() { + optimized_graphs_.fill(nullptr); + } + + private: + enum SpecializationKey { + AutocastOff, + CpuAutocastOn, + GpuAutocastOn, + CpuGpuAutocastOn, + + // This provides the number of specializations + // (Must be last entry) + TotalCount + }; + + SpecializationKey currentSpecialization() const; + + private: + c10::QualifiedName name_; + // The original, non-optimized graph + std::shared_ptr graph_; // for debugging and for inlining + + // allows users to specify Simple/Profiling Executor for function + // TODO: add more executors + mutable std::optional executor_execution_mode_; + + // if invoked on a graph that has already traced through amp + // don't invoke amp pass + mutable bool force_no_amp_ = false; + // Optimized graph, computed lazily. Used for inlining. + mutable std::array, SpecializationKey::TotalCount> + optimized_graphs_; + + // GraphFunctions are invocable from multiple threads, so this lock needs to + // be held when we're initializing graph executor for the first time or + // computing the optimized graph. We're using reentrant mutex so that we don't + // need to worry about causing a deadlock by calling one method from another + // (e.g. optimized_graph() from get_executor()). 
+ mutable std::recursive_mutex compile_mutex; + + // executor_[0] - autocast off + // executor_[1] - autocast cpu on + // executor_[2] - autocast gpu on + // executor_[3] - autocast cpu & gpu on + std::array, SpecializationKey::TotalCount> + executors_; + + // an optional function that actually creates the method when + // ensure_defined() is called. This is used by the compiler so + // that it can construct methods out of order + std::function function_creator_; + + // if absent, then we generate a default schema based on the graph + // mutable because getSchema caches the default schema if one is requested + // before a call to setSchema + mutable std::unique_ptr schema_; +}; + +// Short hands for dynamic_cast. +TORCH_API GraphFunction* tryToGraphFunction(Function& /*function*/) noexcept; +TORCH_API GraphFunction& toGraphFunction(Function& /*function*/); +TORCH_API const GraphFunction& toGraphFunction(const Function& /*function*/); +} // namespace torch::jit +C10_DECLARE_bool(torch_jit_do_not_store_optimized_graph); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/method.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/method.h new file mode 100644 index 0000000000000000000000000000000000000000..d138f8f847d2d0074f0b1669022fa0f4b3e811f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/method.h @@ -0,0 +1,91 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit { + +using ObjectPtr = c10::intrusive_ptr; + +// A method in a module, e.g. 
f in: +// +// class M(ScriptModule): +// @script_method +// def f(self, x): +// ... +// Note: because Method/Module are exposed to python these +// classes use python method naming conventions +struct TORCH_API Method : public torch::IMethod { + Method(ObjectPtr owner, Function* function); + + // the module that contains this method. + Module owner() const; + // the raw objectptr that owns this method, for when the method is owned by a + // torchbind object. + ObjectPtr raw_owner() const; + void run(Stack& stack); + void run(Stack&& stack) { + run(stack); + } + + c10::IValue operator()( + std::vector stack, + const Kwargs& kwargs = Kwargs()) const override; + + // Run method async. Invocation on this function would invokes a JIT + // interpreter that executes ops inline, one by one, on caller's thread. A + // model can utilize async op, i.e. `fork`, to launch an asynchronous task + // which will be launched on provided `taskLauncher`. + c10::intrusive_ptr run_async( + std::vector stack, + const Kwargs& kwargs = Kwargs(), + TaskLauncher taskLauncher = at::launch); + + std::shared_ptr graph() const { + return toGraphFunction(*function_).graph(); + } + + const std::string& name() const override { + return function_->name(); + } + + size_t num_inputs() const { + return function_->num_inputs(); + } + + GraphExecutor& get_executor() { + return toGraphFunction(*function_).get_executor(); + } + + Function& function() const { + return *function_; + } + + private: + void setArgumentNames( + std::vector& /*argumentNames*/ /*argumentNamesOut*/) + const override; + + // Methods are uniqued owned by a single module. This raw pointer allows + // looking up the module. + ObjectPtr owner_; + + // Underlying unbound function + Function* function_; +}; + +namespace script { +// We once had a `script::` namespace that was deleted. This is for backcompat +// of the public API; new code should not use this type alias. 
+using Method = ::torch::jit::Method; +} // namespace script + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/module.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/module.h new file mode 100644 index 0000000000000000000000000000000000000000..385c1ec489fc9d43caee073b604b4d5c777c286e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/module.h @@ -0,0 +1,690 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +// This file contains classes which assist in desugaring Python style +// modules and their methods into flattened graphs which don't have any +// function calls. + +namespace torch::jit { + +using ::c10::Argument; +using ::c10::FunctionSchema; +using ::c10::QualifiedName; +// Map which stores filename to content. 
+using ExtraFilesMap = std::unordered_map; + +using ModulePtr = c10::intrusive_ptr; + +struct Module; + +template +struct slot_list_impl; + +template +struct Named { + std::string name; + T value; +}; + +using NameModule = Named; +using NameValue = Named; +using NameTensor = Named; + +namespace detail { +struct TORCH_API ModulePolicy; +struct TORCH_API ParameterPolicy; +struct TORCH_API AttributePolicy; +struct TORCH_API BufferPolicy; +template +struct NamedPolicy; +} // namespace detail + +using module_list = slot_list_impl; +using named_module_list = + slot_list_impl>; + +using parameter_list = slot_list_impl; +using named_parameter_list = + slot_list_impl>; + +using attribute_list = slot_list_impl; +using named_attribute_list = + slot_list_impl>; + +using buffer_list = slot_list_impl; +using named_buffer_list = + slot_list_impl>; + +using ModuleLookup = std::function&)>; + +struct TORCH_API Module : public Object { + explicit Module(c10::QualifiedName class_name); + Module(std::shared_ptr cu, const c10::ClassTypePtr& type); + Module() = default; + Module(const Module&) = default; + Module& operator=(const Module&) = default; + Module(Module&&) noexcept = default; + Module& operator=(Module&&) noexcept = default; + Module( + c10::QualifiedName /*class_name*/, + std::shared_ptr cu, + bool shouldMangle = false); + Module(ModulePtr module_value) : Object(std::move(module_value)) {} + ~Module() = default; + + void set_optimized(bool o) { + TORCH_WARN( + "Module::set_optimized() is deprecated and has no effect. " + "Please use setGraphExecutorOptimize()"); + } + + bool is_optimized() const { + TORCH_WARN( + "Module::is_optimized() is deprecated and always returns true. " + "Please use getGraphExecutorOptimize()"); + return true; + } + + IValue forward(std::vector inputs, const Kwargs& kwargs = Kwargs()) { + return get_method("forward")(std::move(inputs), kwargs); + } + + // In script modules, buffers are Tensors attribute that are _not_ registered + // as parameters. 
This is different than in nn.Module where there is a special + // register_buffer method. With this simplification, we only need to track + // whether a slot is a parameter to be able to classify it. + void register_buffer(const std::string& name, at::Tensor v) { + bool is_param = false; + bool is_buffer = true; + std::lock_guard lock(*register_mutex_); + type()->addOrCheckAttribute(name, TensorType::get(), is_param, is_buffer); + _ivalue()->setAttr(name, std::move(v)); + } + + void register_parameter( + const std::string& name, + at::Tensor v, + bool is_buffer) { + std::lock_guard lock(*register_mutex_); + type()->addOrCheckAttribute(name, TensorType::get(), !is_buffer, is_buffer); + _ivalue()->setAttr(name, std::move(v)); + } + + void register_attribute( + const std::string& name, + const TypePtr& t, + IValue v, + bool is_param = false, + bool is_buffer = false) { + type()->addOrCheckAttribute(name, t, is_param, is_buffer); + _ivalue()->setAttr(name, std::move(v)); + } + + void register_module(const std::string& name, const Module& module) { + type()->addOrCheckAttribute(name, module.type()); + _ivalue()->setAttr(name, module._ivalue()); + } + + void apply(const std::function& fn); + + buffer_list buffers(bool recurse = true) const; + named_buffer_list named_buffers(bool recurse = true) const; + + module_list children() const; // direct modules + named_module_list named_children() const; + module_list modules() const; // all modules, including this one, recursively + named_module_list named_modules() const; + + // all tensors involved in gradient optimization + parameter_list parameters(bool recurse = true) const; + named_parameter_list named_parameters(bool recurse = true) const; + + // all members of the object, similar to iterating over dir(obj) in python + attribute_list attributes(bool recurse = true) const; + named_attribute_list named_attributes(bool recurse = true) const; + + void dump( + bool print_method_bodies, + bool print_attr_values, + bool 
print_param_values) const; + + std::string dump_to_str( + bool print_method_bodies, + bool print_attr_values, + bool print_param_values) const; + + /// Enables "training" mode. + void train(bool on = true); + /// Calls train(false) to enable "eval" mode. + /// Do not override this method, override `train()` instead. + void eval() { + train(/*on=*/false); + } + /// True if the module is in training mode. + bool is_training() const { + return attr("training", true).toBool(); + } + + /// Recursively casts all parameters to the given `dtype` and `device`. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. + void to(at::Device device, at::ScalarType dtype, bool non_blocking = false); + + /// Recursively casts all parameters to the given dtype. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. + void to(at::ScalarType dtype, bool non_blocking = false); + + /// Recursively moves all parameters to the given device. + /// + /// If `non_blocking` is true and the source is in pinned memory and + /// destination is on the GPU or vice versa, the copy is performed + /// asynchronously with respect to the host. Otherwise, the argument has no + /// effect. 
+ void to(at::Device device, bool non_blocking = false); + + void save( + std::ostream& out, + const ExtraFilesMap& extra_files = ExtraFilesMap()) const; + + void save( + const std::string& filename, + const ExtraFilesMap& extra_files = ExtraFilesMap()) const; + + void _save_for_mobile( + std::ostream& out, + const ExtraFilesMap& extra_files = ExtraFilesMap(), + bool save_mobile_debug_info = false, + bool use_flatbuffer = false) const; + + void _save_for_mobile( + const std::string& filename, + const ExtraFilesMap& extra_files = ExtraFilesMap(), + bool save_mobile_debug_info = false, + bool use_flatbuffer = false) const; + + Module copy() const; + + Module deepcopy(std::optional device = std::nullopt) const; + + // Clones both the underlying `ClassType` and the module instance(data), this + // function creates a new `ClassType` and returns a new instance that has the + // same data as the current instance but with the new type, shared ClassType + // will be preserved as well + Module clone(bool inplace = false) const; + + // Clones both the underlying `ClassType` and the module instance(data), this + // function creates a new `ClassType` and returns a new instance that has the + // same data as the current instance but with the new type, shared ClassType + // will be preserved as well. Also allows the caller to specify a set of + // method and attribute names to not clone. + Module clone( + bool inplace, + const std::unordered_set& ignored_method, + const std::unordered_set& ignored_attributes) const; + + void clone_method(const Module& orig, const std::string& name); + + IValue operator()(std::vector inputs); + + template + IValue create_class(const c10::QualifiedName& name, Types&&... 
args) const { + return create_class(name, {IValue(std::forward(args))...}); + } + + IValue create_class(const c10::QualifiedName& name, Stack stack) const; + + inline bool operator==(const Module& y) const noexcept { + return _ivalue() == y._ivalue(); + } + + void set_delete_memory(std::shared_ptr delete_mem) { + mem_to_delete_ = std::move(delete_mem); + } + + // A set of functions to maintain input shapes through torch.jit.save and + // torch.jit.load. It only works on tensors and lists/dicts of tensors + // because tracing is only supported by these types. + void store_traced_inputs( + const std::string& func_name, + std::vector inputs) { + if (inputs.empty()) { + return; + } + auto c10_inputs = c10::impl::GenericList(AnyType::get()); + for (IValue& value : inputs) { + // Not checking whether this is traceable type as that is already checked + // higher up in the stack and changing that would require a larger + // restructuring. + c10_inputs.emplace_back(std::move(value)); + } + traced_inputs_.insert_or_assign(func_name, c10_inputs); + } + + c10::Dict retrieve_traced_inputs() + const { + return traced_inputs_; + } + + private: + Module clone_impl( + std::unordered_map& type_remap, + bool inplace, + IValue::HashIdentityIValueMap memo, + const std::unordered_set& ignored_methods, + const std::unordered_set& ignored_attributes) const; + + void clone_method( + const Module& orig, + const Function& method, + const std::unordered_map& type_remap); + + c10::QualifiedName getNameForMethod(std::string basename) const { + return QualifiedName(*type()->name(), std::move(basename)); + } + + void to_impl( + const std::optional& device, + const std::optional& dtype, + bool non_blocking); + + // Extra handle for the module to delete when itself is deleted + std::shared_ptr mem_to_delete_; + + // Map of function names to the traced inputs that they have been traced with + c10::Dict traced_inputs_; + + // Mutex to keep registering buffer or parameter thread safe. 
+ std::shared_ptr register_mutex_ = std::make_shared(); +}; + +// C++ equivalent api of `torch.jit.freeze`. See documentation there for +// details. +TORCH_API Module freeze( + const Module& module, + const std::optional>& preserved_attrs = + std::nullopt, + bool optimize_numerics = true); + +// C++ equivalent api of `torch.jit.optimize_for_inference`. See documentation +// there for details. +TORCH_API Module optimize_for_inference( + Module& module, + const std::vector& other_methods = {}); + +enum class FusionBehavior { STATIC, DYNAMIC }; + +using FusionStrategy = std::vector>; +// clang-format off +/* +Sets the type and number of specializations that can occur during fusion. + +Usage: provide a list of pairs (type, depth) where type is one of STATIC or DYNAMIC +and depth is an integer. + +Behavior - static vs dynamic: + In STATIC fusion, fused ops are compiled to have fixed input shapes. The shape is determined + based on some initial profiling runs. + In DYNAMIC fusion, fused ops are compiled to have variable input shapes, so that multiple + shapes are possible. + +In both cases, we also recompile on new striding behavior, device, or dtype. + +Behavior - fallback functions & depth: + When an input doesn't match the format required by the specialized compiled op, it will run + a fallback function. Fallback functions are recursively be compiled and specialized based + on the observed tensor shapes. Since compilation can be slow, the "depth" parameter is provided to + limit the number of specializations that can be compiled, before giving up on recompiling and + falling back to a completely un-fused, un-specialized implementation. + +The list of (type, depth) pairs controls the type of specializations and the number of +specializations. 
For example: [(STATIC, 2), (DYNAMIC, 2)] indicates that the first +two specializations will use static fusions, the following two specializations will use +dynamic fusion, and any inputs that satisfy none of the 4 options will run an +unfused implementation. + +NB: in the future, if more as more fusion backends are added there may be more granular +apis for specific fusers. +*/ +// clang-format on +TORCH_API FusionStrategy getFusionStrategy(); +// returns previous strategy +TORCH_API FusionStrategy setFusionStrategy(FusionStrategy& fusion_strategy); + +namespace detail { + +struct TORCH_API SlotCursor { + Module module_; + int64_t i_; // slot offset, -1 indicates the module itself +}; + +} // namespace detail + +// This iterator allows the (optionally recursive) enumeration of +// the members of a Module. It performs a depth-first pre-order +// traversal of the module. The Policy template parameter determines +// which slots of the object should be included. For instance, +// when iterating parameters, we return the parameter tensors, +// but skip modules, buffers, and other attributes. +// See ModulePolicy for comments about Policy object's API. +template +struct slot_iterator_impl { + using SlotCursor = detail::SlotCursor; + using value_type = typename Policy::value_type; + slot_iterator_impl( + Module root, + bool recurse, // if true, do a depth-first search, otherwise, just look at + // slots of root + bool return_module) // if true include root itself as the first thing + // visited (used in modules()) + : cursors_({SlotCursor{std::move(root), return_module ? 
-1 : 0}}), + recurse_(recurse) { + // advance iterator to first valid element (or the end, if empty) + while_not_valid_next(); + } + // empty cursors_, represents end of iteration + slot_iterator_impl() : recurse_(false) {} + value_type operator*() const { + return Policy::create(cursors_, cur()); + } + value_type operator->() const { + return **this; + } + slot_iterator_impl& operator++() { + next_valid(); + return *this; + } + slot_iterator_impl operator++(int) { + // this is really expensive, should we delete it so people don't use it + // instead of prefix? + slot_iterator_impl old = *this; + ++(*this); + return old; + } + + private: + // return_module() is a corner case where instead of returning a submodule + // of root, we are returning root itself, because we are iterating modules(), + // which contains the root module itself. + // It is represented with a single SlotCursor whose index is -1. + bool return_module() const { + return top().i_ == -1; + } + const SlotCursor& top() const { + return cursors_.back(); + } + SlotCursor& top() { + return cursors_.back(); + } + IValue cur() const { + return return_module() ? top().module_._ivalue() + : top().module_._ivalue()->getSlot(top().i_); + } + + // advance to the next slot in a depth first pre-order traversal of the + // modules slots. This function does not guarantee the next slot is a + // valid element of the iteration. That is done by valid(). + // invariant: !cursors_.empty() + void next() { + // we just returned the module itself, advance i_ to 0 so we are now + // at the first slot of the module. + if (return_module()) { + ++top().i_; + return; + } + // the last traversal action advanced beyond the number of slots in the + // module so continue the iteration in the parent. 
+ if (top().i_ >= int64_t(top().module_._ivalue()->type()->numAttributes())) { + cursors_.pop_back(); + if (!cursors_.empty()) { + ++top().i_; + } + return; + } + // if the current thing is a module, we have to scan it for recursive + // traversals. We do this by adding a new SlotCursor to track the traversal. + if (recurse_ && + top().module_._ivalue()->type()->getAttribute(top().i_)->is_module()) { + cursors_.emplace_back(SlotCursor{cur().toModule(), 0}); + return; + } + // common case: advance to the next slot. + ++top().i_; + } + // is the current position of the iterator a valid one? + // otherwise, we have to continue advancing. + bool valid() const { + return top().i_ < + int64_t(top().module_._ivalue()->type()->numAttributes()) && + Policy::valid( + top().module_._ivalue()->type(), + top().i_, + top().module_._ivalue()->getSlot(top().i_)); + } + void while_not_valid_next() { + // advance iteration until we are either at the end (cursors_.empty()) + // or in a valid state. return_module() is a special case, + // and is always considered valid, regardless of Policy, because it is + // it is only true when we are iterating modules. + while (!cursors_.empty() && !return_module() && !valid()) { + next(); + } + } + void next_valid() { + // avoid crashing if this is empty + if (cursors_.empty()) { + return; + } + // advance to next element, which is maybe not valid + next(); + while_not_valid_next(); + } + + std::vector cursors_; + bool recurse_; + + friend inline bool operator!=( + const slot_iterator_impl& a, + const slot_iterator_impl& b) { + // we are finished iteration when we have no more iteration SlotCursors. + // end is always an empty iterator with no cursors. + return (a.cursors_.empty() != b.cursors_.empty()); + } +}; + +// This type represents lists of parameters, attributes, and +// submodules contained in the module. It is abstract because +// they are not stored directly in std::vectors but inside the +// module's IValue object itself. 
+template +struct slot_list_impl { + using iterator = slot_iterator_impl; + using const_iterator = slot_iterator_impl; + using value_type = typename iterator::value_type; + slot_iterator_impl begin() const { + return slot_iterator_impl(module_, recurse_, return_module_); + } + slot_iterator_impl end() const { + return slot_iterator_impl(); + } + size_t size() const { + if (!size_) { + size_ = size_t(0); + for ([[maybe_unused]] const value_type& _ : *(this)) { + ++*size_; + } + } + return *size_; + } + + slot_list_impl(Module module, bool recurse, bool return_module) + : module_(std::move(module)), + recurse_(recurse), + return_module_(return_module), + size_(std::nullopt) { + if (!recurse && !return_module && Policy::all_slots) { + size_ = module_.num_slots(); + } + } + + private: + Module module_; + bool recurse_; + bool return_module_; + // size of this list, cached on first request + // when we need to filter the slot list + mutable std::optional size_; + friend struct Module; +}; + +namespace detail { + +// slot_iterator_impl always iterate over all the slots in a module, +// the Policy template argument determines slots should be returned and their +// types +struct TORCH_API ModulePolicy { + // the type of the value being returned + using value_type = Module; + + // the logic for creating the type being returned, given the raw IValue + // of that object. + static value_type create( + const std::vector& cursors, + IValue v) { + return Module(std::move(v).toObject()); + } + // is slot i in typ something that this iterator should return, otherwise, + // we skip it. + static bool valid(const ClassTypePtr& typ, size_t i, const IValue& v) { + return typ->getAttribute(i)->is_module(); + } + // are we going to return everything? If so, we can optimize the calculate + // of the size of the list. 
+ static constexpr bool all_slots = false; +}; + +struct TORCH_API ParameterPolicy { + using value_type = at::Tensor; + static value_type create( + const std::vector& cursors, + IValue v) { + return std::move(v).toTensor(); + } + static bool valid(const ClassTypePtr& typ, size_t i, const IValue& v) { + return typ->is_parameter(i) && v.isTensor(); + } + static constexpr bool all_slots = false; +}; + +struct TORCH_API BufferPolicy { + using value_type = at::Tensor; + static value_type create( + const std::vector& cursors, + IValue v) { + return std::move(v).toTensor(); + } + static bool valid(const ClassTypePtr& typ, size_t i, const IValue& v) { + return typ->getAttribute(i)->isSubtypeOf(*TensorType::get()) && + typ->is_buffer(i); + } + static constexpr bool all_slots = false; +}; + +struct TORCH_API AttributePolicy { + using value_type = IValue; + static value_type create( + const std::vector& cursors, + IValue v) { + return v; + } + static bool valid(const ClassTypePtr& typ, size_t i, const IValue& v) { + return true; + } + static constexpr bool all_slots = true; +}; + +// take a Policy object, and make a version of it that returns the slot. +// along with the fully qualified name of that slot. This is used for the named_ +// variants like named_parameters(). +template +struct NamedPolicy { + using value_type = Named; + static value_type create( + const std::vector& cursors, + IValue v) { + std::string name; + if (cursors.size() == 1) { + name = (cursors.back().i_ == -1) ? 
"" : nameFragment(cursors.back()); + } else { + std::ostringstream ss; + for (const auto i : c10::irange(cursors.size())) { + if (i > 0) { + ss << '.'; + } + ss << nameFragment(cursors[i]); + } + name = ss.str(); + } + return value_type{std::move(name), Policy::create(cursors, std::move(v))}; + } + static bool valid(const ClassTypePtr& t, size_t i, const IValue& v) { + return Policy::valid(t, i, v); + } + static constexpr bool all_slots = Policy::all_slots; + + private: + static std::string nameFragment(const detail::SlotCursor& f) { + return f.module_.type()->getAttributeName(f.i_); + } +}; + +} // namespace detail + +TORCH_API bool& getInlineEverythingMode(); + +namespace script { +// We once had a `script::` namespace that was deleted. This is for backcompat +// of the public API; new code should not use this type alias. +using Module = ::torch::jit::Module; +using ExtraFilesMap = ::torch::jit::ExtraFilesMap; +} // namespace script + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/object.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/object.h new file mode 100644 index 0000000000000000000000000000000000000000..f25e599974b138172c593fe2c3f9f9fac2e26397 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/api/object.h @@ -0,0 +1,205 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::jit { + +struct Resolver; +using ResolverPtr = std::shared_ptr; + +using ObjectPtr = c10::intrusive_ptr; + +// Throw this in C++ land if `attr` fails. 
This will be converted to a Python +// AttributeError by the Python binding code +class ObjectAttributeError : public std::runtime_error { + public: + ObjectAttributeError(const std::string& what) : std::runtime_error(what) {} +}; + +struct TORCH_API Object { + Object() = default; + Object(const Object&) = default; + Object& operator=(const Object&) = default; + Object(Object&&) noexcept = default; + Object& operator=(Object&&) noexcept = default; + Object(ObjectPtr _ivalue) : _ivalue_(std::move(_ivalue)) {} + Object(std::shared_ptr cu, const c10::ClassTypePtr& type); + Object( + c10::QualifiedName, + std::shared_ptr cu, + bool shouldMangle = false); + + ObjectPtr _ivalue() const { + TORCH_INTERNAL_ASSERT(_ivalue_); + return _ivalue_; + } + + c10::ClassTypePtr type() const { + return _ivalue()->type(); + } + + struct Property { + std::string name; + Method getter_func; + std::optional setter_func; + }; + + void setattr(const std::string& name, c10::IValue v) { + if (_ivalue()->type()->hasConstant(name)) { + TORCH_CHECK( + false, + "Can't set constant '", + name, + "' which has value:", + _ivalue()->type()->getConstant(name)); + } else if (auto slot = _ivalue()->type()->findAttributeSlot(name)) { + const c10::TypePtr& expected = _ivalue()->type()->getAttribute(*slot); + TORCH_CHECK( + v.type()->isSubtypeOf(*expected), + "Expected a value of type '", + expected->repr_str(), + "' for field '", + name, + "', but found '", + v.type()->repr_str(), + "'"); + _ivalue()->setSlot(*slot, std::move(v)); + } else { + TORCH_CHECK(false, "Module has no attribute '", name, "'"); + } + } + + c10::IValue attr(const std::string& name) const { + if (auto r = _ivalue()->type()->findAttributeSlot(name)) { + return _ivalue()->getSlot(*r); + } + if (auto r = _ivalue()->type()->findConstantSlot(name)) { + return _ivalue()->type()->getConstant(*r); + } + std::stringstream err; + err << _ivalue()->type()->repr_str() << " does not have a field with name '" + << name.c_str() << "'"; + throw 
ObjectAttributeError(err.str()); + } + + c10::IValue attr(const std::string& name, c10::IValue or_else) const { + if (auto r = _ivalue()->type()->findAttributeSlot(name)) { + return _ivalue()->getSlot(*r); + } + if (auto r = _ivalue()->type()->findConstantSlot(name)) { + return _ivalue()->type()->getConstant(*r); + } + return or_else; + } + + bool hasattr(const std::string& name) const { + return _ivalue()->type()->hasAttribute(name) || + _ivalue()->type()->hasConstant(name); + } + + // each object owns its methods. The reference returned here + // is guaranteed to stay valid until this module has been destroyed + Method get_method(const std::string& name) const { + if (auto method = find_method(name)) { + return *method; + } + TORCH_CHECK(false, "Method '", name, "' is not defined."); + } + + const std::vector get_methods() const { + return c10::fmap(type()->methods(), [&](Function* func) { + return Method(_ivalue(), func); + }); + } + + bool has_property(const std::string& name) const { + for (const auto& prop : type()->properties()) { + if (prop.name == name) { + return true; + } + } + return false; + } + + const Property get_property(const std::string& name) const { + for (const auto& prop : type()->properties()) { + if (prop.name == name) { + std::optional setter = std::nullopt; + if (prop.setter) { + setter = Method(_ivalue(), prop.setter); + } + return Property{ + prop.name, Method(_ivalue(), prop.getter), std::move(setter)}; + } + } + TORCH_CHECK(false, "Property '", name, "' is not defined."); + } + + const std::vector get_properties() const { + return c10::fmap(type()->properties(), [&](ClassType::Property prop) { + std::optional setter = std::nullopt; + if (prop.setter) { + setter = Method(_ivalue(), prop.setter); + } + return Property{ + std::move(prop.name), + Method(_ivalue(), prop.getter), + std::move(setter)}; + }); + } + + std::optional find_method(const std::string& basename) const; + + /// Run a method from this module. 
+ /// + /// For example: + /// @code + /// IValue output = module->run("relu_script", a, b); + /// @endcode + /// + /// To get a compile a module from a source string, see torch::jit::compile + /// + /// @param method_name The name of the method to run + /// @param args Arguments to be passed to the method + /// @return An IValue containing the return value (or values if it is a tuple) + /// from the method + template + IValue run_method(const std::string& method_name, Types&&... args) { + return get_method(method_name)({IValue(std::forward(args))...}); + } + + // so that C++ users can easily add methods + void define(const std::string& src, const ResolverPtr& resolver = nullptr); + + size_t num_slots() const { + return _ivalue()->slots().size(); + } + + // shallow copy the object + Object copy() const; + + // Copies all the attributes of the object recursively without creating new + // `ClassType`, including deepcopy of Tensors + Object deepcopy() const; + + private: + // mutable be we lazily initialize in module_object. + mutable ObjectPtr _ivalue_; +}; + +namespace script { +// We once had a `script::` namespace that was deleted. This is for backcompat +// of the public API; new code should not use this type alias. +using Object = ::torch::jit::Object; +} // namespace script +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend.h new file mode 100644 index 0000000000000000000000000000000000000000..cea04920023b6876ac4c8123c4b887b88d457fbd --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend.h @@ -0,0 +1,119 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::jit { +namespace { +inline c10::FunctionSchema getIsAvailableSchema() { + c10::Argument self("self", c10::AnyType::get()); + c10::Argument available("available", c10::BoolType::get()); + c10::FunctionSchema preprocessor_schema( + "is_available", + /*overload_name=*/"", + /*arguments=*/{self}, + /*returns=*/{available}); + return preprocessor_schema; +} + +constexpr static auto kBackendsNamespace = "__backends__"; + +inline c10::FunctionSchema getCompileSchema() { + c10::Argument self("self", c10::AnyType::get()); + c10::Argument mod("processed", c10::AnyType::get()); + auto any_dict_ty = + c10::DictType::create(c10::StringType::get(), c10::AnyType::get()); + c10::Argument method_compile_spec("method_compile_spec", any_dict_ty); + c10::Argument handles("handles", any_dict_ty); + + c10::FunctionSchema compile_schema( + "compile", + /*overload_name=*/"", + /*arguments=*/{self, mod, method_compile_spec}, + /*returns=*/{handles}); + return compile_schema; +} + +inline c10::FunctionSchema getExecuteSchema() { + auto any_list_ty = c10::ListType::create(c10::AnyType::get()); + c10::Argument self("self", c10::AnyType::get()); + c10::Argument handle("handle", c10::AnyType::get()); + c10::Argument input("input", any_list_ty); + c10::Argument output("output", any_list_ty); + return 
c10::FunctionSchema( + "execute", + /*overload_name=*/"", + /*arguments=*/{self, handle, input}, + /*returns=*/{output}); +} + +template +std::function getIsAvailableFunc() { + return [](Stack& stack) { + auto self = pop(stack).toCustomClass(); + auto ret = self->is_available(); + push(stack, ret); + }; +} + +template +std::function getCompileFunc() { + return [](Stack& stack) { + auto method_compile_spec = pop(stack).toGenericDict(); + auto processed = pop(stack); + auto self = pop(stack).toCustomClass(); + auto ret = self->compile(processed, method_compile_spec); + push(stack, ret); + }; +} + +template +std::function getExecuteFunc() { + return [](Stack& stack) { + auto args = pop(stack); + auto handle = pop(stack); + auto self = pop(stack); + auto backend = self.toCustomClass(); + auto res = backend->execute(handle, args.toList()); + push(stack, res); + }; +} +} // namespace + +// Static registration API for backends. +template +class backend { + static_assert( + std::is_base_of_v, + "torch::jit::backend requires T to inherit from PyTorchBackendInterface"); + std::string backend_name_; + + public: + // Registers a new backend with /p name, and the given /p preprocess + // function. + backend(const std::string& name) : backend_name_(name) { + static auto cls = torch::class_(kBackendsNamespace, name) + .def(torch::init<>()) + ._def_unboxed( + "is_available", + getIsAvailableFunc(), + getIsAvailableSchema()) + ._def_unboxed( + "compile", + getCompileFunc(), + getCompileSchema()) + ._def_unboxed( + "execute", + getExecuteFunc(), + getExecuteSchema()); + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_handler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..ec124d0cf8ae0cbee9a38a575c49c22e2712164d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_handler.h @@ -0,0 +1,143 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include +#include +#include + +#include + +namespace torch::jit { + +/* + * BackendDebugHandleManager is responsible for issuing debug handles to + * backends. Debug handles are associated with nodes of a graph. + * BackendDebugHandleManager also maintains a map + * [debug-handle, DebugInfoTuple = {source range, inlined callstack ptr]} that + * will help generate a callstack for exception raised using debug handles. + * Effectively debug handles are something that is given to backend and later + * when an exception occurs in the backend, backend can tell, using debug + * handle, that an exception occurred here. Then the runtime can generate + * callstack corresponding to the exception. + * There are two parts to BackendDebugHandleManager: + * 1. static std::atomic debug_handle + * 2. Map of [debug-handle, DebugInfoTuple] + * + * About 1: + * Why do they have to be unique. The reason is that by ensuring + * uniqueness of debug handles, we remove the burden of another layer of + * mapping where we need to say this set of debug handles were generated for + * this lowered module or this bytecode function. This simplifies the API for + * serialization since debug handles can uniquely identify DebugInfoTuple. + * Thus simplifies the runtime API for throwing exception. 
Exception throwing + * only needs to know debug_handle and not which module or method threw it. + * There are 2 issues to keep in mind, though,for static std::atomic + * debug_handle: A. Performance implications of using atomic variable. However + * this is only used for compilation so we assume to absorb some of that + * penalty. Plus if there is no contention then we should have less to worry + * about. B. If repeated compilation is part of a long running process then we + * may overflow int64_t. We may detect and fail on this. For now this is not + * done. + * + * Now about 2: + * There are two usecases for [debug-handle, DebugInfoTuple] + * A. During bytecode generation the DebugInfoTuple corresponding to the nodes + * of the inlined graph being serialized, are stored in this object and a + * unique debug handle is returned. This unique debug handle is stored in + * mobile_debug info for pytorch lite models. It will be used for raising + * exceptions as well as profiling. B. During backend lowering, each backend's + * preprocess/compile method can compile method's graph and serialize those + * methods. Once the method is lowered to backend, graph is essentially lost. + * Without access to graph it is hard to generate model level debug info. Thus + * the debug handles provide a way to map nodes of the graph to the model level + * debug info. + * + * During byte-code model serialization, [debug-handle, DebugInfoTuple] is + * serialized. Now we know a. debug handles and b. how to map debug handles to + * model source code. Thus we can either do eager symbolication by converting + * debug handles to corresponding source code at runtime, or do lazy + * symbolicattion offline. + * + * Note that it is not necessary to serialize [debug-handle, DebugInfoTuple] + * corresponding to lowered backend if the lowering process, that is + * preprocess/compile, and execution happens in the same session, then eager + * symbolication can be employed. 
+ * + * Now how does BackendDebugHandleManager capture all of the above? + * By providing two API. + * 1. getNextDebugHandle which given a Node* returns a unique debug handle, + * that will uniquely identify DebugInfoTuple. + * and + * 2. getCallStackPtrMap which returns the map + * [debug-handle, DebugInfoTuple] + * + * 1 provides debug handles to backends and 2 provides runtime a way to map + * debug handles to source level debug info. + * + * So why does debug handle map to DebugInfoTuple = {source range and inlined + * cs}? {debug_handle, source_range_tag, serialized_callstack} Take this + * example: class L(nn.Module): def __init__(self) -> None: + * ... + * def forward(self, x): + * return x * 5 + * class M(nn.Module): + * def __init__(self) -> None: + * ... + * def forward(self, x): + * return x - 2 + * class N(nn.Module): + * def __init__(self) -> None: + * self.m = M() + * def forward(self, x): + * return self.m(x) + 3 + * m = torch.jit.script(N()) + * Once you inline m's forward method, m.forward.graph will look something + * like this + * graph(%self...): + * %x = aten::mul(..) + * %x = aten::sub(x, ..) + * %y = aten::add(x, ..) + * .. + * Inlined callstack ptr for these two nodes will look like: + * aten::mul's inlined CS (callstack): [N.forward, source range] -> [M.forward, + * source range] aten::sub's inlined CS (callstack): [N.forward, source range] + * aten::add's inlined CS: null + * mul node's inlined CS contains only information about the callsites' source + * range The information about mul node's source range ('return x * 5') is not + * available in its inlined CS. It is rather part of node's source range + * instead of inlined CS. Thus to get full stack: [N.forward, source range] -> + * [M.forward, source range] -> [aten::mul's source range] We need to track + * mul's source range and inlined CS both. + */ + +using BackendDebugInfoMapType = + std::unordered_map; + +/* + * This class is used to generate debug info map. 
+ * backend's preprocess will call generate_debug_handles (see + * backend_detail.cpp), which uses debug_handle_manager to generate debug + * handles. When lowering process finishes, calling stopRecording will + * return debug info map from debug_handle_manager + */ +class TORCH_API BackendDebugInfoRecorder { + public: + BackendDebugInfoRecorder() = default; + int64_t getNextDebugHandle(const Node* node); + // Reason this is not done as RAII is that work done in stopRecording + // can throw, and throwing with dtor will call terminate and thus voids any + // exception catching at a higher level. + BackendDebugInfoMapType stopRecording(); + NodeToDebugHandle generate_debug_handles(const std::shared_ptr& graph); + + private: + static std::atomic unique_debug_handle_; + BackendDebugInfoMapType handles_to_inlined_callstack_ptrs_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_info.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_info.h new file mode 100644 index 0000000000000000000000000000000000000000..b2ff9a3fe801206fba4bf40538e5770a3ae493e4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_debug_info.h @@ -0,0 +1,68 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifndef BUILD_LITE_INTERPRETER +#include +#endif +#include + +namespace torch::jit { + +constexpr static auto kBackendUtilsNamespace = "backendutils"; +constexpr static auto kBackendDebugInfoClass = "BackendDebugInfo"; + +#ifndef BUILD_LITE_INTERPRETER +/* + * Custom class for holding debug information in lowered modules, intended + * 
purely for keeping this information to be later serialized outside of the + * lowered module itself. + * Its usage pattern is: + * 1. LoweredModule declares an instance of this class in __backend_debug_info + * 2. During serialization, __backend_debug_info is used to obtain the debug + * information. + * 3. The contents of LoweredModule.__backend_debug_info are not serialized + * within the LoweredModule itself. + */ +class TORCH_API PyTorchBackendDebugInfo : public torch::CustomClassHolder { + public: + PyTorchBackendDebugInfo() = default; + + std::optional& getDebugInfoMap() { + return debug_info_map_; + } + + void setDebugInfoMap(BackendDebugInfoMapType&& debug_info_map) { + debug_info_map_ = std::move(debug_info_map); + } + + private: + std::optional debug_info_map_; +}; + +#else + +/* + * Dummy instance exists for the following reason: + * __backend_debug_info is of type BackendDebugInfo which is a torchbind' + * class backed by cpp class PyTorchBackendDebugInfo. + * PyTorchBackendDebugInfo, depends on ir.h., scope.h, source_range etc. + * We dont include this on lite interpreter side. Thus on lite interpreter side + * we cannot have valid definition of PyTorchBackendDebugInfo. However we do not + * need valid instance of __backend_debug_info in lite interpreter anyway as we + * dont serialize this info as part of LowerdModule as mentioned ealrier. + * However since LoweredModule has registered attribute of __backend_debug_info + * we still need to make sure that BackendDebugInfo is registered with + * TorchScript. However in this instance it does not have to be backed by + * PyTorchBackendDebugInfo, so we create a dummy PyTorchBackendDebugInfoDummy + * just for this purpose. + */ +class PyTorchBackendDebugInfoDummy : public torch::CustomClassHolder { + public: + PyTorchBackendDebugInfoDummy() = default; +}; +#endif +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_detail.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_detail.h new file mode 100644 index 0000000000000000000000000000000000000000..cca52f2866881927fa9db1b8f35cb20be87a5183 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_detail.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include + +namespace torch::jit { + +using DebugHandleType = int64_t; + +using NodeToDebugHandle = std::unordered_map; + +using BackendDebugHandleGenerator = + std::function&)>; + +namespace detail { + +using BackendPreprocessFunction = std::function&, + const BackendDebugHandleGenerator& generate_debug_handles)>; + +TORCH_API void registerBackendPreprocessFunction( + const std::string& name, + const BackendPreprocessFunction& preprocess); + +bool hasBackendPreprocessFunction(const std::string& name); + +BackendPreprocessFunction getBackendPreprocessFunction(const std::string& name); + +TORCH_API Module codegen_backend_module( + const std::string& backend_name, + const Module& orig_module, + const c10::Dict& method_compile_spec, + const c10::DictTypePtr& any_dict_ty); +} // namespace detail +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_exception.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_exception.h new file mode 100644 index 0000000000000000000000000000000000000000..14a22a5704d99e3bdb2a347cecb906c7f8c681e2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_exception.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include + +namespace c10 { +class TORCH_API BackendRuntimeException : public c10::Error { + public: + // Use debug_handle to throw exception + BackendRuntimeException( + SourceLocation loc, + std::string msg, + int64_t debug_handle) + : c10::Error(loc, std::move(msg)) { + debug_handles.push_back(debug_handle); + } + // If rethrowing, can push another debug_handle + // This is useful in couple of scenarios. + // 1. A submodule is lowered and lite interpreter has CallMethod + // to lowered module's method. In this case lowered module will throw with + // a handle, plus there will be another debug handle corresponding + // to the CallMethod node in lite interpreter. Both together give complete + // trace. This function allows lite interpreter to rethrow with debug + // handle it has for CallMethod. + // 2. Another scenarios is when lite interpreter can make function calls or + // the lowered backend also has function call ability. Thus we have + // multiple function frames. Now we need a stack of handles to symbolicate + // entire stack trace. + void pushDebugHandle(int64_t debug_handle) { + debug_handles.push_back(debug_handle); + } + const std::vector& getDebugHandles() { + return debug_handles; + } + + private: + // Stores stack of debug handles. 
+ std::vector debug_handles; +}; + +} // namespace c10 +#define TORCH_DELEGATED_BACKEND_THROW(cond, msg, debug_handle) \ + if (C10_UNLIKELY_OR_CONST(!(cond))) { \ + throw ::c10::BackendRuntimeException( \ + {__func__, __FILE__, static_cast(__LINE__)}, \ + msg, \ + debug_handle); \ + } + +#define TORCH_DELEGATED_BACKEND_RETHROW(e, debug_handle) \ + do { \ + e.pushDebugHandle(debug_handle); \ + throw; \ + } while (false) + +#define DEBUG_HANDLE_UNKNOWN -1 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_init.h new file mode 100644 index 0000000000000000000000000000000000000000..bc490802ff882f7c10b304af7803b80d1c511b9e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_init.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { +// Initialize Python bindings for JIT to_ functions. +void initJitBackendBindings(PyObject* module); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_interface.h new file mode 100644 index 0000000000000000000000000000000000000000..5f7056a86d0628c1a861465c1cc30d46fc2d1db7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_interface.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +// Interface for a JIT backend. +class TORCH_API PyTorchBackendInterface : public torch::CustomClassHolder { + public: + PyTorchBackendInterface() noexcept; + ~PyTorchBackendInterface() override; + + // Returns true if the backend is available to process delegation calls. + virtual bool is_available() = 0; + + // Compile the module contained in \p processed using the details provided in + // \p method_compile_spec for each module method that should be compiled for + // the backend. \p method_compile_spec should be of type Dict. + // \returns a dictionary of type Dict that contains a backend + // handle each method that can run on the backend (i.e. each key in \p + // method_compile_spec). + virtual c10::impl::GenericDict compile( + c10::IValue processed, + c10::impl::GenericDict method_compile_spec) = 0; + + // Execute the method specified by \p handle using \p inputs. \returns the + // outputs as a tuple. + virtual c10::impl::GenericList execute( + c10::IValue handle, + c10::impl::GenericList inputs) = 0; +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_preprocess.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_preprocess.h new file mode 100644 index 0000000000000000000000000000000000000000..f0241ec96ef63b32e94b6116f8fcc31df5da9f51 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_preprocess.h @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +namespace torch::jit { +class backend_preprocess_register { + std::string backend_name_; + + public: + backend_preprocess_register( + const std::string& name, + const detail::BackendPreprocessFunction& preprocess) + : backend_name_(name) { + detail::registerBackendPreprocessFunction(name, preprocess); + } +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_resolver.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_resolver.h new file mode 100644 index 0000000000000000000000000000000000000000..aee7fac6ddfb3fe942a82ad33074e672a3422821 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/backend_resolver.h @@ -0,0 +1,13 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { +// Create a Resolver for use in generating LoweredModules for specific backends. 
+TORCH_API std::shared_ptr loweredModuleResolver(); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/cpp/context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/cpp/context.h new file mode 100644 index 0000000000000000000000000000000000000000..6ac2655639b3b0dc4a087056e90a7f75a593f226 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/cpp/context.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#ifndef PTM_COREML_Context_h +#define PTM_COREML_Context_h + +#include + +namespace torch::jit::mobile::coreml { + +struct ContextInterface { + virtual ~ContextInterface() = default; + virtual void setModelCacheDirectory(std::string path) = 0; +}; + +class BackendRegistrar { + public: + explicit BackendRegistrar(ContextInterface* ctx); +}; + +void setModelCacheDirectory(std::string path); + +} // namespace torch::jit::mobile::coreml + +#endif + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.h new file mode 100644 index 0000000000000000000000000000000000000000..1b040c52c64f22364741a4c926686ec3579edd3b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLCompiler.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#import + +#include + +NS_ASSUME_NONNULL_BEGIN + +@interface PTMCoreMLCompiler : NSObject + ++ (void)setCacheDirectory:(const std::string&)dir; + ++ (NSString*)cacheDirectory; + ++ (BOOL)compileModel:(const std::string&)modelSpecs modelID:(const std::string&)modelID; + ++ (nullable MLModel*)loadModel:(const std::string)modelID + backend:(const std::string)backend + allowLowPrecision:(BOOL)allowLowPrecision + error:(NSError**)error; + +@end + +NS_ASSUME_NONNULL_END + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.h new file mode 100644 index 0000000000000000000000000000000000000000..5a79337260dcc3099d395a1639f39420796ab6b0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLExecutor.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#import + +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface PTMCoreMLExecutor : NSObject + +@property(atomic, strong) MLModel* model; + +- (instancetype)initWithFeatureNames:(NSArray*)featureNames; + +- (void)setInputs:(c10::impl::GenericList)inputs; + +- (id)forward:(NSError**)error; + +@end + +NS_ASSUME_NONNULL_END + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.h new file mode 100644 index 0000000000000000000000000000000000000000..c0e536370b6ee5209dc88ae869539bad7a423260 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLFeatureProvider.h @@ -0,0 +1,21 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#import +#import + +NS_ASSUME_NONNULL_BEGIN + +@interface PTMCoreMLFeatureProvider : NSObject + +- (instancetype)initWithFeatureNames:(NSSet*)featureNames; + +- (void)clearInputTensors; + +- (void)setInputTensor:(const at::Tensor&)tensor forFeatureName:(NSString*)name; + +@end + +NS_ASSUME_NONNULL_END + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLModelWrapper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLModelWrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..5ee77404da4e52f35be363da00dbf4779b75af89 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLModelWrapper.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include +#include + +namespace torch { +namespace jit { +namespace mobile { +namespace coreml { + +class MLModelWrapper : public CustomClassHolder { + public: + PTMCoreMLExecutor* executor; + std::vector outputs; + + MLModelWrapper() = delete; + + MLModelWrapper(PTMCoreMLExecutor* executor) : executor(executor) { + [executor retain]; + } + + MLModelWrapper(const MLModelWrapper& oldObject) { + executor = oldObject.executor; + outputs = oldObject.outputs; + [executor retain]; + } + + MLModelWrapper(MLModelWrapper&& oldObject) { + executor = oldObject.executor; + outputs = oldObject.outputs; + [executor retain]; + } + + ~MLModelWrapper() { + [executor release]; + } +}; + +} // namespace coreml +} // namespace mobile +} // namespace jit +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLTensorSpec.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLTensorSpec.h new file mode 100644 index 0000000000000000000000000000000000000000..7537f743d938199c4d13c8f8aca01c6e5b0c231b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/coreml/objc/PTMCoreMLTensorSpec.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#import + +#include + +namespace torch::jit::mobile::coreml { + +struct TensorSpec { + std::string name; + c10::ScalarType dtype = c10::ScalarType::Float; +}; + +static inline c10::ScalarType scalar_type(const std::string& type_string) { + if (type_string == "0") { + return c10::ScalarType::Float; + } else if (type_string == "1") { + return c10::ScalarType::Double; + } else if (type_string == "2") { + return c10::ScalarType::Int; + } else if (type_string == "3") { + return c10::ScalarType::Long; + } + return c10::ScalarType::Undefined; +} + +} // namespace torch::jit::mobile::coreml + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/compiler/xnn_compiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/compiler/xnn_compiler.h new file mode 100644 index 0000000000000000000000000000000000000000..61bd88c05345fc626ef96149efa97d5235afa52b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/compiler/xnn_compiler.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include +#include +#include +#include + +namespace torch::jit::xnnpack::delegate { + +class XNNCompiler { + public: + // Takes Flatbuffer Serialized XNNPack Model and rebuilds the xnn-subgraph + // returns an executor object that holds the xnn runtime object which we + // can then use to set inputs and run inference using the xnn graph. + static void compileModel( + const void* buffer_pointer, + size_t num_bytes, + XNNExecutor* executor); +}; + +} // namespace torch::jit::xnnpack::delegate + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/executor/xnn_executor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/executor/xnn_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..376de821a60acabd138d32b375e1c833bd077886 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/executor/xnn_executor.h @@ -0,0 +1,73 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#pragma once +#include +#include +#include + +namespace torch::jit::xnnpack::delegate { + +class XNNExecutor { + private: + std::unique_ptr runtime_{ + nullptr, + &xnn_delete_runtime}; + std::vector input_ids_; + std::vector output_ids_; + std::vector externals_; + + public: + XNNExecutor() = default; + + template + bool set_inputs(std::vector& inputs, std::vector& outputs) { + externals_.clear(); + + if (inputs.size() != input_ids_.size()) { + return false; + } + + for (int i = 0; i < inputs.size(); i++) { + externals_.emplace_back(xnn_external_value{input_ids_[i], inputs[i]}); + } + + if (outputs.size() != output_ids_.size()) { + return false; + } + + for (int i = 0; i < outputs.size(); i++) { + externals_.emplace_back(xnn_external_value{output_ids_[i], outputs[i]}); + } + + return true; + } + + bool forward() { + xnn_status status = + xnn_setup_runtime(runtime_.get(), externals_.size(), externals_.data()); + + if (status != xnn_status_success) { + return false; + } + + status = xnn_invoke_runtime(runtime_.get()); + + if (status != xnn_status_success) { + return false; + } + + return true; + } + 
+ friend class XNNCompiler; +}; + +} // namespace torch::jit::xnnpack::delegate + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/serialization/serializer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/serialization/serializer.h new file mode 100644 index 0000000000000000000000000000000000000000..1ca44842bad03c04d60d83a20a67ae8f845a1213 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/serialization/serializer.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include +#include +#include +#include +#include + +namespace torch { +namespace jit { +namespace xnnpack { +namespace delegate { + +using namespace fb_xnnpack; // Specified in the schema + +class XNNSerializer { + public: + // Constructors + // initial buffersize of 1024 which will grow + // automatically, constant buffer and buffer sizes initialized with dummy + // values as 0 index is reserved for non-constant tensors + XNNSerializer() : XNNSerializer(1024) {} + + explicit XNNSerializer(size_t bufferSize) + : _builder(bufferSize), + _nodes(), + _values(), + _constantBuffer({CreateBuffer( + _builder, + {})}), // index 0 is reserved for non-const data + _bufferSizes({0}) {} + + // Serializing Nodes + + // Serialize add node, we are serializing the argument needed to call + // xnn_define_add2. 
Serializing these values, and at run time we build + // the graph by re running xnn_define_add2 + void serializeAddNode( + uint32_t input1_id, + uint32_t input2_id, + uint32_t output_id, + uint32_t flags); + + // Serializing Values + void serializeTensorValue( + uint32_t xnn_datatype, + size_t num_dims, + std::vector dims, + size_t buffer_data_idx, + uint32_t external_id, + uint32_t flags, + uint32_t id_out); + + // finish and serialize xnngraph returning serialized data + std::string finishAndSerialize( + std::vector input_ids, + std::vector output_ids, + size_t num_extern_ids); + + // decoupled data serialization with tensor values. This way constant tensor + // data can be referenced by multiple intermediate tensors. This call + // serializes the num_bytes of the data_ptr and returns the index it was + // placed in. + size_t serializeData(const uint8_t* data_ptr, size_t num_bytes); + + private: + // xnnpack version we are serializing + const char* _version_sha1 = "ae108ef49aa5623b896fc93d4298c49d1750d9ba"; + + // flatbuffer objects we will create and serialize together to create xnngraph + flatbuffers_fbsource::FlatBufferBuilder _builder; + + // Vector of the serialized xnnpack nodes + std::vector> _nodes; + + // Vector of the serialized xnnpack values + std::vector> _values; + + std::vector> _constantBuffer; + std::vector _bufferSizes; +}; + +} // namespace delegate +} // namespace xnnpack +} // namespace jit +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/xnnpack_graph_builder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/xnnpack_graph_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..369d56f8d9a33971d3047c5c499fe014a4dea7c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/backends/xnnpack/xnnpack_graph_builder.h @@ -0,0 +1,102 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// Copyright (c) Meta Platforms, Inc. and affiliates. +// +// This source code is licensed under the BSD-style license found in the +// LICENSE file in the root directory of this source tree. + +#include +#include +#include +#include +#include +#include + +#include + +namespace torch { +namespace jit { +namespace xnnpack { +namespace delegate { + +class XNNGraph { + private: + const float output_min = -std::numeric_limits::infinity(); + const float output_max = std::numeric_limits::infinity(); + + // serializer class + XNNSerializer _serializer; + // xnn subgraph + xnn_subgraph_t _subgraph_ptr; + // Set of all the tensor values throughout the jit graph + std::unordered_set _intermediate_tensors; + // Set of all the tensor values mapped to the xnnpack ids + std::unordered_map _val_to_ids; + // Vector containing the torch valued inputs/outputs, + // must be ordered to preserve the order of input/outputs + std::vector _inputs; + std::vector _outputs; + + // Graph passes for optimizing and tracing torchscript graph + // Essentially massaging the graph into a digestiable format for + // xnnpack graph lowering. + std::shared_ptr optimizeAndTraceGraph( + std::shared_ptr graph, + std::vector& example_inputs); + + // Gather all the intermediate tensor values within a graph. 
This + // skips through all prim constants. The purpose of this is for defining + // the tensor values beforehand for the xnnpack subgraph. + void gatherTensorValues(std::shared_ptr& graph); + + // Gathers the tensor values in a give node + void gatherNodeInputs(torch::jit::Node& node); + + // Helper function to determine if a jit value is a graph input + bool isGraphInput(torch::jit::Value* val); + + // Helper function to determine if a jit value is a graph output + bool isGraphOutput(torch::jit::Value* val); + + // Defines all xnnpack nodes for the nodes in the graph + void defineAllNodes(std::shared_ptr& graph); + + // Defines all xnn tensor values used throughout the graph + void defineAllTensorValues(); + + // Makes a pass through the graph and throws if any ops are unsupported + void checkOpsToDelegate(std::shared_ptr& graph); + + public: + XNNGraph() : _serializer(), _subgraph_ptr(nullptr) { + xnn_status status = xnn_initialize(/*allocator =*/nullptr); + TORCH_CHECK(xnn_status_success == status, "Failed to initialize xnnpack"); + } + + ~XNNGraph() { + xnn_deinitialize(); + if (_subgraph_ptr != nullptr) { + xnn_delete_subgraph(_subgraph_ptr); + } + } + + void buildXNNGraph( + std::shared_ptr& graph, + std::vector example_inputs); + + void runGraphOnInputs( + std::vector tensor_inputs, + std::vector tensor_outputs); + + std::string serializedXNNGraph(); + + std::vector> getGraphOutputShapes(); +}; + +} // namespace delegate +} // namespace xnnpack +} // namespace jit +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/cuda/interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/cuda/interface.h new file mode 100644 index 0000000000000000000000000000000000000000..211a2fe3a749f934e4c9347b46fb0c6eb111e7f6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/cuda/interface.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +/* + * This file contains APIs for cuda fuser; + * + * We use an empty static struct to hold the function pointers, which are + * registered separately. This is to support cpu-only compilation. + * Registration is done in torch/csrc/jit/codegen/cuda/register_interface.cpp + */ + +namespace torch::jit::fuser::cuda { + +TORCH_API std::atomic& getCudaFusionGuardMode(); + +TORCH_API bool getSingletonFusion(); +TORCH_API bool setSingletonFusion(bool value); +TORCH_API bool getHorizontalFusion(); +TORCH_API bool setHorizontalFusion(bool value); + +// dummy struct to allow API registration +struct CudaFuserInterface { + void (*fn_compile_n)(Node*) = nullptr; + void (*fn_run_n_s)(const Node*, Stack&) = nullptr; + void (*fn_fuse_graph)(std::shared_ptr&) = nullptr; + bool (*fn_can_fuse_n)(const Node*) = nullptr; + void (*fn_insert_profile_inodes)(ProfilingRecord* pr) = nullptr; + bool (*fn_profile_n)(const Node*) = nullptr; + bool (*fn_skip_n)(const std::string&, bool flip) = nullptr; +}; + +// Get interface, this is used by registration and user facing API internally +TORCH_API CudaFuserInterface* getFuserInterface(); + +TORCH_API void compileFusionGroup(Node* fusion_node); +TORCH_API void runFusionGroup(const Node* fusion_node, Stack& stack); +TORCH_API void fuseGraph(std::shared_ptr& 
/*graph*/); +TORCH_API bool canFuseNode(const Node* node); +TORCH_API void InsertProfileNodesForCUDAFuser(ProfilingRecord* pr); +TORCH_API bool profileNode(const Node* node); + +TORCH_API bool skipNode(const std::string& symbol_str, bool flip = true); + +TORCH_API bool isEnabled(); +TORCH_API bool setEnabled(bool is_enabled); +TORCH_API bool canBeEnabled(); + +} // namespace torch::jit::fuser::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/arg_spec.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/arg_spec.h new file mode 100644 index 0000000000000000000000000000000000000000..3621030aed0ee08ccf973d0d64ebe2592118a851 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/arg_spec.h @@ -0,0 +1,60 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include // fmap +#include +#include +#include + +#include +#include + +namespace torch::jit::fuser { + +// Describes the (runtime) arguments to a kernel. +// ArgSpecs are also used as keys to lookup instantiated kernels, so +// they are hashable. +// Note: the device to run on is included in the arg spec because kernels +// are compiled per-device. 
+struct TORCH_API ArgSpec { + ArgSpec(at::TensorList inputs, const int _device) + : descs_{c10::fmap(inputs)}, + hash_code_{c10::get_hash(_device, inputs.size(), descs_)}, + device_{_device} {} + + // (Common) hash function + static size_t hash(const ArgSpec& spec) { + return spec.hash_code_; + } + + // Comparators + bool operator==(const ArgSpec& other) const { + return (descs_ == other.descs_ && device_ == other.device_); + } + + bool operator!=(const ArgSpec& spec) const { + return !(*this == spec); + } + + // Getters + size_t hashCode() const { + return hash_code_; + } + const std::vector& descs() const { + return descs_; + } + int device() const { + return device_; + } + + private: + std::vector descs_; + size_t hash_code_; + int device_; +}; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/codegen.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/codegen.h new file mode 100644 index 0000000000000000000000000000000000000000..1cc359481bf7cb9eb5b1bf72b1f7043bf0612309 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/codegen.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::jit::fuser { + +// Creates a CPU or CUDA kernel for the given graph. +// Returns the C++ or CUDA string implementing the kernel. 
+TORCH_API std::string generateKernel( + const std::string& name, + const Graph& graph, + const std::vector>>& + inputs, + const std::vector>& outputs, + const bool use_cuda); + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/compiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/compiler.h new file mode 100644 index 0000000000000000000000000000000000000000..e76959805a5cdee9d94582b09f76d3750e0ecdc4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/compiler.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::jit::fuser { + +// Performs device-independent "upfront" compilation of the given fusion_group, +// if it has not been registered already. +// Returns a key that can be used to run the fusion later +TORCH_API int64_t registerFusion(const Node* fusion_group); + +// Performs device-specific "runtime" compilation of the given kernel +// with the runtime arguments specified in ArgSpec. +// Outputs are allocated using map_size on the specified device. 
+TORCH_API std::shared_ptr compileKernel( + const KernelSpec& spec, + const ArgSpec& arg_spec, + const std::vector& map_size, + const at::Device& device); + +TORCH_API size_t nCompiledKernels(); + +TORCH_API int debugFuser(); + +using FusedKernelConstructor = std::function( + int16_t device, + std::string name, + std::string code, + std::vector input_desc, + std::vector output_desc, + std::vector chunk_desc, + std::vector concat_desc, + bool has_random)>; + +TORCH_API void registerFusionBackend( + at::Device::Type backend_type, + FusedKernelConstructor ctor); +TORCH_API bool hasFusionBackend(at::Device::Type backend_type); +struct TORCH_API RegisterFusionBackend{RegisterFusionBackend( + at::Device::Type backend_type, + FusedKernelConstructor ctor){ + registerFusionBackend(backend_type, std::move(ctor)); +} // namespace torch::jit::fuser +} +; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..13e37fe47a442853f902b18967222dfd930cec2c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/fused_kernel.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser::cpu { + +// Represents a compiled CPU kernel and the metadata necessary to run it +struct TORCH_API FusedKernelCPU : public FusedKernel { + FusedKernelCPU( + std::string name, + std::string code, + std::vector input_desc, + std::vector output_desc, + std::vector chunk_desc, + std::vector concat_desc, + bool has_random); + + at::Backend backend() const override { + return at::Backend::CPU; + } + + void launch_raw(const uint32_t numel, std::vector& arguments) + const override { + kernel(numel, arguments.data()); + } + + private: + std::unique_ptr so_lib; + void (*kernel)(uint32_t, void**) = nullptr; +}; + +} // namespace torch::jit::fuser::cpu + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/resource_strings.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/resource_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..62c3008b31ff5ecacfca5541068ced287dcb84cc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/resource_strings.h @@ -0,0 +1,106 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::cpu { + +/*with type_as not checking type of its input, a fusion group can have non-fp32 +tensor as input. Correct code for this case is generated, however, nvrtc does +not know how to handle int*_t integer types, so typedefs help it handle those +cases*/ + +static auto type_declarations_template = at::jit::CodeTemplate(R"( + +#define POS_INFINITY INFINITY +#define NEG_INFINITY -INFINITY + +typedef ${IndexType} IndexType; +template +struct TensorInfo { + T* data; + IndexType sizes[N]; + IndexType strides[N]; +}; +template +struct TensorInfo { + T * data; +}; +)"); + +static auto cpu_compilation_unit_template = at::jit::CodeTemplate(R"( +#include +#include +#include + +double rsqrt(double x) { + return 1.0/sqrt(x); +} + +float rsqrtf(float x) { + return 1.0f/sqrtf(x); +} + +double frac(double x) { + return x - trunc(x); +} + +float fracf(float x) { + return x - truncf(x); +} + +${type_declarations} + +#ifdef _MSC_VER +template struct int_of_size; + +#define DEFINE_INT_OF_SIZE(int_t) \ +template<> struct int_of_size { using type = int_t; } + +DEFINE_INT_OF_SIZE(int64_t); +DEFINE_INT_OF_SIZE(int32_t); +DEFINE_INT_OF_SIZE(int16_t); +DEFINE_INT_OF_SIZE(int8_t); + +#undef DEFINE_INT_OF_SIZE + +template +using int_same_size_t = typename 
int_of_size::type; + +#define IndexTypeLoop int_same_size_t +#define ToIndexTypeLoop(x) static_cast(x) +#else +#define IndexTypeLoop IndexType +#define ToIndexTypeLoop(x) x +#endif + +#define OMP_THRESHOLD 100000 +static void ${kernelName}_kernel(IndexType totalElements, ${formals}) { + #pragma omp parallel for if(totalElements > OMP_THRESHOLD) + for (IndexTypeLoop linearIndex = 0; + linearIndex < ToIndexTypeLoop(totalElements); + linearIndex += 1) { + // Convert `linearIndex` into an offset of tensor: + ${tensorOffsets} + // calculate the results + ${kernelBody} + } +} + +#ifdef _WIN32 +#define JIT_API __declspec(dllexport) +#else +#define JIT_API +#endif + +extern "C" +JIT_API void ${kernelName}(IndexType totalElements, void ** args) { + ${kernelName}_kernel(totalElements ${,argument_loads}); +} +)"); + +} // namespace torch::jit::fuser::cpu + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/temp_file.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/temp_file.h new file mode 100644 index 0000000000000000000000000000000000000000..726ca1e7cc63e1fa32a04b9ca9995aad365ff778 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cpu/temp_file.h @@ -0,0 +1,140 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#ifdef _WIN32 +#include +#include +#include +#include +#include +#include +#include +#include +#include +#else +#include +#endif + +#include +#include + +namespace torch::jit::fuser::cpu { + +#ifdef _MSC_VER +inline int wmkstemps(wchar_t* tmpl, int suffix_len) { + int len; + wchar_t* name; + int fd = -1; + 
int save_errno = errno; + + len = wcslen(tmpl); + if (len < 6 + suffix_len || + wcsncmp(&tmpl[len - 6 - suffix_len], L"XXXXXX", 6)) { + return -1; + } + + name = &tmpl[len - 6 - suffix_len]; + + std::random_device rd; + do { + for (unsigned i = 0; i < 6; ++i) { + name[i] = "abcdefghijklmnopqrstuvwxyz0123456789"[rd() % 36]; + } + + fd = _wopen(tmpl, _O_RDWR | _O_CREAT | _O_EXCL, _S_IWRITE | _S_IREAD); + } while (errno == EEXIST); + + if (fd >= 0) { + errno = save_errno; + return fd; + } else { + return -1; + } +} +#endif + +struct TempFile { + AT_DISALLOW_COPY_AND_ASSIGN(TempFile); + + TempFile(const std::string& t, int suffix) { +#ifdef _MSC_VER + auto wt = c10::u8u16(t); + std::vector tt(wt.c_str(), wt.c_str() + wt.size() + 1); + int fd = wmkstemps(tt.data(), suffix); + AT_ASSERT(fd != -1); + file_ = _wfdopen(fd, L"r+"); + auto wname = std::wstring(tt.begin(), tt.end() - 1); + name_ = c10::u16u8(wname); +#else + // mkstemps edits its first argument in places + // so we make a copy of the string here, including null terminator + std::vector tt(t.c_str(), t.c_str() + t.size() + 1); + int fd = mkstemps(tt.data(), suffix); + AT_ASSERT(fd != -1); + file_ = fdopen(fd, "r+"); + // - 1 because tt.size() includes the null terminator, + // but std::string does not expect one + name_ = std::string(tt.begin(), tt.end() - 1); +#endif + } + + const std::string& name() const { + return name_; + } + + void sync() { + fflush(file_); + } + + void write(const std::string& str) { + size_t result = fwrite(str.c_str(), 1, str.size(), file_); + AT_ASSERT(str.size() == result); + } + +#ifdef _MSC_VER + void close() { + if (file_ != nullptr) { + fclose(file_); + } + file_ = nullptr; + } +#endif + + FILE* file() { + return file_; + } + + ~TempFile() { +#ifdef _MSC_VER + if (file_ != nullptr) { + fclose(file_); + } + auto wname = c10::u8u16(name_); + if (!wname.empty() && _waccess(wname.c_str(), 0) != -1) { + _wunlink(wname.c_str()); + } +#else + if (file_ != nullptr) { + // unlink first to 
ensure another mkstemps doesn't + // race between close and unlink + unlink(name_.c_str()); + fclose(file_); + } +#endif + } + + private: + FILE* file_ = nullptr; + std::string name_; +}; + +} // namespace torch::jit::fuser::cpu + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..85c23b394ab73e4cce98eb632a4979059f2429d3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/fused_kernel.h @@ -0,0 +1,64 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser::cuda { + +// query codegen output arch and target +TORCH_CUDA_CU_API void codegenOutputQuery( + const cudaDeviceProp* const prop, + int& major, + int& minor, + bool& compile_to_sass); + +// A class holding metadata for an actual CUDA function. +// Note: CUDA functions are per device. 
+struct TORCH_CUDA_CU_API FusedKernelCUDA + : public ::torch::jit::fuser::FusedKernel { + FusedKernelCUDA( + at::DeviceIndex device, + std::string name, + std::string code, + std::vector input_desc, + std::vector output_desc, + std::vector chunk_desc, + std::vector concat_desc, + bool has_random); + + ~FusedKernelCUDA() override; + + void launch_raw(const uint32_t numel, std::vector& arguments) + const override; + + at::Backend backend() const override { + return at::Backend::CUDA; + } + + private: + static constexpr auto kBlockSize = 128; + + // Note: per device to store device properties and compute launch heuristics + // Acquiring these values at launch time would be too slow + at::DeviceIndex device_; + int maxBlocks_{}; + cudaDeviceProp* prop_{}; + std::vector ptx_; + CUmodule module_{}; + CUfunction function_{}; +}; + +} // namespace torch::jit::fuser::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h new file mode 100644 index 0000000000000000000000000000000000000000..b495251ad391d9d9a86ab1c8867e84dfd1f07f07 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/cuda/resource_strings.h @@ -0,0 +1,417 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit::fuser::cuda { + +/*with type_as not checking type of its input, a fusion group can have non-fp32 +tensor as input. 
Correct code for this case is generated, however, nvrtc does +not know how to handle int*_t integer types, so typedefs help it handle those +cases*/ + +static constexpr auto bfloat16_type_string = "__nv_bfloat16"; + +#if defined(USE_ROCM) && ROCM_VERSION < 70000 +static auto type_declarations_template = at::jit::CodeTemplate(R"( +${HalfHeader} +${BFloat16Header} +${RandHeader} + +#define NAN __int_as_float(0x7fffffff) +#define POS_INFINITY __int_as_float(0x7f800000) +#define NEG_INFINITY __int_as_float(0xff800000) + +typedef ${IndexType} IndexType; +template +struct TensorInfo { + T* data; + IndexType sizes[N]; + IndexType strides[N]; +}; +template +struct TensorInfo { + T * data; +}; +)"); +#else +static auto type_declarations_template = at::jit::CodeTemplate(R"( +typedef unsigned char uint8_t; +typedef signed char int8_t; +typedef short int int16_t; +typedef long long int int64_t; +typedef unsigned long long int uint64_t; +${HalfHeader} +${BFloat16Header} +${RandHeader} + +#define NAN __int_as_float(0x7fffffff) +#define POS_INFINITY __int_as_float(0x7f800000) +#define NEG_INFINITY __int_as_float(0xff800000) + +typedef ${IndexType} IndexType; +template +struct TensorInfo { + T* data; + IndexType sizes[N]; + IndexType strides[N]; +}; +template +struct TensorInfo { + T * data; +}; +)"); +#endif + +// We rewrite the code for philox RNG from curand as nvrtc couldn't resolve the +// curand header correctly. 
+constexpr auto rand_support_literal = R"( + + class Philox { + public: + __device__ inline Philox(unsigned long long seed, + unsigned long long subsequence, + unsigned long long offset) { + key.x = (unsigned int)seed; + key.y = (unsigned int)(seed >> 32); + counter = make_uint4(0, 0, 0, 0); + counter.z = (unsigned int)(subsequence); + counter.w = (unsigned int)(subsequence >> 32); + STATE = 0; + incr_n(offset / 4); + } + + __device__ inline unsigned long operator()() { + if(STATE == 0) { + uint4 counter_ = counter; + uint2 key_ = key; + for(int i = 0; i < 9; i++) { + counter_ = single_round(counter_, key_); + key_.x += (kPhilox10A); key_.y += (kPhilox10B); + } + output = single_round(counter_, key_); + incr(); + } + unsigned long ret; + switch(STATE) { + case 0: ret = output.x; break; + case 1: ret = output.y; break; + case 2: ret = output.z; break; + case 3: ret = output.w; break; + } + STATE = (STATE + 1) % 4; + return ret; + } + + private: + uint4 counter; + uint4 output; + uint2 key; + unsigned int STATE; + __device__ inline void incr_n(unsigned long long n) { + unsigned int nlo = (unsigned int)(n); + unsigned int nhi = (unsigned int)(n >> 32); + counter.x += nlo; + if (counter.x < nlo) + nhi++; + counter.y += nhi; + if (nhi <= counter.y) + return; + if (++counter.z) + return; + ++counter.w; + } + __device__ inline void incr() { + if (++counter.x) + return; + if (++counter.y) + return; + if (++counter.z) + return; + ++counter.w; + } + __device__ unsigned int mulhilo32(unsigned int a, unsigned int b, + unsigned int *result_high) { + *result_high = __umulhi(a, b); + return a*b; + } + + __device__ inline uint4 single_round(uint4 ctr, uint2 key) { + unsigned int hi0; + unsigned int hi1; + unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0); + unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1); + + uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0}; + return ret; + } + + static const unsigned long kPhilox10A = 0x9E3779B9; + static const unsigned 
long kPhilox10B = 0xBB67AE85; + static const unsigned long kPhiloxSA = 0xD2511F53; + static const unsigned long kPhiloxSB = 0xCD9E8D57; + }; + + // Inverse of 2^32. + #define M_RAN_INVM32 2.3283064e-10f + __device__ __inline__ float uniform(unsigned int x) { + return x * M_RAN_INVM32; + } +)"; + +constexpr auto rand_param = + ",unsigned long long seed, unsigned long long offset"; + +constexpr auto rand_init = R"( + int idx = blockIdx.x*blockDim.x + threadIdx.x; + Philox rnd(seed, idx, offset); +)"; + +static auto cuda_compilation_unit_template = at::jit::CodeTemplate(R"( +${type_declarations} + +extern "C" __global__ +void ${kernelName}(IndexType totalElements, ${formals} ${RandParam}) { + ${RandInit} + // check whether do vectorized load/store and allocate buffer + bool flag_vec4 = true; + ${tensorChecks} + if (flag_vec4) { + for (IndexType linearIndex = 4 * (blockIdx.x * blockDim.x + threadIdx.x); + linearIndex < totalElements; + linearIndex += 4 * gridDim.x * blockDim.x) { + // Convert `linearIndex` into an offset of tensor as it is: + ${tensorOffsets} + // load 4 at a time + ${kernelLoad} + #pragma unroll 4 + for (int i=0; i<4; i++) { + // calculate the results + ${kernelBody_vec4} + } + // store 4 at a time + ${kernelStore} + } + } else { + for (IndexType linearIndex = blockIdx.x * blockDim.x + threadIdx.x; + linearIndex < totalElements; + linearIndex += gridDim.x * blockDim.x) { + // Convert `linearIndex` into an offset of tensor: + ${tensorOffsets} + // calculate the results + ${kernelBody} + } + } +} +)"); + +// This snippet enables half support in the jit. Following the pattern for +// reductions, fp16 input data is immediately upconverted to float +// with __half2float(). All mathematical operations are done on float +// values, and if needed the intermediate float representation is +// converted to half with __float2half() when writing to a half tensor. 
+#if defined(USE_ROCM) +constexpr auto half_support_literal = + R"( +typedef __half half; +)"; +#else +constexpr auto half_support_literal = + R"( +#define __HALF_TO_US(var) *(reinterpret_cast(&(var))) +#define __HALF_TO_CUS(var) *(reinterpret_cast(&(var))) +#if defined(__cplusplus) + struct __align__(2) __half { + __host__ __device__ __half() { } + + protected: + unsigned short __x; + }; + + /* All intrinsic functions are only available to nvcc compilers */ + #if defined(__CUDACC__) + /* Definitions of intrinsics */ + __device__ __half __float2half(const float f) { + __half val; + asm("{ cvt.rn.f16.f32 %0, %1;}\n" : "=h"(__HALF_TO_US(val)) : "f"(f)); + return val; + } + + __device__ float __half2float(const __half h) { + float val; + asm("{ cvt.f32.f16 %0, %1;}\n" : "=f"(val) : "h"(__HALF_TO_CUS(h))); + return val; + } +)" + // MSVC's preprocessor (but not the standard compiler) has a bug + // where it incorrectly tokenizes raw string literals, ending when it sees a + // " this causes the #endif in this string literal to be treated as a + // preprocessor token which, in turn, cause sccache on windows CI to fail. + // See https://godbolt.org/z/eVTIJq as an example. 
+ // This workaround uses string-pasting to separate the " and the #endif into + // different strings + R"( + #endif /* defined(__CUDACC__) */ +#endif /* defined(__cplusplus) */ +#undef __HALF_TO_US +#undef __HALF_TO_CUS + +typedef __half half; +)"; +#endif + +#if defined(USE_ROCM) + +#if ROCM_VERSION >= 70000 +#define BF16_UINT32_DEF "typedef unsigned int uint32_t;\n" +#else +#define BF16_UINT32_DEF "" +#endif + +constexpr auto bfloat16_support_literal = + R"( +#ifndef __align__ +#define __align__(x) __attribute__((aligned(x))) +#endif +)" BF16_UINT32_DEF R"( +typedef struct __align__(2) { + unsigned short x; +} +__nv_bfloat16_raw; + +#if defined(__cplusplus) +struct __align__(2) __nv_bfloat16 { + __host__ __device__ __nv_bfloat16() {} + + __host__ __device__ __nv_bfloat16& operator=(const __nv_bfloat16_raw& hr) { + __x = hr.x; + return *this; + } + + unsigned short __x; +}; + +__device__ unsigned short __internal_float2bfloat16( + const float f, + unsigned int& sign, + unsigned int& remainder) { + unsigned int x; + + x = __float_as_uint(f); + + if ((x & 0x7fffffffU) > 0x7f800000U) { + sign = 0U; + remainder = 0U; + return static_cast(0x7fffU); + } + sign = x >> 31; + remainder = x << 16; + return static_cast(x >> 16); +} + +/* Definitions of intrinsics */ +__device__ __nv_bfloat16 __float2bfloat16(const float a) { + __nv_bfloat16 val; + __nv_bfloat16_raw r; + unsigned int sign; + unsigned int remainder; + r.x = __internal_float2bfloat16(a, sign, remainder); + if ((remainder > 0x80000000U) || + ((remainder == 0x80000000U) && ((r.x & 0x1U) != 0U))) { + r.x++; + } + val = r; + return val; +} + +__device__ float __bfloat162float(const __nv_bfloat16 a) { + union + { + uint32_t int32; + float fp32; + } u = {uint32_t(a.__x) << 16}; + return u.fp32; +} +#endif /* defined(__cplusplus) */ +)"; +#else +constexpr auto bfloat16_support_literal = + R"( +#define __BFLOAT16_TO_US(var) *(reinterpret_cast(&(var))) +#define __BFLOAT16_TO_CUS(var) \ + *(reinterpret_cast(&(var))) + 
+typedef struct __align__(2) { + unsigned short x; +} +__nv_bfloat16_raw; + +#if defined(__cplusplus) +struct __align__(2) __nv_bfloat16 { + __host__ __device__ __nv_bfloat16() {} + + __host__ __device__ __nv_bfloat16& operator=(const __nv_bfloat16_raw& hr) { + __x = hr.x; + return *this; + } + + protected: + unsigned short __x; +}; + +#if defined(__CUDACC__) +__device__ unsigned short __internal_float2bfloat16( + const float f, + unsigned int& sign, + unsigned int& remainder) { + unsigned int x; + + x = __float_as_uint(f); + + if ((x & 0x7fffffffU) > 0x7f800000U) { + sign = 0U; + remainder = 0U; + return static_cast(0x7fffU); + } + sign = x >> 31; + remainder = x << 16; + return static_cast(x >> 16); +} + +/* Definitions of intrinsics */ +__device__ __nv_bfloat16 __float2bfloat16(const float a) { + __nv_bfloat16 val; +#if __CUDA_ARCH__ >= 800 + asm("{ cvt.rn.bf16.f32 %0, %1;}\n" : "=h"(__BFLOAT16_TO_US(val)) : "f"(a)); +#else + __nv_bfloat16_raw r; + unsigned int sign; + unsigned int remainder; + r.x = __internal_float2bfloat16(a, sign, remainder); + if ((remainder > 0x80000000U) || + ((remainder == 0x80000000U) && ((r.x & 0x1U) != 0U))) { + r.x++; + } + val = r; +#endif + return val; +} + +__device__ float __bfloat162float(const __nv_bfloat16 a) { + float val; + asm("{ mov.b32 %0, {0,%1};}\n" : "=f"(val) : "h"(__BFLOAT16_TO_CUS(a))); + return val; +} +#endif /* defined(__CUDACC__) */ +#endif /* defined(__cplusplus) */ +#undef __BFLOAT16_TO_US +#undef __BFLOAT16_TO_CUS +)"; +#endif + +} // namespace torch::jit::fuser::cuda + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/executor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/executor.h new file mode 100644 index 0000000000000000000000000000000000000000..aa6c691a0b807558cb4f5ce030dd156afb128a18 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/executor.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::jit::fuser { + +// Runs the fusion associated with the key (see registerFusion() in interface.h) +// on the inputs taken from the given Stack. +TORCH_API bool runFusion( + const int64_t key, + Stack& stack, + std::string* code_out = nullptr); + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fallback.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fallback.h new file mode 100644 index 0000000000000000000000000000000000000000..393127a6e99dc29fa41aecbb36d7e437e3a5bd51 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fallback.h @@ -0,0 +1,16 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::jit::fuser { + +void runFallback(int64_t key, Stack& stack); + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fused_kernel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fused_kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..ca2adedd7b196d1db9dddca4b3319c51e7060226 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/fused_kernel.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser { + +struct FusedKernel { + AT_DISALLOW_COPY_AND_ASSIGN(FusedKernel); + + FusedKernel( + std::string name, + std::string code, + std::vector input_desc, + std::vector output_desc, + std::vector chunk_desc, + std::vector concat_desc, + bool has_random) + : name_(std::move(name)), + code_(std::move(code)), + 
input_desc_(std::move(input_desc)), + output_desc_(std::move(output_desc)), + chunk_desc_(std::move(chunk_desc)), + concat_desc_(std::move(concat_desc)), + has_random_(has_random) {} + + virtual ~FusedKernel() = default; + + // arguments is a list of pointers to the arguments for the compiled CUDA/CPU + // code. + // The format of arguments is suitable for directly passing to a call to + // cuLaunchKernel as the kernel arguments. + // Currently the first argument is a pointer to numel (for passing to + // CUDA code), and the remainder are pointers to the TensorInfo structs + // that compiled code uses to load Tensor data. + // launch_with_tensors handles packing at::Tensors into this arguments array. + // CPU code uses the same convention so that launch_with_tensors can be + // shared. + virtual void launch_raw(const uint32_t numel, std::vector& arguments) + const = 0; + virtual at::Backend backend() const = 0; + + // Getters + const std::string& name() const { + return name_; + } + const std::string& code() const { + return code_; + } + const std::vector& inputDesc() const { + return input_desc_; + } + const std::vector& outputDesc() const { + return output_desc_; + } + const std::vector& chunkDesc() const { + return chunk_desc_; + } + const std::vector& concatDesc() const { + return concat_desc_; + } + bool hasRandom() const { + return has_random_; + } + + protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::string name_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::string code_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector input_desc_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector output_desc_; + + // same size as input_desc, describes whether an + // input should be broken into subtensors (chunks) + // to be consumed by the fusion group + // 
NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector chunk_desc_; + + // same size as output_desc, describes whether + // an output is actually a concatenation of + // many subtensors that the fusion group produces + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const std::vector concat_desc_; + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const bool has_random_; +}; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/interface.h new file mode 100644 index 0000000000000000000000000000000000000000..516e192a0fb38a1c4af1cd56a9bff94509335347 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/interface.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::jit { + +constexpr int kCPUDevice = -1; + +// Assigns a "key" to the given fusion_group that it can use to run its +// fusion later (via runFusion() below). +TORCH_API int64_t registerFusion(const Node* fusion_group); + +// Runs the fusion corresponding to the given key on the inputs +// found on the stack. Outputs are placed on the same stack. +// In some cases a fusion cannot be run and a fallback path where +// PyTorch's interpreter runs the graph instead is attempted. 
+TORCH_API void runFusion(const int64_t key, Stack& stack); + +// True if the respective devices can fuse, false otherwise +TORCH_API bool canFuseOnCPU(); +TORCH_API bool canFuseOnGPU(); + +// Sets whether fusion on the CPU is allowed (disabled by default due to +// flakiness) +TORCH_API void overrideCanFuseOnCPU(bool value); + +// Sets whether fusion on CPU must use LLVM Codegen and not SimplieIREval +TORCH_API void overrideMustUseLLVMOnCPU(bool value); + +// Sets whether fusion on the GPU is allowed (enabled by default) +TORCH_API void overrideCanFuseOnGPU(bool value); + +// Treats the given graph as a fusion group and launches it on the +// specified device with the given inputs. +// Returns the outputs. +TORCH_API std::vector debugLaunchGraph( + Graph& graph, + at::ArrayRef inputs); + +// Treats the given graph as a fusion group and returns the generated code. +TORCH_API std::string debugGetFusedKernelCode( + Graph& graph, + at::ArrayRef inputs); + +TORCH_API size_t nCompiledKernels(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_cache.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_cache.h new file mode 100644 index 0000000000000000000000000000000000000000..d2446f6aa8af57c66c8eeef1c5198cf5199e966f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_cache.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser { + +// A thread-safe cache interface. 
+ +// Normalizes the graph by canonicalizing and erasing shape information +TORCH_API std::shared_ptr normalizeGraphForCache( + const std::shared_ptr& graph); + +// Stores the given graph, returning the key used to access it +TORCH_API int64_t store(std::shared_ptr graph); + +// Given a graph, find a KernelSpec based on it +TORCH_API std::optional lookupGraph( + const std::shared_ptr& graph); + +// Returns the graph corresponding to the given key (if it exists) +TORCH_API std::optional retrieve(const int64_t key); + +// Returns the size of the fusion key -> KernelSpec cache. +// Only used for testing. +TORCH_API int64_t debugNumCachedKernelSpecs(); + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_spec.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_spec.h new file mode 100644 index 0000000000000000000000000000000000000000..a84bcc7b3b7c18de8bda1dfc1905a44f77cead26 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/kernel_spec.h @@ -0,0 +1,149 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::jit::fuser { + +// Helper struct containing partition information: the number of tensors +// created and the dimension the partitioning is performed on. +// Note: created during upfront compilation, once the tensors are known +// at runtime the partition info is logically combined with the tensor +// descriptions to create PartitionDesc objects. 
+struct TORCH_API PartitionInfo { + PartitionInfo(const int64_t _nSubTensors, const int64_t _dim) + : nSubTensors_{_nSubTensors}, dim_{_dim} {} + + int64_t nSubTensors() const { + return nSubTensors_; + } + int64_t dim() const { + return dim_; + } + + private: + int64_t nSubTensors_; + int64_t dim_; +}; + +// "Kernel Specification." - Contains device-independent fusion information. +// Each kernel specification contains a map of instantiated generated functions +// that implement some or most of its functionality. Multiple generated +// functions are needed by each abstract specification because of different +// devices (cpu vs gpu, different gpus) and different inputs (int vs float, +// contiguous vs discontiguous). +// Note: uses a mutex to control access to its kernel store +// Note: unordered containers do not invalidate references/pointers on +// rehashing, which is critical for thread-safety. +// TODO: allow abstract kernels to use multiple generated kernels +// TODO: allow abstract kernels to reuse generated kernels from common pool +struct TORCH_API KernelSpec { + // Note: assumes the spec is a single block + // Note: This is the appropriate place to generalize if you want to add other + // passes to upfront compilation that walk the graph. 
+ KernelSpec(const int64_t _key, const std::shared_ptr& _graph) + : key_{_key}, + graph_{_graph}, + code_{_graph, ""}, + nInputs_{_graph->inputs().size()} + + { + // No need to iterate over reference since n is pointer + for (const auto n : graph_->nodes()) { + static_assert(std::is_pointer_v, "n must be a pointer"); + if (n->kind() == aten::rand_like) { + has_random_ = true; + break; + } + } + nTensorInputs_ = std::count_if( + graph_->inputs().begin(), graph_->inputs().end(), [](const Value* v) { + return v->type()->isSubtypeOf(*TensorType::get()); + }); + } + + // Getters + int64_t key() const { + return key_; + } + std::shared_ptr graph() const { + return graph_; + } + const Code& code() const { + return code_; + } + int64_t nInputs() const { + return nInputs_; + } + int64_t nTensorInputs() const { + return nTensorInputs_; + } + + std::vector>& inputBroadcastGroups() { + return inputBroadcastGroups_; + } + const std::vector>& inputBroadcastGroups() const { + return inputBroadcastGroups_; + } + + std::vector& inputChunks() { + return inputChunks_; + } + const std::vector& inputChunks() const { + return inputChunks_; + } + + bool hasRandom() const { + return has_random_; + } + + // Cache functions + std::optional> findKernel( + const ArgSpec& arg_spec) const { + std::lock_guard guard{mutex_}; + const auto it = kernels_.find(arg_spec); + if (it == kernels_.end()) + return std::nullopt; + return it->second; + } + void cacheKernel( + const ArgSpec& arg_spec, + const std::shared_ptr& kernel) const { + std::lock_guard guard{mutex_}; + kernels_.emplace(arg_spec, kernel); + } + + private: + int64_t key_; + std::shared_ptr graph_; + Code code_; + uint64_t nInputs_; + uint64_t nTensorInputs_{}; + std::vector> inputBroadcastGroups_; + std::vector inputChunks_; + bool has_random_{false}; + mutable std::mutex mutex_; + mutable std:: + unordered_map, c10::hash> + kernels_; +}; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either 
TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/partition_desc.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/partition_desc.h new file mode 100644 index 0000000000000000000000000000000000000000..6eee194e80d162044d5065be72d9e2797df3db2f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/partition_desc.h @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser { + +// Descriptor for chunk-ing an input tensor into subtensors +// OR concat-ing an output tensor from subtensors +// Note: default constructed used for tensors that do not participate in +// chunk or cat operations. 
+struct TORCH_API PartitionDesc { + PartitionDesc() : nSubTensors_{1}, dim_{0} {} + + PartitionDesc(const TensorDesc& _desc, size_t _nSubTensors, size_t _dim) + : nSubTensors_{_nSubTensors}, dim_{_dim} { + AT_ASSERT(nSubTensors_ > 1); + std::vector cont = _desc.contiguity; + if (dim_ > 0) { + // when we narrow the concatenated output/chunked input + // we make the size[dim] smaller while keeping the stride[dim] the same, + // meaning: stride[dim - 1] != stride[dim]*size[dim] + // so dim - 1 is no longer contiguous + cont[dim_ - 1] = false; + } + subTensorDesc_ = std::make_shared(_desc.scalar_type, cont); + } + + bool isNoop() const { + return (nSubTensors_ == 1); + } + size_t nSubTensors() const { + return nSubTensors_; + } + size_t dim() const { + return dim_; + } + std::shared_ptr subTensorDesc() { + return subTensorDesc_; + } + const std::shared_ptr subTensorDesc() const { + return subTensorDesc_; + } + + private: + size_t nSubTensors_; // == 1 for tensors that should not be operated on via + // chunk/cat + size_t dim_; // dimension along which the chunk/concat occurs + std::shared_ptr + subTensorDesc_; // descriptor for the subtensor, if it exists +}; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_desc.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_desc.h new file mode 100644 index 0000000000000000000000000000000000000000..0376875925a04b26615051b34d72ff4bf481898f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_desc.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser { + +// type information needed by the compiler for input/outputs +// contiguity[i] is true if the dim i is contiguous with dim i + 1. +// contiguity.back() == true means strides.back() == 1. +struct TORCH_API TensorDesc { + at::ScalarType scalar_type; + std::vector contiguity; + + TensorDesc(const at::ScalarType& type, const std::vector& contiguity) + : scalar_type{type}, contiguity{contiguity} { + if (contiguity.empty()) { + nDim_ = 0; + } else { + nDim_ = std::count(contiguity.begin(), contiguity.end(), false) + + (lastIsContiguous() ? 
1 : 0); + } + } + + // Delegating constructors + TensorDesc( + const at::ScalarType& type, + const at::IntArrayRef& sizes, + const at::IntArrayRef& strides) + : TensorDesc(type, TensorDesc::findContiguous(sizes, strides)) {} + + TensorDesc(const at::Tensor& t) + : TensorDesc(t.scalar_type(), t.sizes(), t.strides()) {} + + TensorDesc(const c10::TensorTypePtr& type) + : TensorDesc( + type->scalarType().value(), + type->sizes().concrete_sizes().value(), + type->strides().concrete_sizes().value()) {} + + // number of dimensions after contiguity compression + size_t nDim() const { + return nDim_; + } + + // True iff innermost stride is 1 + bool lastIsContiguous() const { + return (contiguity.empty() || contiguity.back()); + } + + static std::vector findContiguous( + const at::IntArrayRef& sizes, + const at::IntArrayRef& strides) { + AT_ASSERT(sizes.size() == strides.size()); + std::vector cont(sizes.size()); + for (size_t i = 0; i < sizes.size(); ++i) { + const auto expected_stride = + (i + 1 < sizes.size()) ? sizes[i + 1] * strides[i + 1] : 1; + cont[i] = (strides[i] == expected_stride); + } + return cont; + } + + bool operator==(const TensorDesc& desc) const { + return scalar_type == desc.scalar_type && contiguity == desc.contiguity; + } + + bool operator!=(const TensorDesc& desc) const { + return !(*this == desc); + } + + static size_t hash(const TensorDesc& spec) { + return c10::get_hash( + spec.scalar_type, + spec.nDim_, + std::hash>{}(spec.contiguity)); + } + + private: + size_t nDim_; +}; + +inline std::ostream& operator<<(std::ostream& out, const TensorDesc& d) { + out << d.scalar_type << '['; + for (const auto b : d.contiguity) + out << b << ';'; + out << ']'; + return out; +} + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_info.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_info.h new file mode 100644 index 0000000000000000000000000000000000000000..df2c1e12963bdaa58b56e688a3d6b950ee4e2f2d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/fuser/tensor_info.h @@ -0,0 +1,29 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include +#include + +namespace torch::jit::fuser { + +// Host-side view of TensorInfo +// Note dims[0] - we need to dynamically allocate the dims. +struct TORCH_API TensorInfo { + uint32_t* sizes(size_t nDim) { + return &sizes_strides[0]; + } + uint32_t* strides(size_t nDim) { + return &sizes_strides[nDim]; + } + + void* data; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays) + uint32_t sizes_strides[0]; +}; + +} // namespace torch::jit::fuser + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h new file mode 100644 index 0000000000000000000000000000000000000000..72f267a4cffaffb73f8d10d2b1b129efe5cb159f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/LlgaTensorImpl.h @@ -0,0 +1,277 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::jit::fuser::onednn { + +// Engine represents a device and its context. From the device kind, the engine +// knows how to generate code for the target device and what kind of device +// object to be expected. The device id ensures that there is a unique engine +// being created for each device. The device handle passed from PyTorch allows +// oneDNN Graph implementation to work on the device specified by PyTorch, which +// is currently CPU, so we only have one engine. +// Ref: +// https://oneapi-spec.uxlfoundation.org/specifications/oneapi/latest/elements/onednn/source/graph/programming_model#engine +struct Engine { + // CPU engine singleton + static dnnl::engine& getEngine(); + Engine(const Engine&) = delete; + void operator=(const Engine&) = delete; +}; + +// Stream is the logical abstraction for execution units. It is created on top +// of oneDNN Graph engine. A compiled oneDNN Graph partition is submitted to a +// stream for execution. 
+struct Stream { + // CPU stream singleton + static dnnl::stream& getStream(); + Stream(const Stream&) = delete; + void operator=(const Stream&) = delete; +}; + +struct LlgaTensorDesc { + using desc = dnnl::graph::logical_tensor; + + LlgaTensorDesc( + size_t tid, + std::vector sizes, + std::vector strides, + desc::data_type dtype, + desc::property_type property_type) + : tid_(tid), + sizes_(std::move(sizes)), + strides_(std::move(strides)), + dtype_(dtype), + property_type_(property_type), + layout_type_(desc::layout_type::strided), + layout_id_(-1) {} + + LlgaTensorDesc(const desc& t) + : tid_(t.get_id()), + sizes_(t.get_dims()), + strides_({-1}), + dtype_(t.get_data_type()), + property_type_(t.get_property_type()), + layout_type_(t.get_layout_type()), + layout_id_(-1) { + if (is_opaque()) { + layout_id_ = t.get_layout_id(); + } + if (is_strided()) { + strides_ = t.get_strides(); + } + } + + LlgaTensorDesc(const torch::jit::Value* v) + : LlgaTensorDesc( + v->unique(), + {}, + {}, + desc::data_type::f32, + get_property_type(v)) { + if (v->type()->isSubtypeOf(TensorType::get())) { + auto tt = v->type()->cast(); + + if (tt->scalarType()) { + dtype_ = getLlgaDataType(tt->scalarType().value()); + } + + auto sizes = tt->sizes(); + if (sizes.sizes()) { + for (auto d : *sizes.sizes()) { + sizes_.push_back(d.value_or(DNNL_GRAPH_UNKNOWN_DIM)); + } + } + + auto strides = tt->strides(); + if (strides.sizes()) { + for (auto d : *strides.sizes()) { + strides_.push_back(d.value_or(DNNL_GRAPH_UNKNOWN_DIM)); + } + } + } + } + + LlgaTensorDesc supplementTensorInfo(const at::Tensor& t) const; + + desc::data_type getLlgaDataType(at::ScalarType dt) const; + + at::ScalarType aten_scalar_type() const; + + const std::vector& sizes() const { + return sizes_; + } + + const std::vector& strides() const { + TORCH_CHECK(!is_opaque(), "Cannot get strides on opaque layout"); + return strides_; + } + + size_t tid() const { + return tid_; + } + + LlgaTensorDesc tid(uint64_t new_id) const { + auto 
ret = *this; + ret.tid_ = new_id; + return ret; + } + + desc::data_type dtype() const { + return dtype_; + } + + LlgaTensorDesc dtype(desc::data_type new_dtype) const { + return LlgaTensorDesc(tid_, sizes_, strides_, new_dtype, property_type_); + } + + desc::layout_type layout_type() const { + return layout_type_; + } + + LlgaTensorDesc layout_type(desc::layout_type new_layout_type) { + auto ret = *this; + ret.layout_type_ = new_layout_type; + return ret; + } + + desc::property_type get_property_type(const torch::jit::Value* v) { + switch (v->node()->kind()) { + case prim::Constant: + return desc::property_type::constant; + default: + return desc::property_type::variable; + } + } + + LlgaTensorDesc any() { + return layout_type(desc::layout_type::any); + } + + size_t storage_size() const { + return logical_tensor().get_mem_size(); + } + + desc logical_tensor() const { + if (is_dimensionality_unknown()) { + return desc( + tid_, dtype_, DNNL_GRAPH_UNKNOWN_NDIMS, layout_type_, property_type_); + } else if (is_opaque()) { + return desc(tid_, dtype_, sizes_, layout_id_, property_type_); + } else if (is_any()) { + return desc(tid_, dtype_, sizes_, layout_type_, property_type_); + } else { + return desc(tid_, dtype_, sizes_, strides_, property_type_); + } + } + + bool is_strided() const { + return layout_type_ == desc::layout_type::strided; + } + + bool is_any() const { + return layout_type_ == desc::layout_type::any; + } + + bool is_opaque() const { + return layout_type_ == desc::layout_type::opaque; + } + + bool operator==(const LlgaTensorDesc& desc) const { + return tid_ == desc.tid_ && sizes_ == desc.sizes_ && + dtype_ == desc.dtype_ && layout_type_ == desc.layout_type_ && + ((is_opaque() && layout_id_ == desc.layout_id_) || + strides_ == desc.strides_); + } + + bool operator!=(const LlgaTensorDesc& desc) const { + return (tid_ != desc.tid_) || (sizes_ != desc.sizes_) || + (dtype_ != desc.dtype_) || (layout_type_ != desc.layout_type_) || + !((is_opaque() && (layout_id_ 
== desc.layout_id_)) || + (strides_ == desc.strides_)); + } + + static size_t hash(const LlgaTensorDesc& desc) { + return c10::get_hash( + desc.tid_, + desc.sizes_, + desc.dtype_, + desc.layout_type_, + desc.layout_id_); + } + + void set_compute_inplace() { + compute_inplace_ = true; + } + + void set_input_tensor_index(size_t index) { + input_tensor_index_ = index; + } + + bool reuses_input_tensor() { + return compute_inplace_; + } + + size_t get_input_tensor_index() { + return input_tensor_index_; + } + + private: + bool is_dimensionality_unknown() const { + return sizes_.empty(); + } + + size_t tid_; + std::vector sizes_; + std::vector strides_; + desc::data_type dtype_; + desc::property_type property_type_; + desc::layout_type layout_type_; + size_t layout_id_; + // If this is an output tensor, and querying the compiled partition would + // determine that this tensor would reuse its input tensor, then + // compute_inplace would be true, and input_tensor_index would be the index of + // the corresponding input tensor in inputSpecs_ of the LlgaKernel object. + bool compute_inplace_ = false; + size_t input_tensor_index_{}; +}; + +// Initially, oneDNN Graph also used to have blocked layout for tensors between +// partitions, and the LlgaTensorImpl wrapper helped us bypass guard checks. +// oneDNN Graph has switched over to using strided tensors between partitions, +// but this wrapper still helps us bypass guard checks because the strides of +// tensors between partitions would be different from the ones the guard is +// otherwise expecting. 
+struct TORCH_API LlgaTensorImpl : public c10::TensorImpl { + LlgaTensorImpl( + at::Storage&& storage, + const caffe2::TypeMeta& data_type, + const LlgaTensorDesc& desc); + + const LlgaTensorDesc& desc() const { + return desc_; + } + + static at::Tensor llga_to_aten_tensor(LlgaTensorImpl* llgaImpl); + + private: + LlgaTensorDesc desc_; +}; + +at::Tensor empty_llga( + const LlgaTensorDesc& desc, + const c10::TensorOptions& options); + +dnnl::graph::tensor llga_from_aten_tensor(const at::Tensor& tensor); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/decompose_silu.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/decompose_silu.h new file mode 100644 index 0000000000000000000000000000000000000000..24d20864e42cd77fa0540f8eb6378962af11f52c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/decompose_silu.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::onednn { + +void DecomposeSiluForLLGA(std::shared_ptr& graph); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/defer_size_check.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/defer_size_check.h new file mode 100644 index 0000000000000000000000000000000000000000..0bb55003e88bb6c9d7b484ed761540a12a48e99a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/defer_size_check.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::onednn { + +void DeferSizeCheck(std::shared_ptr& graph); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_fuser.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_fuser.h new file mode 100644 index 0000000000000000000000000000000000000000..8f14c5e33a9b3ac58392caaddb6b620c3444fc33 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_fuser.h @@ -0,0 +1,52 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit::fuser::onednn { + +struct WorkBlock : public std::pair { + using pair::pair; + + Node* begin() { + return this->first; + } + Node* end() { + return this->second; + } +}; + +class GraphRewriter { + public: + GraphRewriter(Block* block, std::shared_ptr graph, AliasDb& aliasDb) + : block_(block), + graph_(std::move(graph)), + 
aliasDb_(aliasDb), + llgaHelper_(graph_) {} + + void cleanupSubgraphs(); + void buildupSubgraphs(); + + private: + Block* block_; + std::shared_ptr graph_; + AliasDb& aliasDb_; + LlgaGraphHelper llgaHelper_; + std::vector buildWorkBlocks(); + std::pair scanNode( + Node* consumer, + graph_node_list::iterator workblock_begin); + std::optional tryMerge(Node* consumer, Node* producer); +}; + +// This pass creates the subgraphs for oneDNN Graph Fusion Nodes. +// Its code-structure has been vastly inspired from +// torch/csrc/jit/passes/create_autodiff_subgraphs.cpp +void CreateLlgaSubgraphs(std::shared_ptr& graph); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_helper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..582db7e71cf4856f2710c8e5aefe19b4383f957f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/graph_helper.h @@ -0,0 +1,103 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::jit::fuser::onednn { + +#define STRIDED_LAYOUT 0 +#define OPAQUE_LAYOUT 1 + +struct OpPartitionMap { + void add(uint64_t opId, uint64_t partitionId) { + opmap_[opId] = partitionId; + } + void add(Node* n, uint64_t partitionId) { + add(Operator::getId(n), partitionId); + } + bool has(uint64_t opId) { + return opmap_.count(opId) > 0; + } + bool has(Node* n) { + return has(Operator::getId(n)); + } + uint64_t get(uint64_t opId) { + return opmap_[opId]; + } + uint64_t get(Node* n) { + 
auto opId = Operator::getId(n); + TORCH_CHECK( + has(opId), + "Node ", + n->kind().toQualString(), + " does not belong to any LLGA partition"); + return get(opId); + } + + private: + std::unordered_map opmap_; +}; + +class LlgaGraphHelper { + public: + LlgaGraphHelper( + const std::shared_ptr& graph, + dnnl::graph::partition::policy policy = + dnnl::graph::partition::policy::fusion); + + bool shouldMerge(Node* toMerge, Node* subgraph); + + bool shouldConsiderForMerge(Node* node); + + bool checkForSingleOpPartition(Node* node); + + Node* createSingletonSubgraph(Node* n, AliasDb& db); + + void mergeNodeIntoSubgraph(Node* toMerge, Node* subgraphNode, AliasDb& db); + + void unmergeIfAnyNodeIsMissing(Node* subgraphNode); + + static bool isLlgaSubgraph(const Node* node); + + Operator makeEltwiseOp(Node* node, dnnl::graph::op::kind kind); + + Operator makeBinaryOp(Node* node, dnnl::graph::op::kind kind); + + std::vector getPartitions() const; + + std::map getTensorIdToValue() const; + + Operator createOperator(Node* node); + + private: + size_t countSupportedOps(const std::shared_ptr& graph) const; + std::unique_ptr dnnl_graph_ = nullptr; + std::unique_ptr aliasDb_ = nullptr; + OpPartitionMap opToOwningPartition_; + std::vector partitions_; + std::map + tensorIdToValue_; // map from tensorId to torch::jit::Value +}; + +class LlgaNodeWrapper { + public: + LlgaNodeWrapper(const Node* node); + + void setOpaqueLayout(size_t offset); + + bool useOpaqueLayout(size_t offset) const; + + friend class LlgaGraphHelper; + + private: + Node* n; +}; + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/guard_shape.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/guard_shape.h new file mode 100644 index 0000000000000000000000000000000000000000..73ca360ff573d42805ce3d65f350d9f5f8c9433f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/guard_shape.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::onednn { + +void prepareFusionGroupAndGuardOutputs(Block* block); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/interface.h new file mode 100644 index 0000000000000000000000000000000000000000..68cc22c7d582f3589ab429c621424d86d73ae30d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/interface.h @@ -0,0 +1,63 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +namespace torch::jit { +namespace fuser::onednn { + +static std::atomic onednn_enabled{false}; + +static std::atomic& getLlgaEnabled() { + return onednn_enabled; +} + +C10_EXPORT void fuseGraph(std::shared_ptr& g); + +} // namespace fuser::onednn + +struct C10_EXPORT RegisterLlgaFuseGraph + : public PassManager { + static bool setEnabled(bool enabled) { + 
TORCH_CHECK( + AT_MKLDNN_ENABLED(), + "Running oneDNN Graph fuser is only supported with MKLDNN builds."); + bool oldState = fuser::onednn::getLlgaEnabled(); + fuser::onednn::getLlgaEnabled() = enabled; + if (enabled) { + registerPass(fuser::onednn::fuseGraph); + } else { + clearPass(); + } + return oldState; + } + + static bool isEnabled() { + return fuser::onednn::getLlgaEnabled(); + } + + // override PassManager::registerPass to register pre-pass + static bool registerPass(GraphPass p) { + if (!isRegistered()) { + passID(registerPrePass(std::move(p)), true); + isRegistered(true); + return false; + } + return true; + } + + // override PassManager::clearPass to clear pre-pass + static void clearPass() { + if (isRegistered()) { + clearPrePass(passID()); + isRegistered(true); + } + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/kernel.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/kernel.h new file mode 100644 index 0000000000000000000000000000000000000000..0ac5b00556d552b765a6cfa21c76cb4ef0f36525 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/kernel.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include + +#include + +namespace torch::jit::fuser::onednn { + +using ArgSpec = LlgaTensorDesc; +using ArgSpecs = std::vector; +using RunArg = dnnl::graph::tensor; +using RunArgs = std::vector; +using TensorArgs = std::vector; + +class LlgaKernel { + public: + explicit LlgaKernel(const Node* fusionNode); + + void run(Stack& stack); + + void 
initialize(const TensorArgs& inputs); + + const std::string& debugName() const { + return debugName_; + } + + private: + bool useOpaqueLayout(size_t offset) const; + + // PyTorch copy constants inside the subgraph instead of referencing them. + // Constants inputs to the partition are no longer in the graph->inputs(). + // Need use the tid retrieved from the partition to find the missing + // constant inputs. + void initializeConstantInputs(); + + ArgSpecs initializeInputSpecs(const TensorArgs& inputs); + + ArgSpecs initializeOutputSpecs() const; + + dnnl::graph::compiled_partition compile( + const dnnl::graph::partition& partition); + + std::map initializeTensorIdToOccurence() const; + + std::tuple prepareRunArgs( + const TensorArgs& inputs, + TensorArgs& outputs) const; + + static std::string genDebugName() { + static size_t debugId = 0; + return "LlgaPartition_" + std::to_string(debugId++); + } + + static dnnl::graph::logical_tensor toLogicalTensor(const ArgSpec& s) { + return s.logical_tensor(); + } + + at::Device device_ = at::kCPU; + const Node* fusionNode_; + std::shared_ptr graph_; + int64_t nGraphInputs_ = 0; // number of inputs to graph_ on the IR + int64_t nOutputs_ = 0; + std::map tensorIdToValue_; + std::vector runArgsIdx_; + dnnl::graph::partition partition_; + // nPartitionInputs_ is the actual number of inputs to partition_ of graph_ + // needed by the backend. 
+ // nPartitionInputs_ = nGraphInputs_ + constantInputs_.size() since Constant + // inputs are copied to the inside of the subgraph + int64_t nPartitionInputs_; + dnnl::graph::compiled_partition compilation_; + std::set initializedInputIds_; + std::vector constantValues_; + TensorArgs constantInputs_; + ArgSpecs inputSpecs_; + ArgSpecs outputSpecs_; + std::vector constantLogicalTensors_; + std::string debugName_; + c10::once_flag initialized_flag; + bool is_initialized_ = false; +}; + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/layout_propagation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/layout_propagation.h new file mode 100644 index 0000000000000000000000000000000000000000..a654d8e7d15afb46e8c142d1f918ade6a1d20770 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/layout_propagation.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::onednn { + +void PropagateLayout(const std::shared_ptr& graph); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/operator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/operator.h new file mode 100644 index 0000000000000000000000000000000000000000..ab289941e48a7087b30ae65efb9e1da3be51baab --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/operator.h @@ -0,0 +1,151 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit::fuser::onednn { + +class Operator { + public: + Operator(const Node* node, dnnl::graph::op::kind kind) + : n(node), o(getId(node), kind, node->kind().toQualString()), k(kind) {} + + // Returns output index if the Value is a graph output. + // Otherwise returns -1 + int32_t graphOutputIdx(Value* v) { + int32_t i = 0; + for (const Value* output : v->owningGraph()->outputs()) { + if (v == output) { + return i; + } + i++; + } + return -1; + } + + Operator& setInputValue(Value* v) { + if (v->mustNotBeNone()) { + if (v->type()->kind() == c10::TensorType::Kind) { + o.add_input(createLogicalTensor(v)); + } + } + return *this; + } + + Operator& setInput(size_t offset) { + return setInputValue(n->input(offset)); + } + + template + Operator& setInput(size_t offset, Ts... other) { + setInput(offset); + return setInput(other...); + } + + Operator& setOutputValue(Value* v) { + if (v->mustNotBeNone()) { + o.add_output(createLogicalTensor(v)); + } + return *this; + } + + // setOutputValue & setOutput require a pointer to the LLGA graph, as output + // logical tensors that are graph outputs should be connected to an End LLGA + // op. A value of NULL can be provided for the graph pointer in order to + // maintain the legacy functionality of this function. 
+ Operator& setOutputValue(Value* v, std::unique_ptr& g) { + if (v->mustNotBeNone()) { + auto output_tensor = createLogicalTensor(v); + o.add_output(output_tensor); + if (g) { + int32_t outputIndex = graphOutputIdx(v); + if (outputIndex != -1) { + dnnl::graph::op newEndNode( + LONG_MAX - outputIndex, + dnnl::graph::op::kind::End, + "EndNodeForGraphOutput"); + newEndNode.add_input(output_tensor); + g->add_op(newEndNode); + } + } + } + return *this; + } + + Operator& setOutput(std::unique_ptr& g, size_t offset) { + return setOutputValue(n->output(offset), g); + } + + Operator& setOutput(size_t offset) { + return setOutputValue(n->output(offset)); + } + + template + Operator& setOutput( + std::unique_ptr& g, + size_t offset, + Ts... other) { + setOutput(g, offset); + return setOutput(g, other...); + } + + template + Operator& setAttr(dnnl::graph::op::attr name, Attr&& attr) { + o.set_attr(name, std::forward(attr)); + return *this; + } + + template + Operator& setAttr(dnnl::graph::op::attr name, const F& fn, size_t offset) { + return setAttr(name, fn(n, offset)); + } + + static float ScalarToFloat(const Node* node, size_t offset) { + return toIValue(node->input(offset))->toScalar().to(); + } + + static std::vector Ints(const Node* node, size_t offset) { + return toIValue(node->input(offset))->toIntVector(); + } + + static int64_t Int(const Node* node, size_t offset) { + return toIValue(node->input(offset))->toInt(); + } + + static float Float(const Node* node, size_t offset) { + return static_cast(toIValue(node->input(offset))->toDouble()); + } + + static bool Bool(const Node* node, size_t offset) { + return toIValue(node->input(offset))->toBool(); + } + + static uint64_t getId(const Node* node) { + return reinterpret_cast(node); // cast node address as op id + } + + dnnl::graph::op::kind kind() const { + return k; + } + + dnnl::graph::op llgaOp() const { + return o; + } + + private: + dnnl::graph::logical_tensor createLogicalTensor(Value* value) const { + return 
LlgaTensorDesc(value).logical_tensor(); + } + + const Node* n; + dnnl::graph::op o; + dnnl::graph::op::kind k; +}; + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/prepare_binary.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/prepare_binary.h new file mode 100644 index 0000000000000000000000000000000000000000..7e46d4d447b4922e96b66dbbf32b8d011af7cbae --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/codegen/onednn/prepare_binary.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::fuser::onednn { + +// Prepare binary ops for LLGA +// +// The pass does the following: +// +// - Convert scalar input of aten::add and aten::mul into Float tensor with +// dimension [1] +// +// - Decompose fused add into aten::mul + aten::add when alpha != 1.0 +// +// - Eliminate identity add/mul, i.e., tensor + 0, tensor * 1 +// +void PrepareBinaryForLLGA(const std::shared_ptr& graph); + +} // namespace torch::jit::fuser::onednn + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport.h new file mode 100644 index 0000000000000000000000000000000000000000..c43fd3542c3b9430702f5dbe4067462f36900ffa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +TORCH_API bool _backport_for_mobile( + std::istream& in, + std::ostream& out, + const int64_t to_version); + +TORCH_API bool _backport_for_mobile( + std::istream& in, + const std::string& output_filename, + const int64_t to_version); + +TORCH_API bool _backport_for_mobile( + const std::string& input_filename, + std::ostream& out, + const int64_t to_version); + +TORCH_API bool _backport_for_mobile( + const std::string& input_filename, + const std::string& output_filename, + const int64_t to_version); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport_manager.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport_manager.h new file mode 100644 index 0000000000000000000000000000000000000000..1a06d441ac992af2c5c1236d53d8b3398d19849e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/backport_manager.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace c10 { +struct IValue; +} + +namespace caffe2::serialize { +class PyTorchStreamWriter; +} // namespace caffe2::serialize + +namespace torch::jit { + +/* +BackportManager manages a list of backport from n to n-1 function, and provides +function to check if a specific function exists. +*/ +class BackportManager final { + public: + bool hasBytecodeBackportFunction(const int64_t from_version) const; + + std::unordered_map< + int64_t, + std::function>& + bytecodeBackportFunctions() const; + + bool backport( + std::istream& oss, + caffe2::serialize::PyTorchStreamWriter& final_writer, + int64_t from_version, + int64_t to_version) const; + + BackportManager(BackportManager const&) = delete; + BackportManager& operator=(BackportManager const&) = delete; + BackportManager(); + + private: + // Registry of backport functions. + void registerBytecodeBackportFunction( + const int64_t from_version, + const std::function& + backport_function); +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/model_compatibility.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/model_compatibility.h new file mode 100644 index 0000000000000000000000000000000000000000..1469432f1e1a9e99e3b770e06afb0b281a63d301 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/model_compatibility.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace caffe2::serialize { +class PyTorchStreamReader; +class ReadAdapterInterface; +} // namespace caffe2::serialize + +namespace torch::jit { + +// The family of methods below to get bytecode version from a model +// Throws if not passed in a well formed model +TORCH_API uint64_t _get_model_bytecode_version(std::istream& in); + +TORCH_API uint64_t _get_model_bytecode_version(const std::string& filename); + +TORCH_API uint64_t _get_model_bytecode_version( + const std::shared_ptr& rai); + +uint64_t _get_model_bytecode_version( + const std::vector& bytecode_ivalues); + +// The family of methods below to get the operator version from a model +// Throws if not passed in a well formed model +TORCH_API uint64_t _get_model_operator_version(std::istream& in); + +TORCH_API uint64_t _get_model_operator_version(const std::string& filename); + +TORCH_API uint64_t _get_model_operator_version( + std::shared_ptr rai); + +// Utility Functions +std::vector get_bytecode_ivalues( + caffe2::serialize::PyTorchStreamReader& reader); + +c10::IValue readArchive( + const std::string& archive_name, + caffe2::serialize::PyTorchStreamReader& stream_reader); + +bool check_zip_file( + const std::shared_ptr& rai); + +// 
The family of methods below to get the root ops and information from a model +TORCH_API std::unordered_map _get_model_ops_and_info( + std::istream& in); + +TORCH_API std::unordered_map _get_model_ops_and_info( + const std::string& filename); + +TORCH_API std::unordered_map _get_model_ops_and_info( + std::shared_ptr rai); + +// The family of methods below to get contained types from a model +// Throws if not passed in a well formed model +TORCH_API std::unordered_set _get_mobile_model_contained_types( + std::istream& in); + +TORCH_API std::unordered_set _get_mobile_model_contained_types( + const std::string& filename); + +TORCH_API std::unordered_set _get_mobile_model_contained_types( + std::shared_ptr rai); + +std::unordered_set _get_mobile_model_contained_types( + const std::vector& bytecode_ivalues); + +// The family of methods below return the compatibility information of a model +struct ModelCompatibilityInfo { + uint64_t bytecode_version; + std::unordered_map operator_info; + std::unordered_set type_table; + uint64_t operator_version; + + // Factory Methods + static TORCH_API ModelCompatibilityInfo get(std::istream& in); + static TORCH_API ModelCompatibilityInfo get(const std::string& filename); + static TORCH_API ModelCompatibilityInfo + get(std::shared_ptr rai); +}; + +enum ModelCompatibilityStatus { + OK = 1, + ERROR = 2, +}; + +struct ModelCompatCheckResult { + ModelCompatibilityStatus status; + std::vector errors; +}; +// Takes in information about a runtime and a model and returns if the two are +// compatible with one another. +TORCH_API ModelCompatCheckResult is_compatible( + RuntimeCompatibilityInfo runtime_info, + const ModelCompatibilityInfo& model_info); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/runtime_compatibility.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/runtime_compatibility.h new file mode 100644 index 0000000000000000000000000000000000000000..886e118301706d2b3457228afebc9b6add1c530c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/compatibility/runtime_compatibility.h @@ -0,0 +1,47 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::jit { + +// Struct storing metadata of an operator that can be useful for versioning +struct OperatorInfo { + // The number of arguments within the schema of the op + std::optional num_schema_args; +}; + +struct RuntimeCompatibilityInfo { + std::pair min_max_supported_bytecode_version; + std::unordered_map operator_info; + std::unordered_set supported_types; + std::pair min_max_supported_opperator_versions; + + // Factory Method + static TORCH_API RuntimeCompatibilityInfo get(); +}; + +TORCH_API uint64_t _get_runtime_bytecode_version(); + +TORCH_API std::pair _get_runtime_bytecode_min_max_versions(); + +TORCH_API std::pair +_get_runtime_operators_min_max_versions(); + +TORCH_API std::unordered_map +_get_runtime_ops_and_info(); + +TORCH_API std::unordered_set _get_mobile_supported_types(); + +TORCH_API std::unordered_set _get_loaded_custom_classes(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/BuildFeatureTracer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/BuildFeatureTracer.h new file mode 100644 index 0000000000000000000000000000000000000000..76ecd8a5ebea3a3121e156bbca3f29e7edfe8e12 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/BuildFeatureTracer.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit::mobile { + +/* The BuildFeatureTracer class handles the attachment and removal of a + * recording callback that traces the invocation of code that handles executing + * generic build features. + * + * You can get the set of used build features using + * getBuildFeatures(). + * + * Note: This class is not thread safe or re-entrant, and should not be used + * across multiple threads of execution. + * + */ +struct BuildFeatureTracer final { + at::CallbackHandle handle_; + /* These are the custom class names (constant + * character string) which shows up in code. + */ + typedef std::set build_feature_type; + + BuildFeatureTracer(); + static c10::Synchronized& getBuildFeatures(); + + ~BuildFeatureTracer() { + at::removeCallback(handle_); + } +}; + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/CustomClassTracer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/CustomClassTracer.h new file mode 100644 index 0000000000000000000000000000000000000000..4a88312b2f5872e5edf81dfa8a3006b2a1b8b21f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/CustomClassTracer.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit::mobile { + +/* The CustomClassTracer class handles the attachment and removal of a recording + * callback that traces the invocation of code that handles loading custom + * classes on mobile. + * + * You can get the set of used custom classes using + * getLoadedClasses(). + * + * Note: This class is not thread safe or re-entrant, and should not be used + * across multiple threads of execution. + * + */ +struct CustomClassTracer final { + at::CallbackHandle handle_; + /* These are the custom class names (constant + * character string) which shows up in code. + */ + typedef std::set custom_classes_type; + + CustomClassTracer(); + static c10::Synchronized& getLoadedClasses(); + + ~CustomClassTracer() { + at::removeCallback(handle_); + } +}; + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/KernelDTypeTracer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/KernelDTypeTracer.h new file mode 100644 index 0000000000000000000000000000000000000000..e0ddb8c3aaea93ec3242aa3d1b86ec72890aec2e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/KernelDTypeTracer.h @@ -0,0 +1,42 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit::mobile { +/* The KernelDTypeTracer class handles the attachment and removal of a recording + * callback that traces the invocation of code that handles specific dtypes in + * kernel function implementations that are tagged with specific tags. + * + * You can get the set of kernel tags and the dtypes using + * getCalledKernelTags(). + * + * Note: This class is not thread safe or re-entrant, and should not be used + * across multiple threads of execution. + * + */ +struct KernelDTypeTracer final { + at::CallbackHandle handle_; + /* The key of the map below (std::string) is the kernel tag name (constant + * character string) which shows up in code. The value part of type + * std::set is the collection of dtypes for which we need to + * generate code for the said kernel tag. + */ + typedef std::map> kernel_tags_type; + + KernelDTypeTracer(); + static c10::Synchronized& getCalledKernelTags(); + + ~KernelDTypeTracer() { + at::removeCallback(handle_); + } +}; +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h new file mode 100644 index 0000000000000000000000000000000000000000..2bb7593ca65735a8b8c21acba3d61bfdaa17760e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/MobileModelRunner.h @@ -0,0 +1,151 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +namespace torch::jit::mobile { + +class MobileModelRunner { + std::shared_ptr module_; + + public: + explicit MobileModelRunner(std::string const& file_path) { + module_ = std::make_shared( + torch::jit::_load_for_mobile(file_path)); + } + + MobileModelRunner( + std::string const& file_path, + uint64_t module_load_options) { + std::unordered_map extra_files; + module_ = std::make_shared( + torch::jit::_load_for_mobile( + file_path, + at::Device(at::DeviceType::CPU, 0), + extra_files, + module_load_options)); + } + + MobileModelRunner(std::stringstream oss) { + module_ = std::make_shared( + torch::jit::_load_for_mobile(oss, at::Device(at::DeviceType::CPU, 0))); + } + + /** + * Returns true if the list of operators passed in has a Metal GPU operator, + * and false otherwise. + * + */ + static bool set_has_metal_gpu_operators(std::set const& op_list); + + /** + * Fetches the set of root operators in the file "extra/mobile_info.json" + * within the .ptl archive at location file_path. + * + * An exception is thrown if: + * + * 1. The file at file_path does not exist, or + * 2. The contents of extra/mobile_info.json is not a JSON, or + * 3. The file extra/mobile_info.json does not exist, or + * 4. 
The JSON is malformed in some way and the operator list can not be + * extracted correctly. + * + */ + static std::set get_operators_from_mobile_info_json( + std::string const& file_path); + + static std::vector> ivalue_to_bundled_inputs( + const c10::IValue& bundled_inputs); + + static std::unordered_map + ivalue_to_bundled_inputs_map(const c10::IValue& bundled_inputs); + + /** + * Fetches all the bundled inputs of the loaded mobile model. + * + * A bundled input itself is of type std::vector and the + * elements of this vector<> are the arguments that the "forward" + * method of the model accepts. i.e. each of the at::IValue is a + * single argument to the model's "forward" method. + * + * The outer vector holds a bundled input. For models with bundled + * inputs, the outer most vector will have size > 0. + */ + std::vector> get_all_bundled_inputs(); + + /** + * Fetches all the bundled inputs for all functions of the loaded mobile + * model. + * + * The mapping is from 'function_names' eg 'forward' to bundled inputs for + * that function + * + * A bundled input itself is of type std::vector and the + * elements of this vector<> are the arguments that the corresponding + * method of the model accepts. i.e. each of the at::IValue in the entry + * for forward is a single argument to the model's "forward" method. + * + * The outer vector of each value holds a bundled input. For models with + * bundled inputs, the outer most vector will have size > 0. + */ + std::unordered_map>> + get_many_functions_bundled_inputs(); + + /** + * Returns true if a model possesses get_bundled_inputs_functions_and_info() + */ + bool has_new_style_bundled_inputs() const { + return module_->find_method("get_bundled_inputs_functions_and_info") != + std::nullopt; + } + + /** + * For each tensor in bundled inputs, call the user-provided function 'func'. 
+ */ + void for_each_tensor_in_bundled_inputs( + std::function const& func); + + /** + * Get the root operators directly called by this model's Bytecode. + */ + std::set get_root_operators() { + return torch::jit::mobile::_export_operator_list(*module_); + } + + /** + * Runs the model against all of the provided inputs using the model's + * "forward" method. Returns an std::vector, where each element + * of the returned vector is one of the return values from calling forward(). + */ + std::vector run_with_inputs( + std::vector> const& bundled_inputs); + + /** + * Runs the model against all of the provided inputs for all the specified + * function. Returns an std::vector, where each element + * of the returned vector is one of the return values from calling the + * method named "function_name" on this model. + */ + std::vector run_with_inputs( + const std::string& function_name, + std::vector> const& bundled_inputs) const; + + /** + * Attempts to run all functions in the passed in list if they exist. All + * funcs should require no args + */ + void run_argless_functions(const std::vector& functions); +}; + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/OperatorCallTracer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/OperatorCallTracer.h new file mode 100644 index 0000000000000000000000000000000000000000..2182156d87b9d2bcf7c0b40d7b9c5d49296d7e69 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/OperatorCallTracer.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit::mobile { +/* The OperatorCallTracer class handles the attachment and removal of a + * recording callback that traces invocation of ATen (and other) PyTorch + * operators that get called via the Dispatcher. + * + * You can get the set of operators that were called (op_name.overload_name) + * using getCalledOperators(). + * + * Note: This class is not thread safe or re-entrant, and should not be used + * across multiple threads of execution. + * + */ +struct OperatorCallTracer final { + at::CallbackHandle handle_; + + OperatorCallTracer(); + + static c10::Synchronized>& getCalledOperators() { + static c10::Synchronized> called_operators_; + return called_operators_; + } + + ~OperatorCallTracer() { + at::removeCallback(handle_); + } +}; +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TensorUtils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TensorUtils.h new file mode 100644 index 0000000000000000000000000000000000000000..f437d4b8ee6ea875a75c5856f6be898d3792b7fa --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TensorUtils.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::mobile { +/** + * Recursively scan the IValue object, traversing lists, tuples, dicts, and stop + * and call the user provided callback function 'func' when a Tensor is found. + */ +void for_each_tensor_in_ivalue( + const ::c10::IValue& iv, + std::function const& func); +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TracerRunner.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TracerRunner.h new file mode 100644 index 0000000000000000000000000000000000000000..ddef55b7bc52711df4dce97cfa82e28338d2acdb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/model_tracer/TracerRunner.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::jit::mobile { + +const std::vector always_included_traced_ops = { + // The following are called from setup sections. + "aten::resize_", + "aten::slice.Tensor", +}; + +struct TracerResult { + std::set root_ops; + std::set traced_operators; + KernelDTypeTracer::kernel_tags_type called_kernel_tags; + CustomClassTracer::custom_classes_type loaded_classes; + BuildFeatureTracer::build_feature_type build_features; + std::set enabled_backends; +}; + +/** + * Trace a single model and return the TracerResult. + */ +TracerResult trace_run(const std::string& input_module_path); + +/** + * Trace multiple models and return the TracerResult. + */ +TracerResult trace_run(const std::vector& input_module_paths); + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/aot_compiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/aot_compiler.h new file mode 100644 index 0000000000000000000000000000000000000000..f82f4a1e31fba6f037daf313a81d73cbba78d6e6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/aot_compiler.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit::mobile::nnc { + +// Performs Ahead Of Time compilation of a given method in a model +// returning the compiled function and LLVM assembly code +TORCH_API std::pair, const std::string> aotCompile( + const std::string& method_name, + std::shared_ptr& subgraph, + const std::vector>& sizes, + const std::vector& types, + const std::string& kernel_func_name = "func"); + +} // namespace torch::jit::mobile::nnc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/context.h new file mode 100644 index 0000000000000000000000000000000000000000..5ac9e2dad7a137faa0ad3b6840af03d90e533477 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/context.h @@ -0,0 +1,228 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::jit::mobile::nnc { + +// Specify the requirements on an input tensor. +// TODO: support input tensor with dynamic shape (PR #54982) +struct TORCH_API InputSpec { + InputSpec() = default; + + // Deserialize the spec from an IValue. + explicit InputSpec(const c10::IValue& value); + + // Serialize the spec into an IValue. + [[nodiscard]] c10::IValue serialize() const; + + // Check whether the input tensor adheres to the spec. + [[nodiscard]] bool validate(const at::Tensor& input) const; + + std::vector sizes_; + c10::ScalarType dtype_{c10::ScalarType::Undefined}; +}; + +// Specify the sizes/dtype/... of output tensor to preallocate the output. +// TODO: support the case where kernel allocates output tensors dynamically. +struct TORCH_API OutputSpec { + OutputSpec() = default; + + // Deserialize the spec from an IValue. + explicit OutputSpec(const c10::IValue& value); + + // Serialize the spec into an IValue. + [[nodiscard]] c10::IValue serialize() const; + + // Allocate an output tensor in accordance with the spec. 
+ [[nodiscard]] at::Tensor allocate() const; + + std::vector sizes_; + c10::ScalarType dtype_{c10::ScalarType::Undefined}; + std::optional qscale_; + std::optional qzero_; +}; + +// Hold the temporary buffers / states needed during the execution. +struct TORCH_API ExecutionState { + ExecutionState() = default; + ExecutionState(const ExecutionState&) = delete; + ExecutionState(ExecutionState&&) = default; + ExecutionState& operator=(const ExecutionState&) = delete; + ExecutionState& operator=(ExecutionState&&) = default; + + // Preallocated buffers needed by the NNC kernel. + std::vector preallocations_; + + // The NNC kernel expects the following arguments layout: + // input tensor 1 + // ... + // input tensor INPUT_NUM + // output tensor 1 + // ... + // output tensor OUTPUT_NUM + // parameter tensor 1 + // ... + // parameter tensor PARAM_NUM + // temporary buffer 1 + // ... + // temporary buffer BUFFER_NUM + std::vector arguments_; +}; + +// Specify how to allocate temporary buffers at initialization. +struct TORCH_API MemoryPlan { + MemoryPlan() = default; + + explicit MemoryPlan(const c10::IValue& value); + + [[nodiscard]] c10::IValue serialize() const; + + void allocate(ExecutionState* state) const; + + std::vector buffer_sizes_; +}; + +// Location of a symbolic shape among dimensions of the inputs +struct TORCH_API SymbolicShapePosition { + SymbolicShapePosition() = default; + SymbolicShapePosition(int64_t input_idx, int64_t dim_idx) + : input_idx_(input_idx), dim_idx_(dim_idx) {} + + int64_t input_idx_; + int64_t dim_idx_; +}; + +// Represents a compiled NNC function which has a 1-1 correspondence with a +// `Method` (e.g. `forward`). It's similar as torch::jit::mobile::Function. +class TORCH_API Function { + public: + explicit Function() = default; + + // Deserialize from an IValue that is generated by the 'serialize()' method. + explicit Function(const c10::IValue& value); + + // Serialize into an IValue. 
+ c10::IValue serialize() const; + + // Execute the compiled NNC function. + c10::impl::GenericList run(const c10::impl::GenericList& inputs) const; + + // The name of the function as specified in the model code. + c10::QualifiedName name() const { + return name_; + } + + void set_name(const c10::QualifiedName& name) { + name_ = name; + } + + // The unique id of the generated NNC kernel corresponding to the function. + const std::string& nnc_kernel_id() const { + return nnc_kernel_id_; + } + + void set_nnc_kernel_id(const std::string& name) { + nnc_kernel_id_ = name; + } + + // The parameters (e.g. weights / bias tensors) to be passed to the generated + // NNC kernel. + const c10::impl::GenericList& parameters() const { + return parameters_; + } + + void set_parameters(const c10::impl::GenericList& parameters) { + parameters_ = parameters; + } + + const std::vector& input_specs() const { + return input_specs_; + } + + void set_input_specs(const std::vector& input_specs) { + input_specs_ = input_specs; + } + + const std::vector& output_specs() const { + return output_specs_; + } + + void set_output_specs(const std::vector& output_specs) { + output_specs_ = output_specs; + } + + const MemoryPlan& memory_plan() const { + return memory_plan_; + } + + void set_memory_plan(const MemoryPlan& memory_plan) { + memory_plan_ = memory_plan; + } + + const std::vector& sym_shape_positions() const { + return sym_shape_positions_; + } + + void set_sym_shape_positions( + const std::vector& sym_shape_pos) { + sym_shape_positions_ = sym_shape_pos; + } + + private: + void init_execution_state() const; + + c10::QualifiedName name_; + std::string nnc_kernel_id_; + c10::impl::GenericList parameters_{at::AnyType::get()}; + std::vector input_specs_; + std::vector output_specs_; + std::vector sym_shape_positions_; + MemoryPlan memory_plan_; + mutable std::unique_ptr execution_state_; +}; + +// CompilationUnit consists of a set of compiled NNC functions. 
It has a 1-1 +// correspondence with a `Module`. +// It's similar as torch::jit::mobile::CompilationUnit. +class TORCH_API CompilationUnit { + public: + CompilationUnit() = default; + CompilationUnit(const CompilationUnit&) = delete; + CompilationUnit(CompilationUnit&&) = default; + CompilationUnit& operator=(const CompilationUnit&) = delete; + CompilationUnit& operator=(CompilationUnit&&) = default; + + // Deserialize from an IValue that is generated by the 'serialize()' method. + explicit CompilationUnit(const c10::IValue& value); + + // Serialize all registered functions into an IValue. The IValue will be save + // into the compiled TorchScript model file ahead-of-time on the host, and + // will be deserialized at runtime on the target device. + [[nodiscard]] c10::IValue serialize() const; + + // Execute a registered function. + [[nodiscard]] c10::impl::GenericList run( + const c10::QualifiedName& function_name, + const c10::impl::GenericList& inputs) const; + + // Register a function to the compilation unit. + void register_function(std::unique_ptr fn); + + private: + [[nodiscard]] Function* find_function(const c10::QualifiedName& qn) const; + + std::unordered_map> functions_; +}; + +} // namespace torch::jit::mobile::nnc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/registry.h new file mode 100644 index 0000000000000000000000000000000000000000..d6579ce1450ccae6fb8e58a273f944fff53844ee --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/nnc/registry.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit::mobile::nnc { + +using nnc_kernel_function_type = int(void**); + +struct TORCH_API NNCKernel { + virtual ~NNCKernel() = default; + virtual int execute(void** /* args */) = 0; +}; + +TORCH_DECLARE_REGISTRY(NNCKernelRegistry, NNCKernel); + +#define REGISTER_NNC_KERNEL(id, kernel, ...) \ + extern "C" { \ + nnc_kernel_function_type kernel; \ + } \ + struct NNCKernel_##kernel : public NNCKernel { \ + int execute(void** args) override { \ + return kernel(args); \ + } \ + }; \ + C10_REGISTER_TYPED_CLASS(NNCKernelRegistry, id, NNCKernel_##kernel); + +namespace registry { + +inline bool has_nnc_kernel(const std::string& id) { + return NNCKernelRegistry()->Has(id); +} + +inline std::unique_ptr get_nnc_kernel(const std::string& id) { + return NNCKernelRegistry()->Create(id); +} + +} // namespace registry + +} // namespace torch::jit::mobile::nnc + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/export_data.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/export_data.h new file mode 100644 index 0000000000000000000000000000000000000000..bf8613ed2be04eed0ce6f5c45306a9c5f30667f4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/export_data.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +/** + * Serializes the provided tensor map to the provided stream. + * + * @param[in] map The tensors to serialize. + * @param[in] out The stream to write the serialized data to. + * @param[in] use_flatbuffer If true, use Flatbuffers to serialize the data. + * If false, use Pickle. + */ +TORCH_API void _save_parameters( + const std::map& map, + std::ostream& out, + bool use_flatbuffer = false); + +/** + * Serializes the provided tensor map to a file. + * + * @param[in] map The tensors to serialize. + * @param[in] filename The stem of the file name to write to. If + * @p use_flatbuffer is false, the extension ".pkl" will be appended. If + * @p use_flatbuffer is true, the extension ".ff" will be appended. + * @param[in] use_flatbuffer If true, use Flatbuffers to serialize the data. + * If false, use Pickle. + */ +TORCH_API void _save_parameters( + const std::map& map, + const std::string& filename, + bool use_flatbuffer = false); + +namespace mobile { + +// NOTE: Please prefer using _save_parameters directly over using the 2 +// functions below. 
+TORCH_API mobile::Module tensor_dict_to_mobile( + const c10::Dict& dict); + +c10::Dict tensor_map_to_dict( + const std::map& map); + +} // namespace mobile + +extern void (*_save_mobile_module_to)( + const mobile::Module& module, + const std::function& writer_func); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/optim/sgd.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/optim/sgd.h new file mode 100644 index 0000000000000000000000000000000000000000..563cafb5e7fc25f911d6b66b065db6d66cdd7a50 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/optim/sgd.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::jit::mobile { + +class SGDParamState { + TORCH_ARG(torch::Tensor, momentum_buffer); + + public: + std::unique_ptr clone() const { + return std::make_unique( + static_cast(*this)); + } + friend bool operator==(const SGDParamState& lhs, const SGDParamState& rhs); +}; + +struct TORCH_API SGDOptions { + /* implicit */ SGDOptions(double lr); + TORCH_ARG(double, lr); + TORCH_ARG(double, momentum) = 0; + TORCH_ARG(double, dampening) = 0; + TORCH_ARG(double, weight_decay) = 0; + TORCH_ARG(bool, nesterov) = false; + + public: + std::unique_ptr clone() const { + return std::make_unique(static_cast(*this)); + } + TORCH_API friend bool operator==( + const SGDOptions& lhs, + const SGDOptions& rhs); +}; + +/// Stores parameters in the param_group and stores a pointer to the SGDOptions +class TORCH_API SGDParamGroup { + public: + // NOTE: In order to store 
`SGDParamGroup` in a `std::vector`, it has to be + // copy-constructible. + SGDParamGroup(const SGDParamGroup& param_group) + : params_(param_group.params()), + options_( + param_group.has_options() ? param_group.options().clone() + : nullptr) {} + SGDParamGroup& operator=(const SGDParamGroup& param_group) { + this->params_ = param_group.params(); + this->options_ = + param_group.has_options() ? param_group.options().clone() : nullptr; + return *this; + } + /* implicit */ SGDParamGroup(std::vector params) + : params_(std::move(params)) {} + SGDParamGroup(std::vector params, std::unique_ptr options) + : params_(std::move(params)), options_(std::move(options)) {} + + bool has_options() const; + SGDOptions& options(); + const SGDOptions& options() const; + void set_options(std::unique_ptr options); + std::vector& params(); + const std::vector& params() const; + + protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector params_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr options_; +}; + +class TORCH_API SGD { + public: + explicit SGD( + const std::vector& param_groups, + SGDOptions defaults) + : defaults_(std::make_unique(defaults)) { + for (const auto& param_group : param_groups) { + add_param_group(param_group); + } + TORCH_CHECK(defaults.lr() >= 0, "Invalid learning rate: ", defaults.lr()); + TORCH_CHECK( + defaults.momentum() >= 0, + "Invalid momentum value: ", + defaults.momentum()); + TORCH_CHECK( + defaults.weight_decay() >= 0, + "Invalid weight_decay value: ", + defaults.weight_decay()); + TORCH_CHECK( + !defaults.nesterov() || + (defaults.momentum() > 0 && defaults.dampening() == 0), + "Nesterov momentum requires a momentum and zero dampening"); + } + + explicit SGD(std::vector params, SGDOptions defaults) + : SGD({SGDParamGroup(std::move(params))}, defaults) {} + + /// Adds the given param_group to the optimizer's param_group list. 
+ void add_param_group(const SGDParamGroup& param_group); + + ~SGD() = default; + + using LossClosure = std::function; + /// A loss function closure, which is expected to return the loss value. + torch::Tensor step(const LossClosure& closure = nullptr); + + /// Zeros out the gradients of all parameters. + void zero_grad(); + + protected: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector param_groups_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + ska::flat_hash_map> state_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr defaults_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::vector params_; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unique_ptr options_; +}; +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/random.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/random.h new file mode 100644 index 0000000000000000000000000000000000000000..d6f2389163c4ddc89fd7be4e60fffe84a0936176 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/random.h @@ -0,0 +1,55 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::jit::mobile { + +/// A lighter `Sampler` that returns indices randomly and cannot be +/// serialized. 
+class TORCH_API RandomSampler : public torch::data::samplers::Sampler<> { + public: + /// Constructs a `RandomSampler` with a size and dtype for the stored indices. + /// + /// The constructor will eagerly allocate all required indices, which is the + /// sequence `0 ... size - 1`. `index_dtype` is the data type of the stored + /// indices. You can change it to influence memory usage. + explicit RandomSampler(int64_t size, Dtype index_dtype = torch::kInt64); + + ~RandomSampler() override; + + /// Resets the `RandomSampler` to a new set of indices. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Serializes the `RandomSampler` to the `archive`. + void save(serialize::OutputArchive& archive) const override; + + /// Deserializes the `RandomSampler` from the `archive`. + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `RandomSampler`. + size_t index() const noexcept; + + private: + at::Tensor indices_; + int64_t index_ = 0; +}; + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/sequential.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/sequential.h new file mode 100644 index 0000000000000000000000000000000000000000..88d9b6e13b16da6610218b8c66e193f51b14291f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/mobile/train/sequential.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::serialize { +class OutputArchive; +class InputArchive; +} // namespace torch::serialize + +namespace torch::jit::mobile { + +/// A lighter `Sampler` that returns indices sequentially and cannot be +/// serialized. +class TORCH_API SequentialSampler : public torch::data::samplers::Sampler<> { + public: + /// Creates a `SequentialSampler` that will return indices in the range + /// `0...size - 1`. + explicit SequentialSampler(size_t size); + + /// Resets the `SequentialSampler` to zero. + void reset(std::optional new_size = std::nullopt) override; + + /// Returns the next batch of indices. + std::optional> next(size_t batch_size) override; + + /// Not supported for mobile SequentialSampler + void save(serialize::OutputArchive& archive) const override; + + /// Not supported for mobile SequentialSampler + void load(serialize::InputArchive& archive) override; + + /// Returns the current index of the `SequentialSampler`. + size_t index() const noexcept; + + private: + size_t size_; + size_t index_{0}; +}; + +} // namespace torch::jit::mobile + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders.h new file mode 100644 index 0000000000000000000000000000000000000000..91d14a9bc272762a69b8011a2d052f30186d3e2e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders.h @@ -0,0 +1,51 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace torch::jit { + +class UpgradersMap { + public: + void set_content( + std::unordered_map>&& content); + int count(); + const std::unordered_map>& get_content(); + bool is_populated(); + // THESE METHODS ARE ONLY USED FOR TESTING PURPOSES + void test_only_set_content( + const std::unordered_map& content); + void test_only_remove_content( + const std::unordered_map& content); + + private: + std::unordered_map> content_; + std::mutex lock; + bool isPopulated = false; +}; + +TORCH_API void populate_upgraders_map( + std::unordered_map>&& content); + +TORCH_API int get_upgraders_map_size(); + +TORCH_API bool is_upgraders_map_populated(); + +TORCH_API const std::unordered_map>& +dump_upgraders_map(); + +// THESE TWO METHODS BELOW ARE ONLY USED FOR TESTING +TORCH_API void test_only_populate_upgraders( + const std::unordered_map& content); + +TORCH_API void test_only_remove_upgraders( + const std::unordered_map& content); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders_entry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders_entry.h new file mode 100644 index 0000000000000000000000000000000000000000..840b357f2ba31ad0dff6d0bee1a6e89429f06786 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/upgraders_entry.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include + +namespace torch::jit { + +TORCH_API void populate_upgraders_graph_map(); + +TORCH_API std::unordered_map> +generate_upgraders_graph(); + +TORCH_API std::unordered_map get_upgraders_entry_map(); + +std::shared_ptr create_upgrader_graph( + const std::string& upgrader_name, + const std::string& upgrader_body); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..53a588552db965a1d3e9e660ed42131088a9e349 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/utils.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct UpgraderRange { + int min_version; + int max_version; +}; + +// Given a list of upgrader entries for a single operator +// and the model version for that operator, find a valid +// upgrader. +TORCH_API std::optional findUpgrader( + const std::vector& upgraders_for_schema, + size_t current_version); + +// Utility methods to find if the operator is up-to-date +// based on all registered upgraders for this operator. +// This can be different from the current server version +// because the implementation of this operator could have +// been consistent for many later version bumps. +TORCH_API bool isOpCurrentBasedOnUpgraderEntries( + const std::vector& upgraders_for_schema, + size_t current_version); + +TORCH_API bool isOpSymbolCurrent( + const std::string& name, + size_t current_version); + +// Returns the possible old schemas for the operator that +// doesn't exist anymore. This can be true for deprecated +// operators. Since name is always a symbol name, there +// can be multiple schemas for different overloads. 
+TORCH_API std::vector loadPossibleHistoricOps( + const std::string& name, + std::optional version); + +TORCH_API uint64_t getMaxOperatorVersion(); + +// Returns the list of min and max version numbers of the operators +// that an upgrader `x` support for all upgraders for op `foo` +TORCH_API std::vector getUpgradersRangeForOp( + const std::string& name); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/version_map.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/version_map.h new file mode 100644 index 0000000000000000000000000000000000000000..f3db22b1003ec561d576c34996655d0b775f2e1c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/operator_upgraders/version_map.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include + +namespace torch::jit { + +struct UpgraderEntry { + int bumped_at_version; + std::string upgrader_name; + std::string old_schema; +}; + +// Toggle the behaviour of calculating version for the module. 
+// If this is true, we calculate solely based on upgraders +// If this is false, we calculate it based on historic per op version map +TORCH_API void calculate_package_version_based_on_upgraders(bool val); + +TORCH_API bool get_version_calculator_flag(); + +TORCH_API const std::unordered_map>& +get_operator_version_map(); + +TORCH_API void test_only_add_entry( + const std::string& op_name, + UpgraderEntry entry); + +TORCH_API void test_only_remove_entry(const std::string& op_name); + +TORCH_API void test_only_reset_flag(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/cast_all_constant_to_floating.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/cast_all_constant_to_floating.h new file mode 100644 index 0000000000000000000000000000000000000000..39fbd77a87591372577bf2ce4c6261b4d25e5da0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/cast_all_constant_to_floating.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::jit { +// see .cpp for docs +TORCH_API void CastAllConstantToFloating(const std::shared_ptr& graph); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/autograd_function_process.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/autograd_function_process.h new file mode 100644 index 0000000000000000000000000000000000000000..53551bab18164c077c87c3e73802085fa6e8a822 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/autograd_function_process.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +TORCH_API void ONNXAutogradFunctionProcess(std::shared_ptr& graph); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/common.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/common.h new file mode 100644 index 0000000000000000000000000000000000000000..50c1b40cdf180dcdf1684ed1f249db281773014e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/common.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +// Functions used by both encapsulation and conversion. 
+ +namespace torch::jit { + +struct IndexingPatternFinder { + public: + static std::vector FetchSliceAndSelect(const Node* node); + + private: + static bool IsSameSource(const Node* n, const Node* m); +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_conversion.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_conversion.h new file mode 100644 index 0000000000000000000000000000000000000000..0497bab7457257a94050bab0c73e00a402538d70 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_conversion.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +// Introduction +// +// The conversion part is called inside the onnx pass. +// In onnx pass, _run_symbolic_function will be called for each node in +// topological order. When it reaches the placeholder node, this function will +// be invoked. It will convert the nodes inside the sub-block based on pattern. +// By that time, it will have shape/type of upstream operators available. After +// the conversion is complete, the placeholder node will be removed, and nodes +// inside its sub-block converted. NodeToONNX will be called for these +// nodes, and they will be converted from ATen operator to ONNX operator. +// +// Note: Edit Pattern Conversion +// +// Each pattern is differentiated by the name attribute of placeholder node. 
+// The placeholder node is part of torch IR graph, After this function, the aten +// nodes under placeholder node subblock will be converted to ONNX and appended +// to the new_block, which is under the new ONNX graph. For the pattern +// conversion code, it can be divided into three parts. +// 1. Nodes in this pattern should be captured inside the subblock of +// Placeholder node after pattern encapsulation[see +// pattern_encapsulation.h]. These nodes will be converted based on +// pattern. This part of conversion is from aten to aten. It happens on +// the torch IR graph inside placeholder node subblock. +// 2. The second part of conversion is to convert the aten nodes produced +// into ONNX. This is done by calling NodeToONNX for each node. The new +// ONNX nodes are appended to the new_block, which is under the new ONNX +// graph. +// 3. The last part of conversion is to find and return, in the same order, +// the ONNX outputs corresponding to the original output for the +// placeholder node. +TORCH_API std::vector ConvertPatternFromSubblock( + Block* new_block, + Node* old_node, + py::dict& env, + py::set& values_in_env); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_encapsulation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_encapsulation.h new file mode 100644 index 0000000000000000000000000000000000000000..8d5cce99154810fc5537be4e8615d3196d992023 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/pattern_conversion/pattern_encapsulation.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +// Introduction +// +// The encapsulation part will find the nodes of patterns, like how other +// pre-onnx passes are written. But instead of converting the nodes, it will +// encapsulate them into a sub-block of a new placeholder node. This part is +// called before onnx pass, so it runs before calling symbolic functions. +// +// Note: Why separate the function into two parts +// +// The purpose is to support conversions that depend on shape and type +// information. Shape and type information is only available after +// _jit_pass_onnx, which converts aten nodes to onnx nodes. So there is a +// interdependent issue. _jit_pass_onnx depends on preprocess passes to convert +// aten nodes into convertible condition, and preprocess passes depend on +// _jit_pass_onnx to convert upstream nodes and apply onnx shape inference. +// Separating the pass into two parts breaks the interdependency. +// +// Note: Edit Pattern Encapsulation +// +// Encapsulation step identifies the pattern, and copies the nodes into +// the subblock of a new placeholder node. The outputs of the new placeholder +// node are used in place of the original nodes instead. The category of the +// pattern is stored as attr::name. 
+TORCH_API std::optional EncapsulatePatternIntoSubblock(Node* n); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/unpack_quantized_weights.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/unpack_quantized_weights.h new file mode 100644 index 0000000000000000000000000000000000000000..4461870c64172a68fa33eaf57ba06e8c8776b3d7 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/onnx/unpack_quantized_weights.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::jit { + +TORCH_API void UnpackQuantizedWeights( + std::shared_ptr& graph, + std::map& paramsDict); +TORCH_API void insertPermutes( + std::shared_ptr& graph, + std::map& paramsDict); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/dedup_module_uses.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/dedup_module_uses.h new file mode 100644 index 0000000000000000000000000000000000000000..fdaac9df01811de7d826d4f1c816d1f33d5c16cf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/dedup_module_uses.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +/** Recursively deduplicate multiple uses of the same module by + * creating an instance clone for each use of the module, which means + * the type will be the same as before and all the attributes will be + * copied, then we'll change the use of the original module to the use + * of cloned module in the Graph. + * + * This is done to ensure that modules can survive destructive passes + * without changing model behavior. For example, here: + * + * x = self.conv1(x) + * x = self.relu(x) + * x = self.conv2(x) + * x = self.relu(x) + * + * self.relu needs to be deduplicated for potential future destructive passes + * to work properly. + */ +TORCH_API void DedupModuleUses(Module& module); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/finalize.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/finalize.h new file mode 100644 index 0000000000000000000000000000000000000000..8fb9403cf4beac11604c4d2a76fc9d17cd8abac6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/finalize.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +/** \brief Backend specific pass to fuse dequantize - op - quantize calls + * as quantized_op calls. + * + * Right now this is a fusion for fbgemm backend and only works for quantized + * conv op, we'll extend to more ops and more backends in the future. + * + * Currently supported fusion: + * q(conv2d(dq(a), dq(w), dq(b))) --> to_nchw(fbgemm_conv2d(prepack(to_nhwc(a)), + * prepack(to_nhwc(w)), + * prepack(to_nhwc(b)))) + * + * q(linear(dq(a), dq(w), dq(b))) --> to_nchw(fbgemm_linear(prepack(to_nhwc(a)), + * prepack(to_nhwc(w)), + * prepack(to_nhwc(b)))) + * + * \param graph the graph we want to apply fusion to + */ +TORCH_API void QuantFusion( + std::shared_ptr& graph, + QuantType quant_type = QuantType::STATIC); + +/** \brief Insert prepack and unpack function in graph + * We want to add pack/unpack functions for quantized weight because later we want + * to fold the packed weight as an attribute of the module, in order to reduce + * the cost of packing the weight on the fly in quantized models. + * + * Each quantized op has its corresponding prepack/unpack function, + * right now, we only need to do prepack/unpack for quantized::linear + * and quantized::conv2d. 
+ */ +TORCH_API void InsertPrepackUnpack(std::shared_ptr& graph); + +/** \brief Insert pack and unpack function in all graphs + * of module + * + * Go through graphs of all the methods of all child modules + * and call InsertPrepackUnpack on the graph. + */ +TORCH_API void InsertPrepackUnpack(Module& module); + +TORCH_API script::Module Finalize( + script::Module& module, + QuantType quant_type = QuantType::STATIC, + const std::vector& preserved_attrs = + std::vector()); + +TORCH_API void FoldQuantizedPrepackingOps(Module& module); + +TORCH_API Module FinalizeOnDevicePTQ( + Module& module, + QuantType quant_type, + const std::string& method_name); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/fusion_passes.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/fusion_passes.h new file mode 100644 index 0000000000000000000000000000000000000000..063c09a3cdc2aea3215225a315d2ff3c7b48b1f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/fusion_passes.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { +TORCH_API void FuseQuantizedAddRelu(std::shared_ptr& graph); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/helper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/helper.h new file mode 100644 index 0000000000000000000000000000000000000000..5e85cfcae7e732be65edecd8ab886350b53fb2c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/helper.h @@ -0,0 +1,219 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +#include +#include + +namespace torch::jit { + +using graph_rewrite_helper::getFuncName; + +// Vector of a module and the name of its method +using ModuleMethodVector = std::vector>; +// Map of quantization parameter name and value +// for example _scale, _zero_point, +// _scalar_type and _axis(for per channel quantization) +using QParamVector = std::vector>; + +// =========== helper functions for Value ========= +// Check if a value is weight, since we need to use weight observer +// for weight +TORCH_API bool isWeight(Value* v); + +// Check if a value is bias for conv and linear, which we do not +// quantize +TORCH_API bool isBiasOfConvOrLinear(Value* v); + +TORCH_API bool isEmbeddingBagNonInput(Value* v); + +// Get the use as scalar input of clamp ops for the input value +std::optional getClampScalarInputUse(Value* v); + +// For a given value `v`, get the list of values that we need to check +// if they are observed/quantized or not, if so, we can say the +// `v` is also observed/quantized, since we can derive +// the quantization parameters for `v` given the list of values +TORCH_API std::vector getPassThroughInputs(Value* v); + +// Clones the method by the name of orig_method_name into new_method_name method +TORCH_API void cloneMethod( + Module& 
module, + const std::string& orig_method_name, + const std::string& new_method_name); + +// Check if a value in the graph is a Scalar value +TORCH_API bool isScalar(Value* v); + +// Check if value is the input of the graph +TORCH_API bool hitGraphInput(Value* value); + +// Converts a mangled name, such as +// __torch__.torch.ao.nn.quantized.modules.conv.___torch_mangle_7.Conv2d +// into an unmangled name, such as +// __torch__.torch.ao.nn.quantized.modules.conv.Conv2d +TORCH_API std::string removeTorchMangle(const std::string& orig_name); + +// Return the module name that corresponds to the value. +TORCH_API std::optional getModuleName(Value* value); + +// =========== helper functions for Node ========= +TORCH_API bool isSingleInputGeneralShapeAtenFunction(Node* n); + +TORCH_API bool isSingleInputGeneralValueAtenFunction(Node* n); + +TORCH_API bool isSingleInputGeneralCallFunction(Node* n); + +TORCH_API bool isSingleInputGeneralAtenFunction(Node* n); + +TORCH_API bool isClamp(Node* n); + +// Check if the node will produce the same result regardless of whether +// the input tensor is quantized or not, example: aten::size +TORCH_API bool isTensorInfoNode(Node* n); + +// Check if this is the propagate op that has a single input, e.g. aten::cat +TORCH_API bool isPropagateQuantSingleInputOp(Node* n); + +// Check if this is the propagate op that has two inputs, e.g. 
aten::add +TORCH_API bool isPropagateQuantBinaryOp(Node* n); + +// Check if this is the node that we'll quantize or not quantize depending on +// whether the input of the node is quantized, example: aten::cat +TORCH_API bool isPropagateQuantOp(Node* n); + +// Check if the node is a binary op like aten::add and aten::mul and +// if the input 1 is a scalar, these ops will be quantized to +// quantized::{op}_scalar +TORCH_API bool isBinaryOpWithScalarInput(Node* n); + +TORCH_API std::optional> getFixedQParams( + Node* n); + +// We don't want to analyze the graph for some `builtin` CallFunctions +// like `linear` because we want to preserve the op boundary +TORCH_API bool userDefinedCallFunction(Node* n); + +// Check if the node has scalar input +TORCH_API bool hasScalarInput(Node* n); + +// Check if a node is quantizable +TORCH_API bool nodeQuantizable( + Node* n, + QuantType quant_type = QuantType::STATIC); + +// Nodes which only require quantization of weight value, eg. embedding_bag +bool isWeightOnlyStaticQuantOp(Node* n); + +// Check if a use of the value is quantizable, this depends on +// both the use node and the offset +TORCH_API bool useQuantizable(const Use& use, QuantType quant_type); + +// Given a CallFunction node, extract the graph of the called function +TORCH_API std::shared_ptr getCallFunctionGraph(Node* n); + +// Check if `use` is a CallFunction of name `func_name` and if value +// `v` is the nth argument (if provided) of the function +bool matchCallFuncToUse( + const Use& use, + const std::string& func_name, + std::optional nth_arg); + +// Check if `use` is a AtenFunction of name `func_name` and if value +// `v` is the nth argument (if provided) of the function +bool matchAtenFuncToUse( + const Use& use, + const std::string& func_name, + std::optional nth_arg); + +// =========== helper functions for Block ========= +// checks if a block will always raise an Exception +TORCH_API bool alwaysRaisesException(Block* block); + +// =========== helper 
functions for Module ========== +// TODO: remove +TORCH_API std::vector getModuleAccessPath( + Value* instance, + Value* self); +// TODO: remove +TORCH_API Module +findChildModule(const Module& module, const std::vector& path); + +// Given an CallMethod node, get the module instance corresponding +// to the instance Value +// TODO: refactor all current uses of this function to the Opt one +TORCH_API Module getInvokedModule(Module& module, Node* n, Value* self); + +// Given an CallMethod node, get the module instance corresponding +// to the instance Value if the instance is a module, otherwise return +// std::nullopt +std::optional getInvokedModuleOpt( + const Module& module, + Node* n, + Value* self); + +// ==================== filter functions for matches ============== +// filter to check Value `vname` is a constant of int value `value` +bool is_int_constant( + const Match& match, + const std::unordered_map& vmap, + const std::string& vname, + int value); + +// filter to check if the %alpha argument of aten::add is constant 1 +bool aten_add_alpha_is_one( + const Match& match, + const std::unordered_map& vmap); + +// filter to check if the functional in CallFunction is relu +bool is_functional_relu( + const Match& match, + const std::unordered_map& vmap); + +// filter to check if the module is torch.nn.ReLU +bool is_relu_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_linear_module( + const Match& match, + const std::unordered_map& vmap); + +// TODO: add a macro to declare the filters +bool is_conv1d_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_conv2d_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_conv3d_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_conv_transpose1d_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_conv_transpose2d_module( + const Match& match, + const std::unordered_map& vmap); + +bool 
is_batchnorm2d_module( + const Match& match, + const std::unordered_map& vmap); + +bool is_batchnorm3d_module( + const Match& match, + const std::unordered_map& vmap); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_observers.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_observers.h new file mode 100644 index 0000000000000000000000000000000000000000..bbc0b3cd92209f86d377d6a05f6baffb1c0b9135 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_observers.h @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace std { + +template <> +struct hash { + inline size_t operator()(const torch::jit::Module& arg) const { + return std::hash>()(arg._ivalue()); + } +}; + +} // namespace std + +namespace torch::jit { + +using QConfig = std::tuple; +using QConfigDict = std::unordered_map>; + +/** \brief Insert observer module and observer function call for + * the Tensors that need to be observed. + * + * For each Tensor that needs to be observed in the method, insert observer + * module to the input module and add forward calls of observer to the specified + * method. + * + * \param module the input module + * \param method_name the method we want to insert observers for + * \param qconfig_dict the qconfig dictionary that specifies how + * each module is going to be quantized + * \param inplace whether we want to do inplace modification to the input module + * or clone the module + * \param quant_type the quantization type to use; defaults to QuantType::STATIC. 
+ */ +TORCH_API Module InsertObservers( + Module& module, + const std::string& method_name, + const QConfigDict& qconfig_dict, + bool inplace, + QuantType quant_type = QuantType::STATIC); + +/** \brief Insert observer module and observer method for + * the Tensors that need to be observed. + * + * For each Tensor that needs to be observed in the method, insert observer + * module to the input module and observe_ methods to the module. + * This method is a clone of method_name with forward calls of observer added. + * + * \param module the input module + * \param method_name the method we want to insert observers for + * \param qconfig_dict the qconfig dictionary that specifies how + * each module is going to be quantized + * \param inplace whether we want to do inplace modification to the input module + * or clone the module + * \param quant_type the quantization type to use; defaults to QuantType::STATIC. + */ +TORCH_API Module InsertObserversForOnDevicePTQ( + Module& module, + const std::string& method_name, + const QConfigDict& qconfig_dict, + bool inplace, + QuantType quant_type = QuantType::STATIC); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_quant_dequant.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_quant_dequant.h new file mode 100644 index 0000000000000000000000000000000000000000..2410ddf652b6641ce11520a49764b1b356bc220a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/insert_quant_dequant.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +/** Replicate quantize node for prim::If blocks, so that we can match + * quantization patterns in prim::If blocks + */ +TORCH_API void ReplicateQuant(std::shared_ptr& graph); + +/** Replicate dequantize node for each use, so that we can match + * quantization patterns + */ +TORCH_API void ReplicateDeQuant(std::shared_ptr& graph); + +/** \brief Insert quantize - dequantize calls to the Tensors + * that are observed in insert_observers pass + * + * For each Tensor that is observed, get the observer module and call + * calculate_qparam on the observer module to get quantization parameters + * and add quantize - int_repr - dequantize function calls using these + * parameters we also have special handling for quantizing "bias" right now. 
+ * + * \param module the input module + * \param method_name the method we want to insert quantization calls for + */ +TORCH_API Module InsertQuantDeQuant( + Module& module, + const std::string& method_name, + bool inplace, + bool debug, + QuantType quant_type = QuantType::STATIC); + +TORCH_API Module InsertQuantDeQuantOnDevicePTQ( + Module& module, + const std::string& method_name, + bool inplace, + bool debug, + QuantType quant_type = QuantType::STATIC); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_patterns.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_patterns.h new file mode 100644 index 0000000000000000000000000000000000000000..8ce511b1e375368baabd643634940c39742eb9ca --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_patterns.h @@ -0,0 +1,1269 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct QuantFusionInfo { + std::string quantized_op_name; + std::string pattern; + std::string replacement; + std::vector filters; +}; + +namespace { +std::string getExtraArgList(std::vector extra_args) { + return std::accumulate( + extra_args.begin(), + extra_args.end(), + std::string(), + [](const std::string& acc, const std::string& arg) { + return acc + ", " + arg; + }); +} + +// Get the pattern we want to replace the match with +std::string getAtenOpPattern( + const std::string& graph_header, + const std::string& op_name, + const std::vector& 
extra_op_args, + bool scalar_args = false) { + std::vector _extra_op_args = extra_op_args; + std::string aten_op_pattern = graph_header; + if (scalar_args) { + for (const auto& extra_arg : _extra_op_args) { + aten_op_pattern + .append(R"( + )") + .append(extra_arg) + .append("_scalar = aten::item(") + .append(extra_arg) + .append(")"); + } + + for (auto& _extra_op_arg : _extra_op_args) { + _extra_op_arg.append("_scalar"); + } + } + const auto& extra_op_arg_list = getExtraArgList(std::move(_extra_op_args)); + aten_op_pattern += R"( + %r = )"; + aten_op_pattern += op_name + "(" + "%a_quant" + extra_op_arg_list + ")"; + aten_op_pattern += R"( + return (%r) )"; + return aten_op_pattern; +} + +// generate ops for quantize pattern for a scalar value +std::string getQuantizeForScalar(const std::string& value) { + // 6 is `torch.float` ScalarType, we are creating a float scalar + // tensor from a scalar value + std::string quantize_pattern = R"( + )" + + value + "_float_scalar_type : int = prim::Constant[value=6]()"; + quantize_pattern += R"( + )" + + value + "_none : None = prim::Constant()"; + quantize_pattern += R"( + )" + + value + "_tensor : Tensor = aten::scalar_tensor(" + value + ", " + value + + "_float_scalar_type"; + for ([[maybe_unused]] const auto i : c10::irange(3)) { + quantize_pattern += ", " + value + "_none"; + } + quantize_pattern += ")"; + quantize_pattern += + R"( + )" + + value + "_quant = aten::quantize_per_tensor(" + value + "_tensor" + + getExtraArgList( + {value + "_scale", value + "_zero_point", value + "_dtype"}) + + ")"; + return quantize_pattern; +} + +std::string getDequantize(const std::string& value) { + return R"( + )" + + value + "_dequant = aten::dequantize(" + value + "_quant)"; +} + +std::string getItem(const std::string& value) { + return R"( + )" + + value + "_scalar : float = aten::item(" + value + "_dequant)"; +} + +// Patterns for the ops that inherit parameters from input +std::string getInputTensorQParamOpPattern( + const 
std::string& op_name, + const std::vector& extra_op_args) { + const auto& extra_op_arg_list = getExtraArgList(extra_op_args); + std::string op_pattern = "graph(%a_quant" + extra_op_arg_list + "):" + R"( + %a_dequant = aten::dequantize(%a_quant) + %r = )" + + op_name + "(" + "%a_dequant" + extra_op_arg_list + ")" + R"( + %r_scale : float = aten::q_scale(%a_quant) + %r_zero_point : int = aten::q_zero_point(%a_quant) + %r_dtype : int = prim::dtype(%a_quant) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + return op_pattern; +} + +// QuantFusionInfo for the ops that inherit parameters from input +QuantFusionInfo getInputTensorQParamOpFusionInfo( + const std::string& op_name, + const std::vector& extra_op_args) { + std::string op_pattern = + getInputTensorQParamOpPattern(op_name, extra_op_args); + const auto& extra_op_arg_list = getExtraArgList(extra_op_args); + std::string graph_header = "graph(%a_quant" + extra_op_arg_list + "):"; + std::string op_replacement = + getAtenOpPattern(graph_header, op_name, extra_op_args); + + return {op_name, std::move(op_pattern), std::move(op_replacement)}; +} + +// quant fusion for ops like `quantized::add_scalar`, `quantized::mul_scalar` +QuantFusionInfo getBinaryOpScalarFusionInfo( + const std::string& op_name, + const std::vector& extra_op_args, + const std::string& quantized_op_name, + const std::vector& extra_quantized_op_args, + const std::vector& filters = {}) { + std::string op_pattern = + getInputTensorQParamOpPattern(op_name, extra_op_args); + + const auto& extra_op_arg_list = getExtraArgList(extra_op_args); + std::string graph_header = "graph(%a_quant" + extra_op_arg_list + "):"; + std::string op_replacement = getAtenOpPattern( + graph_header, quantized_op_name, extra_quantized_op_args); + + return {op_name, std::move(op_pattern), std::move(op_replacement), filters}; +} + +QuantFusionInfo getClampOpFusionInfo( + const std::string& op_name, + const std::vector& 
extra_op_args) { + std::vector header_args = extra_op_args; + std::vector input_qparams = {"_scale", "_zero_point", "_dtype"}; + for (const auto& arg : extra_op_args) { + for (const auto& qparam : input_qparams) { + header_args.push_back(arg + qparam); + } + } + for (const auto& qparam : input_qparams) { + header_args.push_back("%r" + qparam); + } + const auto& extra_header_arg_list = getExtraArgList(std::move(header_args)); + std::string graph_header = "graph(%a_quant" + extra_header_arg_list + "):"; + std::string op_pattern = graph_header; + for (const auto& arg : extra_op_args) { + op_pattern += getQuantizeForScalar(arg); + op_pattern += getDequantize(arg); + op_pattern += getItem(arg); + } + op_pattern += getDequantize("%a"); + op_pattern += R"( + %r = )"; + std::vector scalar_extra_args; + scalar_extra_args.reserve(extra_op_args.size()); + for (const auto& arg : extra_op_args) { + scalar_extra_args.push_back(arg + "_scalar"); + } + op_pattern += op_name + "(" + "%a_dequant" + + getExtraArgList(std::move(scalar_extra_args)) + ")"; + // IR pattern common to all ops that inherit qparam from input + op_pattern += R"( + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string aten_op_pattern = + getAtenOpPattern(graph_header, op_name, extra_op_args); + + return {op_name, std::move(op_pattern), std::move(aten_op_pattern)}; +} + +// Patterns for the ops that has fixed quantization parameters +QuantFusionInfo getFixedQParamOpFusionInfo( + const std::string& op_name, + const std::vector& extra_op_args, + bool is_symmetric) { + const auto& extra_op_arg_list = getExtraArgList(extra_op_args); + std::string graph_header = "graph(%a_quant" + extra_op_arg_list + "):"; + std::string op_pattern = graph_header; + op_pattern += R"( + %a_dequant = aten::dequantize(%a_quant) + %r = )"; + op_pattern += op_name + "(" + "%a_dequant" + extra_op_arg_list + ")"; + // IR pattern common to all ops with fixed quantization 
parameters for + // asymmetric quantization + std::string asym_fixed_qparam_op_suffix = R"( + %r_scale : float = prim::Constant[value=0.00390625]() + %r_zero_point : int = prim::Constant[value=0]() + %r_dtype : int = prim::Constant[value=13]() + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string sym_fixed_qparam_op_suffix = R"( + %r_scale : float = prim::Constant[value=0.0078125]() + %r_zero_point : int = prim::Constant[value=128]() + %r_dtype : int = prim::Constant[value=13]() + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + op_pattern += + is_symmetric ? sym_fixed_qparam_op_suffix : asym_fixed_qparam_op_suffix; + + std::string aten_op_pattern = + getAtenOpPattern(graph_header, op_name, extra_op_args); + + return {op_name, std::move(op_pattern), std::move(aten_op_pattern)}; +} + +// filter that checks %b_scalar is a scalar +bool input_b_is_scalar( + const Match& match, + const std::unordered_map& vmap) { + const auto& match_vmap = match.values_map; + auto b_scalar = match_vmap.at(vmap.at("b_scalar")); + return isScalar(b_scalar); +} + +// Patterns for ops that require observation for output quantization parameters +// Example: +// +// before fusion: +// +// graph(%a_quant, %r_scale, %r_zero_point, %r_dtype): +// %a_dequant = aten::dequantize(%a_quant) +// %r = {op_name}(%a_dequant, {extra_args}) +// %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, +// %r_dtype) return (%r_quant) +// +// after fusion: +// +// graph(%a_quant, %r_scale, %r_zero_point, %r_dtype): +// %r_quant = {quantized_op_name}(%a_quant, {extra_args}, %r_scale, +// %r_zero_point) return (%r_quant) +QuantFusionInfo getObservedQParamOpFusionInfo( + const std::string& fp_op_name, + const std::string& q_op_name, + const std::vector& fp_extra_args, + const std::vector& q_extra_args) { + const auto& fp_extra_arg_list = getExtraArgList(fp_extra_args); + const auto& 
q_extra_arg_list = getExtraArgList(q_extra_args); + + std::string op_pattern = "graph(%a_quant" + fp_extra_arg_list + + ", %r_scale, %r_zero_point, %r_dtype):" + R"( + %a_dequant = aten::dequantize(%a_quant) + %r = )" + + fp_op_name + "(" + "%a_dequant" + fp_extra_arg_list + ")" + R"( + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string aten_op_pattern = "graph(%a_quant" + fp_extra_arg_list + + ", %r_scale, %r_zero_point, %r_dtype):" + R"( + %r_quant = )" + + q_op_name + "(%a_quant" + q_extra_arg_list + + ", %r_scale, %r_zero_point)" + R"( + return (%r_quant) )"; + + return {q_op_name, std::move(op_pattern), std::move(aten_op_pattern)}; +} + +} // namespace + +static std::vector quant_fusion_pattern_and_replacements() { + // aten::conv1d + std::string conv1d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv1d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv1d - aten::relu + std::string conv1d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::conv1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv1d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv1d - aten::relu_ + std::string conv1d_inplace_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv1d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu_(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::conv1d + std::string quantized_conv1d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv1d(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // quantized::conv1d_relu + std::string quantized_conv1d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv1d_relu(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // aten::conv2d + std::string conv2d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::conv2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv2d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv2d - aten::relu + std::string conv2d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv2d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv2d - aten::relu_ + std::string conv2d_inplace_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::conv2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv2d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu_(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::conv2d + std::string quantized_conv2d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv2d(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // quantized::conv2d_relu + std::string quantized_conv2d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv2d_relu(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // aten::conv3d + std::string conv3d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv3d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv3d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv3d - aten::relu + std::string conv3d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::conv3d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv3d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // aten::conv3d - aten::relu_ + std::string conv3d_inplace_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv3d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %conv_out = aten::conv3d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + %r = aten::relu_(%conv_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::conv3d + std::string quantized_conv3d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv3d(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // quantized::conv3d_relu + std::string quantized_conv3d_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %dilation, %groups): + %r_quant = quantized::conv3d_relu(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // aten::conv_transpose1d + std::string conv_transpose1d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %output_padding, %groups, %dilation): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::conv_transpose1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv_transpose1d(%a_dequant, %w_dequant, %b, %stride, %padding, %output_padding, %groups, %dilation) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::conv_transpose1d + std::string quantized_conv_transpose1d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %output_padding, %groups, %dilation): + %r_quant = quantized::conv_transpose1d(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // aten::conv_transpose2d + std::string conv_transpose2d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %output_padding, %groups, %dilation): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::conv_transpose2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv_transpose2d(%a_dequant, %w_dequant, %b, %stride, %padding, %output_padding, %groups, %dilation) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::conv_transpose1d + std::string quantized_conv_transpose2d = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype, %stride, %padding, %output_padding, %groups, %dilation): + %r_quant = quantized::conv_transpose2d(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r_quant) )"; + + std::string add_relu = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add(%a_dequant, %b_dequant, %alpha) + %r_relu = aten::relu(%r_add) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string add_inplace_relu = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + 
%a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add(%a_dequant, %b_dequant, %alpha) + %r_relu = aten::relu_(%r_add) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string inplace_add_relu = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add_(%a_dequant, %b_dequant, %alpha) + %r_relu = aten::relu(%r_add) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string inplace_add_inplace_relu = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add_(%a_dequant, %b_dequant, %alpha) + %r_relu = aten::relu_(%r_add) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string quantized_add_relu = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %r = quantized::add_relu(%a_quant, %b_quant, %scale, %zero_point) + return (%r) )"; + + // aten::linear + std::string linear = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::linear(%a_dequant, %w_dequant, %b) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string linear_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? 
= quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %linear_out = aten::linear(%a_dequant, %w_dequant, %b) + %r = aten::relu(%linear_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string linear_inplace_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype): + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %linear_out = aten::linear(%a_dequant, %w_dequant, %b) + %r = aten::relu_(%linear_out) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // quantized::linear + std::string quantized_linear = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype): + %r = quantized::linear(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r) )"; + + std::string quantized_linear_relu = R"( +graph(%a_quant, %packed_params, %r_scale, %r_zero_point, %r_dtype): + %r = quantized::linear_relu(%a_quant, %packed_params, %r_scale, %r_zero_point) + return (%r) )"; + + std::string cat = R"( +graph(%input_quant, %dim, %r_scale, %r_zero_point, %r_dtype): + %input_dequant = aten::dequantize(%input_quant) + %r = aten::cat(%input_dequant, %dim) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string quantized_cat = R"( +graph(%input_quant, %dim, %r_scale, %r_zero_point, %r_dtype): + %r_quant = quantized::cat(%input_quant, %dim, %r_scale, %r_zero_point) + return (%r_quant) )"; + + // aten::add + std::string add = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add(%a_dequant, %b_dequant, %alpha) + %r = aten::quantize_per_tensor(%r_add, %scale, %zero_point, %dtype) + return (%r) )"; + + // TODO: add 
%dtype after when https://github.com/pytorch/pytorch/issues/34351 + // is fixed + // quantized::add + std::string quantized_add = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %r = quantized::add(%a_quant, %b_quant, %scale, %zero_point) + return (%r) )"; + + // aten::add_ + std::string inplace_add = R"( +graph(%a_quant, %b_quant, %alpha, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_add = aten::add_(%a_dequant, %b_dequant, %alpha) + %r = aten::quantize_per_tensor(%r_add, %scale, %zero_point, %dtype) + return (%r) )"; + + auto add_scalar = getBinaryOpScalarFusionInfo( + "aten::add", + {"%b_scalar", "%alpha"}, + "quantized::add_scalar", + {"%b_scalar"}, + {aten_add_alpha_is_one, input_b_is_scalar}); + + auto add_scalar_out = getBinaryOpScalarFusionInfo( + "aten::add_", + {"%b_scalar", "%alpha"}, + "quantized::add_scalar_out", + {"%b_scalar", "%a_quant"}, + {aten_add_alpha_is_one, input_b_is_scalar}); + + // quantized::add_scalar_relu -- fusing quantized::add_scalar + // and aten::relu + auto quantized_add_scalar_relu_pattern = R"( +graph(%a_quant, %b_scalar): + %r_add = quantized::add_scalar(%a_quant, %b_scalar) + %r = aten::relu(%r_add) + return (%r) )"; + + auto quantized_add_scalar_inplace_relu_pattern = R"( +graph(%a_quant, %b_scalar): + %r_add = quantized::add_scalar(%a_quant, %b_scalar) + %r = aten::relu_(%r_add) + return (%r) )"; + + auto quantized_add_scalar_relu_replacement = R"( +graph(%a_quant, %b_scalar): + %r = quantized::add_scalar_relu(%a_quant, %b_scalar) + return (%r) )"; + + // quantized::add_scalar_relu_out -- fusing quantized::add_scalarOut + // and aten::relu + auto quantized_add_scalar_relu_out_pattern = R"( +graph(%a_quant, %b_scalar): + %r_add = quantized::add_scalar_out(%a_quant, %b_scalar, %a_quant) + %r = aten::relu(%r_add) + return (%r) )"; + + auto quantized_add_scalar_inplace_relu_out_pattern = R"( +graph(%a_quant, %b_scalar): + %r_add = 
quantized::add_scalar_out(%a_quant, %b_scalar, %a_quant) + %r = aten::relu_(%r_add) + return (%r) )"; + + auto quantized_add_scalar_relu_out_replacement = R"( +graph(%a_quant, %b_scalar): + %r = quantized::add_scalar_relu_out(%a_quant, %b_scalar, %a_quant) + return (%r) )"; + + // quantized::batch_norm + std::string batch_norm = R"( +graph(%a_quant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7, %scale, %zero_point, %scalar_type): + %a_dequant = aten::dequantize(%a_quant) + %r_bn = aten::batch_norm(%a_dequant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7) + %r = aten::quantize_per_tensor(%r_bn, %scale, %zero_point, %scalar_type) + return (%r) )"; + std::string quantized_batch_norm = R"( +graph(%a_quant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7, %scale, %zero_point, %scalar_type): + %r = quantized::batch_norm(%a_quant, %weight, %bias, %mean, %var, %eps, %scale, %zero_point) + return (%r) )"; + + std::string batch_norm_relu = R"( +graph(%a_quant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7, %scale, %zero_point, %scalar_type): + %a_dequant = aten::dequantize(%a_quant) + %bn_out = aten::batch_norm(%a_dequant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7) + %relu = aten::relu(%bn_out) + %r = aten::quantize_per_tensor(%relu, %scale, %zero_point, %scalar_type) + return (%r) )"; + std::string batch_norm_inplace_relu = R"( +graph(%a_quant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7, %scale, %zero_point, %scalar_type): + %a_dequant = aten::dequantize(%a_quant) + %bn_out = aten::batch_norm(%a_dequant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7) + %relu = aten::relu_(%bn_out) + %r = aten::quantize_per_tensor(%relu, %scale, %zero_point, %scalar_type) + return (%r) )"; + + std::string quantized_batch_norm_relu = R"( +graph(%a_quant, %weight, %bias, %mean, %var, %training, %eaf, %eps, %7, %scale, %zero_point, %scalar_type): + %r = quantized::batch_norm_relu(%a_quant, %weight, %bias, %mean, %var, %eps, 
%scale, %zero_point) + return (%r) )"; + + // aten::mul + std::string mul = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul(%a_dequant, %b_dequant) + %r = aten::quantize_per_tensor(%r_mul, %scale, %zero_point, %dtype) + return (%r) )"; + + // aten::mul_ + std::string inplace_mul = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul_(%a_dequant, %b_dequant) + %r = aten::quantize_per_tensor(%r_mul, %scale, %zero_point, %dtype) + return (%r) )"; + + // quantized::mul + std::string quantized_mul = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %r = quantized::mul(%a_quant, %b_quant, %scale, %zero_point) + return (%r) )"; + + auto mul_scalar = getBinaryOpScalarFusionInfo( + "aten::mul", + {"%b_scalar"}, + "quantized::mul_scalar", + {"%b_scalar"}, + {input_b_is_scalar}); + + auto mul_scalar_out = getBinaryOpScalarFusionInfo( + "aten::mul_", + {"%b_scalar"}, + "quantized::mul_scalar_out", + {"%b_scalar", "%a_quant"}, + {input_b_is_scalar}); + + // quantized::mul_relu + std::string mul_relu = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul(%a_dequant, %b_dequant) + %r_relu = aten::relu(%r_mul) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string mul_inplace_relu = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul(%a_dequant, %b_dequant) + %r_relu = aten::relu_(%r_mul) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string inplace_mul_relu = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): 
+ %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul_(%a_dequant, %b_dequant) + %r_relu = aten::relu(%r_mul) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string inplace_mul_inplace_relu = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %a_dequant = aten::dequantize(%a_quant) + %b_dequant = aten::dequantize(%b_quant) + %r_mul = aten::mul_(%a_dequant, %b_dequant) + %r_relu = aten::relu_(%r_mul) + %r = aten::quantize_per_tensor(%r_relu, %scale, %zero_point, %dtype) + return (%r) )"; + + std::string quantized_mul_relu = R"( +graph(%a_quant, %b_quant, %scale, %zero_point, %dtype): + %r = quantized::mul_relu(%a_quant, %b_quant, %scale, %zero_point) + return (%r) )"; + + // quantized::mul_scalar_relu -- fusing quantized::mul_scalar + // and aten::relu + auto quantized_mul_scalar_relu_pattern = R"( +graph(%a_quant, %b_scalar): + %r_mul = quantized::mul_scalar(%a_quant, %b_scalar) + %r = aten::relu(%r_mul) + return (%r) )"; + + auto quantized_mul_scalar_inplace_relu_pattern = R"( +graph(%a_quant, %b_scalar): + %r_mul = quantized::mul_scalar(%a_quant, %b_scalar) + %r = aten::relu_(%r_mul) + return (%r) )"; + + auto quantized_mul_scalar_relu_replacement = R"( +graph(%a_quant, %b_scalar): + %r = quantized::mul_scalar_relu(%a_quant, %b_scalar) + return (%r) )"; + + // quantized::mul_scalar_relu_out -- fusing quantized::mul_scalarOut + // and aten::relu + auto quantized_mul_scalar_relu_out_pattern = R"( +graph(%a_quant, %b_scalar): + %r_mul = quantized::mul_scalar_out(%a_quant, %b_scalar, %a_quant) + %r = aten::relu(%r_mul) + return (%r) )"; + + auto quantized_mul_scalar_inplace_relu_out_pattern = R"( +graph(%a_quant, %b_scalar): + %r_mul = quantized::mul_scalar_out(%a_quant, %b_scalar, %a_quant) + %r = aten::relu_(%r_mul) + return (%r) )"; + + auto quantized_mul_scalar_relu_out_replacement = R"( +graph(%a_quant, %b_scalar): + %r = 
quantized::mul_scalar_relu_out(%a_quant, %b_scalar, %a_quant) + return (%r) )"; + + // quantized::elu + std::string elu = R"( +graph(%a_quant, %alpha, %scale, %input_scale, %r_scale, %r_zero_point, %r_dtype): + %a_dequant = aten::dequantize(%a_quant) + %r = aten::elu(%a_dequant, %alpha, %scale, %input_scale) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + std::string quantized_elu = R"( +graph(%a_quant, %alpha, %scale, %input_scale, %r_scale, %r_zero_point, %r_dtype): + %r_quant = quantized::elu(%a_quant, %r_scale, %r_zero_point, %alpha, %scale, %input_scale) + return (%r_quant) )"; + + std::string elu_ = R"( +graph(%a_quant, %alpha, %scale, %input_scale, %r_scale, %r_zero_point, %r_dtype): + %a_dequant = aten::dequantize(%a_quant) + %r = aten::elu_(%a_dequant, %alpha, %scale, %input_scale) + %r_quant = aten::quantize_per_tensor(%r, %r_scale, %r_zero_point, %r_dtype) + return (%r_quant) )"; + + // ============= General Ops that inherit quantization parameters from input + // tensor ============= + auto avg_pool1d = getInputTensorQParamOpFusionInfo( + "aten::avg_pool1d", + {"%kernel_size", + "%stride", + "%padding", + "%ceil_mode", + "%count_include_pad"}); + + auto avg_pool2d = getInputTensorQParamOpFusionInfo( + "aten::avg_pool2d", + {"%kernel_size", + "%stride", + "%padding", + "%ceil_mode", + "%count_include_pad", + "%divisor_override"}); + + auto avg_pool3d = getInputTensorQParamOpFusionInfo( + "aten::avg_pool3d", + {"%kernel_size", + "%stride", + "%padding", + "%ceil_mode", + "%count_include_pad", + "%divisor_override"}); + + auto adaptive_avg_pool1d = getInputTensorQParamOpFusionInfo( + "aten::adaptive_avg_pool1d", {"%output_size"}); + + auto adaptive_avg_pool2d = getInputTensorQParamOpFusionInfo( + "aten::adaptive_avg_pool2d", {"%output_size"}); + + auto adaptive_avg_pool3d = getInputTensorQParamOpFusionInfo( + "aten::adaptive_avg_pool3d", {"%output_size"}); + + auto mean1 = 
getInputTensorQParamOpFusionInfo("aten::mean", {"%dim"}); + + auto mean2 = getInputTensorQParamOpFusionInfo( + "aten::mean", {"%dim", "%keepdim", "%out"}); + + auto upsample_nearest1d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest1d", {"%output_size", "%scale_factors"}); + + auto upsample_nearest2d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest2d", {"%output_size", "%scale_factors"}); + + auto upsample_nearest3d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest3d", {"%output_size", "%scale_factors"}); + + auto upsample_linear1d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_linear1d", + {"%output_size", "%align_corners", "%scale_factors"}); + + auto upsample_bilinear2d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_bilinear2d", + {"%output_size", "%align_corners", "%scale_factors"}); + + auto upsample_trilinear3d_vec = getInputTensorQParamOpFusionInfo( + "aten::upsample_trilinear3d", + {"%output_size", "%align_corners", "%scale_factors"}); + + auto upsample_nearest1d = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest1d", {"%output_size", "%scales"}); + + auto upsample_nearest2d = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest2d", {"%output_size", "%scale_h", "%scale_w"}); + + auto upsample_nearest3d = getInputTensorQParamOpFusionInfo( + "aten::upsample_nearest3d", + {"%output_size", "%scale_d", "%scale_h", "%scale_w"}); + + auto upsample_linear1d = getInputTensorQParamOpFusionInfo( + "aten::upsample_linear1d", {"%output_size", "%align_corners", "%scales"}); + + auto upsample_bilinear2d = getInputTensorQParamOpFusionInfo( + "aten::upsample_bilinear2d", + {"%output_size", "%align_corners", "%scale_h", "%scale_w"}); + + auto upsample_trilinear3d = getInputTensorQParamOpFusionInfo( + "aten::upsample_trilinear3d", + {"%output_size", "%align_corners", "%scale_d", "%scale_h", "%scale_w"}); + + auto clamp = getClampOpFusionInfo("aten::clamp", {"%min", "%max"}); + + auto 
hardtanh = getClampOpFusionInfo("aten::hardtanh", {"%min", "%max"}); + + auto hardtanh_ = getClampOpFusionInfo("aten::hardtanh_", {"%min", "%max"}); + + auto leaky_relu = + getInputTensorQParamOpFusionInfo("aten::leaky_relu", {"%negative_slope"}); + + auto leaky_relu_ = getInputTensorQParamOpFusionInfo( + "aten::leaky_relu_", {"%negative_slope"}); + + // Ops with fixed quantization parameters + auto hardsigmoid = getFixedQParamOpFusionInfo("aten::hardsigmoid", {}, false); + + auto hardsigmoid_ = + getFixedQParamOpFusionInfo("aten::hardsigmoid_", {}, false); + + auto sigmoid = getFixedQParamOpFusionInfo("aten::sigmoid", {}, false); + + auto sigmoid_ = getFixedQParamOpFusionInfo("aten::sigmoid_", {}, false); + + auto tanh = getFixedQParamOpFusionInfo("aten::tanh", {}, true); + + auto tanh_ = getFixedQParamOpFusionInfo("aten::tanh_", {}, true); + + auto hardswish = getObservedQParamOpFusionInfo( + "aten::hardswish", "quantized::hardswish", {}, {}); + + auto hardswish_ = getObservedQParamOpFusionInfo( + "aten::hardswish_", "quantized::hardswish", {}, {}); + + auto layer_norm = getObservedQParamOpFusionInfo( + "aten::layer_norm", + "quantized::layer_norm", + {"%normalized_shape", "%weight", "%bias", "%eps", "%cudnn_enabled"}, + {"%normalized_shape", "%weight", "%bias", "%eps"}); + + auto group_norm = getObservedQParamOpFusionInfo( + "aten::group_norm", + "quantized::group_norm", + {"%num_groups", "%weight", "%bias", "%eps", "%cudnn_enabled"}, + {"%num_groups", "%weight", "%bias", "%eps"}); + + auto instance_norm = getObservedQParamOpFusionInfo( + "aten::instance_norm", + "quantized::instance_norm", + {"%weight", + "%bias", + "%running_mean", + "%running_var", + "%use_input_stats", + "%momentum", + "%eps", + "%cudnn_enabled"}, + {"%weight", "%bias", "%eps"}); + + return { + {"quantized::conv1d", std::move(conv1d), std::move(quantized_conv1d)}, + {"quantized::conv1d_relu", std::move(conv1d_relu), quantized_conv1d_relu}, + {"quantized::conv1d_relu", + 
std::move(conv1d_inplace_relu), + std::move(quantized_conv1d_relu)}, + {"quantized::conv2d", std::move(conv2d), std::move(quantized_conv2d)}, + {"quantized::conv2d_relu", std::move(conv2d_relu), quantized_conv2d_relu}, + {"quantized::conv2d_relu", + std::move(conv2d_inplace_relu), + std::move(quantized_conv2d_relu)}, + {"quantized::conv3d", std::move(conv3d), std::move(quantized_conv3d)}, + {"quantized::conv3d_relu", std::move(conv3d_relu), quantized_conv3d_relu}, + {"quantized::conv3d_relu", + std::move(conv3d_inplace_relu), + std::move(quantized_conv3d_relu)}, + {"quantized::conv_transpose1d", + std::move(conv_transpose1d), + std::move(quantized_conv_transpose1d)}, + {"quantized::conv_transpose2d", + std::move(conv_transpose2d), + std::move(quantized_conv_transpose2d)}, + {"quantized::linear", std::move(linear), std::move(quantized_linear)}, + {"quantized::linear_relu", std::move(linear_relu), quantized_linear_relu}, + {"quantized::linear_relu", + std::move(linear_inplace_relu), + std::move(quantized_linear_relu)}, + {"quantized::add_relu", + std::move(add_relu), + quantized_add_relu, + {aten_add_alpha_is_one}}, + {"quantized::add_relu", + std::move(add_inplace_relu), + quantized_add_relu, + {aten_add_alpha_is_one}}, + {"quantized::add_relu", + std::move(inplace_add_relu), + quantized_add_relu, + {aten_add_alpha_is_one}}, + {"quantized::add_relu", + std::move(inplace_add_inplace_relu), + std::move(quantized_add_relu), + {aten_add_alpha_is_one}}, + std::move(add_scalar), + std::move(add_scalar_out), + // note that these must come after quantized::add_scalar and + // quantized::add_scalar_out patterns + {"quantized::add_scalar_relu", + quantized_add_scalar_relu_pattern, + quantized_add_scalar_relu_replacement}, + {"quantized::add_scalar_relu", + quantized_add_scalar_inplace_relu_pattern, + quantized_add_scalar_relu_replacement}, + {"quantized::add_scalar_relu_out", + quantized_add_scalar_relu_out_pattern, + quantized_add_scalar_relu_out_replacement}, + 
{"quantized::add_scalar_relu_out", + quantized_add_scalar_inplace_relu_out_pattern, + quantized_add_scalar_relu_out_replacement}, + {"quantized::add", + std::move(add), + quantized_add, + {aten_add_alpha_is_one}}, + {"quantized::add", + std::move(inplace_add), + std::move(quantized_add), + {aten_add_alpha_is_one}}, + {"quantized::cat", std::move(cat), std::move(quantized_cat)}, + {"quantized::batch_norm", + std::move(batch_norm), + std::move(quantized_batch_norm)}, + {"quantized::batch_norm_relu", + std::move(batch_norm_relu), + quantized_batch_norm_relu}, + {"quantized::batch_norm_relu", + std::move(batch_norm_inplace_relu), + std::move(quantized_batch_norm_relu)}, + std::move(mul_scalar), + std::move(mul_scalar_out), + // note that these must come after quantized::mul_scalar and + // quantized::mul_scalar_out patterns + {"quantized::mul_scalar_relu", + quantized_mul_scalar_relu_pattern, + quantized_mul_scalar_relu_replacement}, + {"quantized::mul_scalar_relu", + quantized_mul_scalar_inplace_relu_pattern, + quantized_mul_scalar_relu_replacement}, + {"quantized::mul_scalar_relu_out", + quantized_mul_scalar_relu_out_pattern, + quantized_mul_scalar_relu_out_replacement}, + {"quantized::mul_scalar_relu_out", + quantized_mul_scalar_inplace_relu_out_pattern, + quantized_mul_scalar_relu_out_replacement}, + {"quantized::mul_relu", std::move(mul_relu), quantized_mul_relu}, + {"quantized::mul_relu", std::move(mul_inplace_relu), quantized_mul_relu}, + {"quantized::mul_relu", std::move(inplace_mul_relu), quantized_mul_relu}, + {"quantized::mul_relu", + std::move(inplace_mul_inplace_relu), + std::move(quantized_mul_relu)}, + {"quantized::mul", std::move(mul), quantized_mul}, + {"quantized::mul", std::move(inplace_mul), std::move(quantized_mul)}, + std::move(hardswish), + std::move(hardswish_), + std::move(layer_norm), + std::move(group_norm), + std::move(instance_norm), + {"quantized::elu", std::move(elu), quantized_elu}, + {"quantized::elu_", std::move(elu_), 
std::move(quantized_elu)}, + std::move(avg_pool1d), + std::move(avg_pool2d), + std::move(avg_pool3d), + std::move(adaptive_avg_pool1d), + std::move(adaptive_avg_pool2d), + std::move(adaptive_avg_pool3d), + std::move(mean1), + std::move(mean2), + std::move(upsample_nearest1d), + std::move(upsample_nearest2d), + std::move(upsample_nearest3d), + std::move(upsample_linear1d), + std::move(upsample_bilinear2d), + std::move(upsample_trilinear3d), + std::move(upsample_nearest1d_vec), + std::move(upsample_nearest2d_vec), + std::move(upsample_nearest3d_vec), + std::move(upsample_linear1d_vec), + std::move(upsample_bilinear2d_vec), + std::move(upsample_trilinear3d_vec), + std::move(clamp), + std::move(hardtanh), + std::move(hardtanh_), + std::move(leaky_relu), + std::move(leaky_relu_), + // fixed qparam ops + std::move(hardsigmoid), + std::move(hardsigmoid_), + std::move(sigmoid), + std::move(sigmoid_), + std::move(tanh), + std::move(tanh_), + }; +} + +inline std::vector +dynamic_quantized_linear_pattern_and_replacements() { + std::string linear_dynamic = R"( +graph(%packed_params, %a): + %w_quant : Tensor, %b : Tensor? = quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::linear(%a, %w_dequant, %b) + return (%r) )"; + + // This pattern ignores reduce range + // Set the reduce range to default to true, since qnnpack backend ignores this + // argument. 
+ std::string quantized_linear_dynamic = R"( +graph(%packed_params, %a): + %reduce_range : bool = prim::Constant[value=1]() + %r = quantized::linear_dynamic(%a, %packed_params, %reduce_range) + return (%r) )"; + + return { + {"quantized::linear_dynamic", + std::move(linear_dynamic), + std::move(quantized_linear_dynamic)}, + }; +} + +static std::vector +dynamic_quant_fusion_pattern_and_replacements() { + std::string linear_dynamic = R"( +graph(%packed_params, %a, %reduce_range, %a_dtype): + %a_scale : float, %a_zero_point : int = aten::_choose_qparams_per_tensor(%a, %reduce_range) + %a_quant = aten::quantize_per_tensor(%a, %a_scale, %a_zero_point, %a_dtype) + %a_dequant = aten::dequantize(%a_quant) + %w_quant : Tensor, %b : Tensor? = quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant) + %r = aten::linear(%a_dequant, %w_dequant, %b) + return (%r) )"; + + std::string quantized_linear_dynamic = R"( +graph(%packed_params, %a, %reduce_range, %a_dtype): + %r = quantized::linear_dynamic(%a, %packed_params, %reduce_range) + return (%r) )"; + + std::string linear_dynamic_fp16 = R"( +graph(%packed_params, %a): + %w_unpacked : Tensor, %b : Tensor? 
= quantized::linear_unpack_fp16(%packed_params) + %r = aten::linear(%a, %w_unpacked, %b) + return (%r) )"; + + std::string quantized_linear_dynamic_fp16 = R"( +graph(%packed_params, %a): + %r = quantized::linear_dynamic_fp16(%a, %packed_params) + return (%r) )"; + + return { + {"quantized::linear_dynamic", + std::move(linear_dynamic), + std::move(quantized_linear_dynamic)}, + {"quantized::linear_dynamic_fp16", + std::move(linear_dynamic_fp16), + std::move(quantized_linear_dynamic_fp16)}, + }; +} + +static std::vector linear_prepack_unpack_patterns() { + std::string linear_with_quant = R"( +graph(%a_dequant, %w_quant, %b): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::linear(%a_dequant, %w_dequant, %b) + return (%r) )"; + + std::string linear_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b): + %packed_params = quantized::linear_prepack(%w_quant, %b) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? = quantized::linear_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::linear(%a_dequant, %w_dequant, %b_unpacked) + return (%r) )"; + std::string linear_fp16_with_cast = R"( +graph(%w, %a_dq, %b): + %fp16_tensor = aten::_saturate_weight_to_fp16(%w) + %r = aten::linear(%a_dq, %fp16_tensor, %b) + return (%r) )"; + std::string linear_fp16_with_prepack = R"( +graph(%w, %a_dq, %b): + %packed_params = quantized::linear_prepack_fp16(%w, %b) + %w_unpacked : Tensor, %b_unpacked : Tensor? 
= quantized::linear_unpack_fp16(%packed_params) + %r = aten::linear(%a_dq, %w_unpacked, %b_unpacked) + return (%r) )"; + + return { + {"linear_prepack_unpack", + std::move(linear_with_quant), + std::move(linear_with_quant_prepack)}, + {"linear_fp16_prepack_unpack", + std::move(linear_fp16_with_cast), + std::move(linear_fp16_with_prepack)}, + }; +} + +static std::vector conv_prepack_unpack_patterns() { + std::string conv1d_with_quant = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv1d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv1d_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %packed_params : __torch__.torch.classes.quantized.Conv2dPackedParamsBase = quantized::conv1d_prepack(%w_quant, %b, %stride, %padding, %dilation, %groups) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? = quantized::conv1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::conv1d(%a_dequant, %w_dequant, %b_unpacked, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv2d_with_quant = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv2d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv2d_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %packed_params : __torch__.torch.classes.quantized.Conv2dPackedParamsBase = quantized::conv2d_prepack(%w_quant, %b, %stride, %padding, %dilation, %groups) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? 
= quantized::conv2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::conv2d(%a_dequant, %w_dequant, %b_unpacked, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv3d_with_quant = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv3d(%a_dequant, %w_dequant, %b, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv3d_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %dilation, %groups): + %packed_params : __torch__.torch.classes.quantized.Conv3dPackedParamsBase = quantized::conv3d_prepack(%w_quant, %b, %stride, %padding, %dilation, %groups) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? = quantized::conv3d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::conv3d(%a_dequant, %w_dequant, %b_unpacked, %stride, %padding, %dilation, %groups) + return (%r) )"; + + std::string conv_transpose1d_with_quant = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %output_padding, %groups, %dilation): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv_transpose1d(%a_dequant, %w_dequant, %b, %stride, %padding, %output_padding, %groups, %dilation) + return (%r) )"; + + std::string conv_transpose1d_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %output_padding, %groups, %dilation): + %packed_params : __torch__.torch.classes.quantized.Conv2dPackedParamsBase = quantized::conv_transpose1d_prepack(%w_quant, %b, %stride, %padding, %output_padding, %dilation, %groups) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? 
= quantized::conv_transpose1d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::conv_transpose1d(%a_dequant, %w_dequant, %b_unpacked, %stride, %padding, %output_padding, %groups, %dilation) + return (%r) )"; + + std::string conv_transpose2d_with_quant = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %output_padding, %groups, %dilation): + %w_dequant = aten::dequantize(%w_quant) + %r = aten::conv_transpose2d(%a_dequant, %w_dequant, %b, %stride, %padding, %output_padding, %groups, %dilation) + return (%r) )"; + + std::string conv_transpose2d_with_quant_prepack = R"( +graph(%a_dequant, %w_quant, %b, %stride, %padding, %output_padding, %groups, %dilation): + %packed_params : __torch__.torch.classes.quantized.Conv2dPackedParamsBase = quantized::conv_transpose2d_prepack(%w_quant, %b, %stride, %padding, %output_padding, %dilation, %groups) + %w_quant_unpacked : Tensor, %b_unpacked : Tensor? = quantized::conv_transpose2d_unpack(%packed_params) + %w_dequant = aten::dequantize(%w_quant_unpacked) + %r = aten::conv_transpose2d(%a_dequant, %w_dequant, %b_unpacked, %stride, %padding, %output_padding, %groups, %dilation) + return (%r) )"; + + return { + {"conv1d_prepack_unpack", + std::move(conv1d_with_quant), + std::move(conv1d_with_quant_prepack)}, + {"conv2d_prepack_unpack", + std::move(conv2d_with_quant), + std::move(conv2d_with_quant_prepack)}, + {"conv3d_prepack_unpack", + std::move(conv3d_with_quant), + std::move(conv3d_with_quant_prepack)}, + {"conv_transpose1d_prepack_unpack", + std::move(conv_transpose1d_with_quant), + std::move(conv_transpose1d_with_quant_prepack)}, + {"conv_transpose2d_prepack_unpack", + std::move(conv_transpose2d_with_quant), + std::move(conv_transpose2d_with_quant_prepack)}}; +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_type.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_type.h new file mode 100644 index 0000000000000000000000000000000000000000..7f9f123ba89e777e6144097dcd10f5b81cb25402 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/quantization_type.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace torch::jit { + +// Quantization type (dynamic quantization, static quantization). +// Should match the Python enum in quantize_jit.py +enum QuantType : std::uint8_t { DYNAMIC = 0, STATIC }; + +std::ostream& operator<<(std::ostream& os, QuantType t); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/register_packed_params.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/register_packed_params.h new file mode 100644 index 0000000000000000000000000000000000000000..fbc1a48c9d791fdf3e7b714a118a373a9660d708 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/quantization/register_packed_params.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +using PrePackParamFilterFn = std::function; + +TORCH_API std::unordered_set RegisterPrePackParams( + Module& m, + const std::string& method_name, + const PrePackParamFilterFn& is_packed_param, + const std::string& attr_prefix); + +TORCH_API std::string joinPaths(const std::vector& paths); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/symbolic_shape_analysis.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/symbolic_shape_analysis.h new file mode 100644 index 0000000000000000000000000000000000000000..9afac592de18fb5350a34f97c8bda30a16293671 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/symbolic_shape_analysis.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit { + +// CAUTION NOT TO BE USED, STILL A WIP, NOT STABLE + +TORCH_API void PropagateShapesOnGraph(std::shared_ptr& graph); + +// CAUTION NOT TO BE USED, STILL A WIP, NOT STABLE +// From [beg, end) attempt to propagate shapes and +// build up a graph that will compute all remaining symbolic +// shapes in [beg, end) that can be executed before beg + +struct ShapeComputeGraphMapping { + ShapeComputeGraphMapping( + std::shared_ptr partial_eval_shape_graph, + std::unordered_map + enclosing_graph_value_to_shape_graph_input, + std::unordered_map graph_output_to_symbolic_shape_dim) + : partial_eval_shape_graph(std::move(partial_eval_shape_graph)), + enclosing_graph_value_to_shape_graph_input_( + std::move(enclosing_graph_value_to_shape_graph_input)), + graph_output_to_symbolic_shape_dim_( + std::move(graph_output_to_symbolic_shape_dim)) {} + + std::shared_ptr partial_eval_shape_graph; + std::unordered_map + enclosing_graph_value_to_shape_graph_input_; + std::unordered_map graph_output_to_symbolic_shape_dim_; +}; + +TORCH_API std::optional +PropagateShapesAndBuildLargeShapeComputeGraph( + std::shared_ptr& graph, + Node* beg, + Node* end); + +// don't insert complete tensor shapes in shape compute graphs and instead +// rely on our 
partial evaluation pipeline to propagate information. +// this is a good proxy for our ability to propagate non-complete shape +// information. +TORCH_API bool setSymbolicShapeAnalysisTestMode(bool value); +TORCH_API bool symbolicShapeAnalysisTestModeEnabled(); + +using SSAInput = std::variant; +TORCH_API std::optional> +calculateSymbolicShapesOnOp( + const FunctionSchema* schema, + const std::vector& inputs); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/check_alias_annotation.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/check_alias_annotation.h new file mode 100644 index 0000000000000000000000000000000000000000..9f038180f3b85dbba36ac10b811edc76733f8ea8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/check_alias_annotation.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit { + +// Verify that alias annotations are correct. See impl for definition of +// "correct". +// +// This function expects a graph with a single op with `unqualifiedOpName`, plus +// the inputs that you would otherwise have passed to the graph executor. +TORCH_API void checkAliasAnnotation( + const std::shared_ptr& graph, + std::vector pythonInputs, + const std::string& unqualifiedOpName); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/memory_dag.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/memory_dag.h new file mode 100644 index 0000000000000000000000000000000000000000..755aead9375fbc1a563bb555871973755dfa8c5c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/memory_dag.h @@ -0,0 +1,179 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include + +// Uses a compressed index representation for faster comparisons +typedef c10::SparseBitVector<256> MemoryLocations; +namespace torch::jit { + +struct Value; + +using AliasTypeSet = std::vector; + +// `Element` represents a vertex in the points-to graph. It represents +// anything that could have an aliasing relationship--mostly IR +// `Value`s, but also wildcards or the type inside a container (e.g. `T` +// in `List[T]`) +struct Element { + Element(const Value* value_, unsigned index_); + // wildcard constructor + explicit Element(unsigned index_); + + // Index into the owning DAG's bit vector that represents this element. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + unsigned index; + + // All elements that this element *may* point to. It's possible to have + // multiple elements that you might point to due to control flow/complex ops + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + MemoryLocations pointsTo; + // Backreference for points-to. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + MemoryLocations pointedFrom; + + // Elements can contain other elements (e.g. 
List[Tensor]) + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + MemoryLocations containedElements; + + // The values that this element corresponds to. May be empty if this element + // doesn't represent a first-class value. + // This is for debug information only. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::unordered_set values; + + private: + // Make `from` point at `to`. + void makePointerTo(Element* from, Element* to); + + friend class MemoryDAG; + // We memoize the results of `getMemoryLocations` to speed up queries. + // A nullopt means that this cache is not yet populated. Since `MemoryDAG` is + // immutable, this cache should never need to be invalidated. + mutable std::optional cachedMemoryLocations_; + + mutable std::optional cachedAllContainedMemoryLocations_; +}; + +// class MemoryDAG +// +// This class tracks the "A points to B" graph for all values. It is used by +// AliasDb to provide a higher-level API. +// +// We maintain a DAG where: +// - Vertices (called "Elements") represent Values and +// other aliasing entities (e.g. the stuff inside a list) +// - Edges represent a "points-to" relationship. +// +// Leaves in this DAG are entities that don't point to anything, and thus +// correspond to unique "memory locations". +// +// So, by traversing the "points-to" graph to the leaves, you can determine +// which memory locations an element may point to. +class TORCH_API MemoryDAG { + public: + explicit MemoryDAG(std::vector> indexToElementMap) + : indexToElementMap_(std::move(indexToElementMap)) {} + // explicitly delete copy constructor because otherwise windows build is + // confused for an exported class see + // https://stackoverflow.com/a/51033485/105137 + MemoryDAG(const MemoryDAG&) = delete; + MemoryDAG& operator=(const MemoryDAG&) = delete; + + // Return the unique memory locations that `Element` might represent. 
+ const MemoryLocations& getMemoryLocations(const Element* e) const; + + // Do `a` and `b` potentially share a memory location? + bool mayAlias(const Element* a, const Element* b) const; + + // Does `a` hold reference to any memory that is stored in `b`, or vice versa? + bool mayContainAlias(const Element* a, const Element* b) const; + + bool mayContainAlias(const Element* a, const at::ArrayRef b) const; + + bool mayContainAlias( + const at::ArrayRef a, + const at::ArrayRef b) const; + + // Converts from the compressed index representation + const Element* fromIndex(unsigned x) const; + Element* fromIndex(unsigned x); + void collectAllContainedMemoryLocations( + const Element* elem, + MemoryLocations& cont) const; + + /** + * The following methods are special cases where we need to mutate the + * internals of MemoryDAG for efficiency reasons. Don't call them unless you + * know what you're doing! In particular, don't add new mutating methods + * without ensuring that you are maintaining cache consistency for memory + * locations. + */ + + // Adding wildcards can trigger extremely expensive cache invalidations. This + // method adds them in a more efficient cache-aware way. + void setWildcards( + const std::unordered_set& wildcards, + const ska::flat_hash_map& elementMap, + const std::function& getWildcardElement); + Element* unsafeMakeFreshValue(const Value* v); + + private: + const MemoryLocations& getAllContainedMemoryLocations( + const Element* elem) const; + void collectAllContainedMemoryLocationsImpl( + const Element* elem, + MemoryLocations& cont) const; + std::vector> indexToElementMap_; +}; + +/** + * Helper to build up the points-to graph. + * + * We separate the "building" into a different class because it allows us to + * cache internally to MemoryDAG without worrying about how the DAG structure + * is mutated. 
+ */ +class TORCH_API MemoryDAGBuilder { + public: + MemoryDAGBuilder() = default; + MemoryDAGBuilder(const MemoryDAGBuilder&) = delete; + MemoryDAGBuilder& operator=(const MemoryDAGBuilder&) = delete; + + // Make `from` point at `to`. + void makePointerTo(Element* from, Element* to); + + void addToContainedElements(Element* contained, Element* container); + + std::unique_ptr createMemoryDAG() && { + return std::make_unique(std::move(indexToElementMap_)); + } + + // Make a fresh Element (i.e. an Element that doesn't point to anything) and + // return it. + Element* makeFreshValue(const Value* v); + + friend MemoryDAG; + + private: + // `MemoryDAGBuilder` builds up `indexToElementMap_`, then uses + // the map to construct the `MemoryDAG` + std::vector> indexToElementMap_; +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/op_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/op_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..d6a566d1ef06843ff0d27b6b07b42970f8c89825 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/op_registry.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { +// Moved from shape_analysis.cpp + +// Requirements: +// dims : preserved from the first argument +// scalar type : preserved from the first argument (doesn't have to +// match other arguments) +// device : always matching and preserved +// tensor inputs : * +// tensor outputs : 1 +// NB: those ops (with slight adjustments) are good candidates 
for restarts. +// Knowing the type and device of weights or biases is usually enough to +// infer the output type. +std::shared_ptr nn_ops_first_input_preserving(); + +// Requirements: +// dims : Changed from first argument +// scalar type : preserved from the first argument +// device : always matching and preserved +// tensor inputs : 1 +// tensor outputs : 1 +std::shared_ptr ops_one_tensor_in_shape_transform(); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/optimization_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/optimization_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..189d8128354445927ca03f457ca51951e81453e3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/optimization_utils.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) + +#pragma once + +#include + +namespace torch::jit { + +// Checks if the parameters, not including the +// first param are all constants. +bool nonConstantParameters(Node* n); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/subgraph_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/subgraph_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..eb84520636361f0353ddafbb0d27d12c6de0df58 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/passes/utils/subgraph_utils.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +// Utilities for dealing with nodes that contain subgraphs. +// +// They handle the complexity of editing inputs/outputs as you merge nodes in +// and out of subgraphs. +namespace torch::jit::SubgraphUtils { + +// Create a new subgraph node that contains only `n`. The new subgraph will have +// `subgraphKind` as its type. +// +// `n` is destroyed. +// +// Returns the new subgraph node. +TORCH_API Node* createSingletonSubgraph(Node* n, Symbol subgraphKind); + +// Creates a new subgraph that only contains `n`, amd updates the new outputs +// of the subgraph to have the aliasing properties of the original `n` outputs +TORCH_API Node* createSingletonSubgraphAndUpdateAliasing( + Node* to_merge, + Symbol subgraphKind, + AliasDb& db); + +// Merge a node into a subgraph node. If `toMerge` is also a subgraph, the +// subgraphs are merged. +// If `destroyNode` is true `toMerge` is destroyed. +// An optional argument 'vmap' could be used to retrieve value mappings. 
+// Values will be mapped to their new subgraph values +TORCH_API void mergeNodeIntoSubgraph( + Node* toMerge, + Node* subgraphNode, + bool destroyNode = true); + +// Merges a node into a subgraph node, and updates the new outputs of the +// subgraph to have the aliasing properties of the corresponding `to_merge` +// outputs +TORCH_API void mergeNodeIntoSubgraphAndUpdateAliasing( + Node* to_merge, + Node* subgraphNode, + AliasDb& db); + +TORCH_API std::vector unmergeAliasedOutputs( + Node* subgraphNode, + AliasDb& db); + +// Move nodes from a subgraph node to the outer graph. +// `subgraphNode` is destroyed. +TORCH_API void unmergeSubgraph(Node* subgraphNode); + +// Move `node_to_unmerge` and its descendants after `subgraphNode` +// promotes any dependencies of `node_to_unmerge` to subgraphNode outputs +TORCH_API void unmergeNode(Node* node_to_unmerge, Node* subgraphNode); + +TORCH_API bool unmergeOutputsAlisingInputs(Node* subgraphNode); + +TORCH_API bool unmergeAliasedOutputs(Node* subgraphNode); + +// Convenience function +std::shared_ptr getSubgraph(Node* n); + +TORCH_API std::string generateNameForGraph( + const std::shared_ptr& graph, + size_t maxlen = 40, + const std::string& prefix = "fused"); + +} // namespace torch::jit::SubgraphUtils + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/init.h new file mode 100644 index 0000000000000000000000000000000000000000..29180349f5aade9591e36fe892eabf91cb92d064 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/init.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +void initJITBindings(PyObject* module); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/module_python.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/module_python.h new file mode 100644 index 0000000000000000000000000000000000000000..c127ff8b5a6ac87672a7a1e77158351b9d5dc591 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/module_python.h @@ -0,0 +1,69 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace torch::jit { + +inline std::optional as_module(py::handle obj) { +#if IS_PYBIND_2_13_PLUS + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store + storage; + auto& ScriptModule = + storage + .call_once_and_store_result([]() -> py::object { + return py::module_::import("torch.jit").attr("ScriptModule"); + }) + .get_stored(); +#else + static py::handle ScriptModule = + 
py::module::import("torch.jit").attr("ScriptModule"); +#endif + if (py::isinstance(obj, ScriptModule)) { + return py::cast(obj.attr("_c")); + } + return std::nullopt; +} + +inline std::optional as_object(py::handle obj) { +#if IS_PYBIND_2_13_PLUS + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store< + std::tuple> + storage; + auto& [ScriptObject, RecursiveScriptClass] = + storage + .call_once_and_store_result( + []() -> std::tuple { + return { + py::module_::import("torch").attr("ScriptObject"), + py::module_::import("torch.jit") + .attr("RecursiveScriptClass")}; + }) + .get_stored(); +#else + static py::handle ScriptObject = + py::module::import("torch").attr("ScriptObject"); + + static py::handle RecursiveScriptClass = + py::module::import("torch.jit").attr("RecursiveScriptClass"); +#endif + + if (py::isinstance(obj, ScriptObject)) { + return py::cast(obj); + } + if (py::isinstance(obj, RecursiveScriptClass)) { + return py::cast(obj.attr("_c")); + } + return std::nullopt; +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/opaque_obj.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/opaque_obj.h new file mode 100644 index 0000000000000000000000000000000000000000..8f6b6c324b0ac2642fc38ef05a323a544cc2c1b3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/opaque_obj.h @@ -0,0 +1,84 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::jit { +struct OpaqueObject : public CustomClassHolder { + OpaqueObject(py::object payload) : payload_(std::move(payload)) {} + + void setPayload(py::object payload) { + payload_ = std::move(payload); + } + + py::object getPayload() { + return payload_; + } + + py::object payload_; +}; + +static auto register_opaque_obj_class = + torch::class_("aten", "OpaqueObject") + .def( + "__eq__", + [](const c10::intrusive_ptr& self, + const c10::intrusive_ptr& other) { + auto self_payload = self->getPayload(); + auto other_payload = other->getPayload(); + + if (!self_payload.ptr() || !other_payload.ptr()) { + return false; + } + + py::gil_scoped_acquire gil; + auto res = PyObject_RichCompareBool( + self_payload.ptr(), other_payload.ptr(), Py_EQ); + if (res == -1) { + throw py::error_already_set(); + } + return res > 0; + }) + .def_pickle( + [](const c10::intrusive_ptr& self) { // __getstate__ + // Since we cannot directly return the py::object due to + // CustomClassHolder's signature limitations, we will have to + // serialize it directly here. We also can't return py::bytes so + // need to encode it into a string. 
+ py::module_ pickle = py::module_::import("pickle"); + py::module_ base64 = py::module_::import("base64"); + py::bytes pickled_payload = + pickle.attr("dumps")(self->getPayload()); + py::bytes encoded_payload = + base64.attr("b64encode")(pickled_payload); + return std::string(encoded_payload); + }, + [](const std::string& state) { // __setstate__ + py::module_ pickle = py::module_::import("pickle"); + py::module_ base64 = py::module_::import("base64"); + py::bytes state_bytes(state); + py::bytes decoded_payload = base64.attr("b64decode")(state_bytes); + py::object restored_payload = + pickle.attr("loads")(decoded_payload); + return c10::make_intrusive(restored_payload); + }) + .def( + "__obj_flatten__", + [](const c10::intrusive_ptr& self) { + throw std::runtime_error( + "Unable to implement __obj_flatten__ for opaque objects."); + }); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind.h new file mode 100644 index 0000000000000000000000000000000000000000..6ded51cf15914dea299a1e76b7abb534153a08b5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind.h @@ -0,0 +1,218 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +namespace py = pybind11; + +namespace torch::jit { + +// This is a variant of shared_ptr that "sees through" a wrapper. +// We use it to convert Value, Node, Block and node to "wrapped" Python +// values. 
When we destruct the C++ object, the wrapper's pointer will +// be set to 0 and any future dereferencing will throw. We need this +// because the Python objects may hang around after the C++ object +// has already been destroyed. +// This also needs the magic type_caster below, which is from the +// workaround offered in https://github.com/pybind/pybind11/issues/2751 +template +class unwrapping_shared_ptr { + static_assert( + std::is_same_v || + std::is_same_v || + std::is_same_v, + "unwrapping type only defined for Graph object types"); + + private: + std::shared_ptr> impl; + + public: + unwrapping_shared_ptr() : impl({}) {} + explicit unwrapping_shared_ptr(T* p) : impl(p->wrap()) { + impl->clear_cb = &clear_registered_instances; + } + T* get() const { + if (!impl->elem) { + throw std::logic_error("has been invalidated"); + } + return impl->elem; + } + // we need to disable the overloaded & for PyBind11 < 2.3 due. + // see https://github.com/pybind/pybind11/pull/1435 +#if (PYBIND11_VERSION_MAJOR > 2) || \ + ((PYBIND11_VERSION_MAJOR == 2) && (PYBIND11_VERSION_MINOR >= 3)) + T** operator&() { + if (!impl->elem) { + throw std::logic_error("has been invalidated"); + } + return &(impl->elem); + } +#endif +}; + +} // namespace torch::jit + +PYBIND11_DECLARE_HOLDER_TYPE(T, torch::jit::unwrapping_shared_ptr, true) + +namespace pybind11::detail { + +#define CREATE_UNWRAPPING_CASTER(Class) \ + template <> \ + struct type_caster : public type_caster_base { \ + public: \ + using type = Class; \ + using holder_type = torch::jit::unwrapping_shared_ptr; \ + \ + bool load(handle src, bool convert) { \ + return load_impl>(src, convert); \ + } \ + \ + explicit operator type*() { \ + return static_cast(value); \ + } \ + explicit operator type&() { \ + return *static_cast(value); \ + } \ + \ + protected: \ + friend class type_caster_generic; \ + \ + bool load_value(const value_and_holder& v_h) { \ + if (v_h.holder_constructed()) { \ + value = v_h.template holder().get(); \ + return 
true; \ + } else { \ + throw cast_error( \ + "Unable to cast from non-held to held instance (#Class& to Holder<#Class>)"); \ + } \ + } \ + } + +CREATE_UNWRAPPING_CASTER(torch::jit::Node); +CREATE_UNWRAPPING_CASTER(torch::jit::Value); +CREATE_UNWRAPPING_CASTER(torch::jit::Block); + +#undef CREATE_UNWRAPPING_CASTER + +template <> +struct type_caster { + public: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + PYBIND11_TYPE_CASTER(torch::jit::IValue, _("IValue")); + + bool load(handle src, bool /*unused*/) { + try { + value = torch::jit::toTypeInferredIValue(src); + return true; + } catch (std::exception&) { + return false; + } + } + + static handle cast( + torch::jit::IValue src, + return_value_policy /* policy */, + handle /* parent */) { + return torch::jit::toPyObject(std::move(src)).release(); + } +}; + +template <> +struct type_caster { + public: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + PYBIND11_TYPE_CASTER(torch::jit::Symbol, _("Symbol")); + + bool load(handle src, bool /*unused*/) { + // TODO: Is there a way to py::cast that doesn't raise an exception on + // failure? Can we catch pybind11::cast_error here instead? 
+ std::string src_str; + try { + src_str = py::cast(src); + } catch (std::exception&) { + return false; + } + value = torch::jit::Symbol::fromQualString(src_str); + return true; + } + + static handle cast( + torch::jit::Symbol src, + return_value_policy /* policy */, + handle /* parent */) { + return py::cast(std::string(src.toQualString()), return_value_policy::copy) + .release(); + } +}; + +template <> +struct type_caster { + public: + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + PYBIND11_TYPE_CASTER(torch::jit::AttributeKind, _("AttributeKind")); + + bool load(handle src, bool /*unused*/) { + return false; + } + + static handle cast( + torch::jit::AttributeKind src, + return_value_policy /* policy */, + handle /* parent */) { + return py::cast( + std::string(torch::jit::toString(src)), + return_value_policy::copy) + .release(); + } +}; + +// See https://github.com/pybind/pybind11/issues/637 +using ListCasterBase = pybind11::detail:: + list_caster, torch::jit::Node*>; +template <> +struct type_caster> : ListCasterBase { + static handle cast( + const std::vector& src, + return_value_policy /*unused*/, + handle parent) { + return ListCasterBase::cast(src, return_value_policy::reference, parent); + } + static handle cast( + const std::vector* src, + return_value_policy pol, + handle parent) { + return cast(*src, pol, parent); + } +}; + +} // namespace pybind11::detail + +namespace torch::jit { + +static inline py::tuple tuple_tail(const py::tuple& tup) { + py::tuple r(tup.size() - 1); + for (const auto i : c10::irange(1, tup.size())) { + r[i - 1] = tup[i]; + } + return r; +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..888926575eb40b7d0fb7a35f2fd8bea1edbc5f92 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/pybind_utils.h @@ -0,0 +1,1337 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#ifdef USE_DISTRIBUTED +#include +#include +#endif + +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// The visibility attribute is to avoid a warning about storing a field in the +// struct that has a different visibility (from pybind) than the struct. 
+#ifdef _WIN32 +#define VISIBILITY_HIDDEN +#else +#define VISIBILITY_HIDDEN __attribute__((visibility("hidden"))) +#endif + +namespace torch::jit { + +using ResolutionCallback = std::function; + +void clear_registered_instances(void* ptr); + +TORCH_PYTHON_API IValue toIValue( + py::handle obj, + const TypePtr& type, + std::optional N = std::nullopt); + +TORCH_PYTHON_API py::object toPyObject(IValue ivalue); + +// Hack to overload the behavior of toIValue to accept Python +// numbers in places where a Tensor is expected +// See also torch::should_allow_numbers_as_tensors +class TORCH_PYTHON_API ToIValueAllowNumbersAsTensors { + bool old_; + + public: + ToIValueAllowNumbersAsTensors(bool enable); + ~ToIValueAllowNumbersAsTensors(); +}; + +// Wrap Python function to guard deref +// NB: Need VISIBILITY_HIDDEN for silencing compiler error, +// 'torch::jit::PythonFunctionGuard' declared with greater visibility than the +// type of its field 'torch::jit::PythonFunctionGuard::func_' +struct VISIBILITY_HIDDEN PythonFunctionGuard { + explicit PythonFunctionGuard(py::function func) : func_(std::move(func)) {} + PythonFunctionGuard(const PythonFunctionGuard&) = delete; + PythonFunctionGuard(PythonFunctionGuard&&) = delete; + PythonFunctionGuard& operator=(const PythonFunctionGuard&) = delete; + PythonFunctionGuard& operator=(PythonFunctionGuard&&) = delete; + + ~PythonFunctionGuard() { + pybind11::gil_scoped_acquire ag; + func_.dec_ref(); + // explicitly setting PyObject* to nullptr to prevent py::object's dtor to + // decref on the PyObject again. 
+ // See Note [Destructing py::object] in python_ivalue.h + func_.ptr() = nullptr; + } + + py::function func_; +}; + +// The PythonFutureWrapper for ivalue::Future +// +// NB: VISIBILITY_HIDDEN is for silencing compiling error, +// "error: 'torch::jit::PythonFutureWrapper' declared with greater visibility +// than the type of its field 'torch::jit::PythonFutureWrapper::unwrap_func' +// [-Werror=attributes]" +// +// NB: inherit from enable_shared_from_this because then(py::function) needs to +// get a shared_ptr from this pointer. +struct VISIBILITY_HIDDEN PythonFutureWrapper + : std::enable_shared_from_this { + using UnwrapFunc = std::function; + + explicit PythonFutureWrapper( + c10::intrusive_ptr fut, + std::optional unwrap_func = std::nullopt) + : fut(std::move(fut)), unwrap_func(std::move(unwrap_func)) {} + + explicit PythonFutureWrapper(const PythonFutureWrapper&) = delete; + PythonFutureWrapper& operator=(const PythonFutureWrapper&) = delete; + PythonFutureWrapper(PythonFutureWrapper&&) = default; + PythonFutureWrapper& operator=(PythonFutureWrapper&&) = default; + ~PythonFutureWrapper() = default; + + bool done() { + return fut->completed(); + } + + py::object value() { + // acquiring GIL as toPyObject creates new py::object + // without grabbing the GIL. + py::gil_scoped_acquire acquire; + py::object py_obj = toPyObject(fut->value()); + // unwrap_func is a general compositional function that takes in a + // py::object and executes some python function. It is currently mostly used + // to throw python exceptions. 
+ if (unwrap_func) { + (*unwrap_func)(py_obj); + } + return py_obj; + } + + py::object wait() { + fut->wait(); + if (jit::tracer::isTracing()) { + auto graph = jit::tracer::getTracingState()->graph; + + Value* fut_val = jit::tracer::getValueTrace(fut); + auto output = graph->insert(aten::wait, {fut_val}); + jit::tracer::setValueTrace(fut->value(), output); + } + return value(); + } + + // The py::function cb arg must take a std::shared_ptr + // (i.e., torch._C.Future) as the only argument. If the type mismatches, an + // error will be thrown when waiting for the value of this returned Future. + std::shared_ptr then(py::function cb) { + // We need this an additional layer of wrapper here to guard the + // destruction of the py::function object. Because, the + // Future owns a reference to the py::function in its callback + // vector, but Future does not acquire GIL on destruction. + auto pf = std::make_shared(std::move(cb)); + + return std::make_shared(fut->then( + // Capture a copy of the ivalue::Future instead of the `this` pointer + // because the PythonFutureWrapper object could have been deleted + // when the callbacks are fired. For example, RPC only captures the + // ivalue::Future instead of PythonFutureWrapper in JitFuture's + // callback functions. Hence, if user code does not hold a reference to + // this PythonFutureWrapper object, there is no guarantee that the + // PythonFutureWrapper is still valid when running the callback. + [pyFut(this->getPtr()), + pf(std::move(pf))](c10::ivalue::Future& /* unused */) -> IValue { + try { + pybind11::gil_scoped_acquire ag; + return toIValue(pf->func_(pyFut), PyObjectType::get()); + } catch (py::error_already_set& e) { + auto err = std::runtime_error(c10::str( + "Got the following error when running the callback: ", + e.what())); + { + pybind11::gil_scoped_acquire ag; + // Release ownership on py::objects and also restore Python + // Error Indicator. 
+ e.restore(); + // Clear the Python Error Indicator as we has recorded the + // exception in the response message. + PyErr_Clear(); + } + + throw std::runtime_error(err); + } + }, + PyObjectType::get())); + } + + void add_done_callback(py::function cb) { + auto pf = std::make_shared(std::move(cb)); + // NOLINTNEXTLINE(modernize-avoid-bind) + fut->addCallback(std::bind( + [pyFut(this->getPtr())]( + const std::shared_ptr& pf) { + try { + pybind11::gil_scoped_acquire ag; + pf->func_(pyFut); + } catch (py::error_already_set& e) { + { + pybind11::gil_scoped_acquire ag; + // Release ownership on py::objects and also restore Python + // Error Indicator. + e.restore(); + // Clear the Python Error Indicator as we has recorded the + // exception in the response message. + PyErr_Clear(); + } + // Log and ignore exceptions raised through the callback + LOG(ERROR) << "Got the following error when running the callback: " + << e.what(); + + } catch (const std::exception& e) { + // Log and ignore exceptions raised through the callback + LOG(ERROR) << "Got the following error when running the callback: " + << e.what(); + } + }, + std::move(pf))); + } + + void markCompleted(const py::object& pyValue) { + DCHECK(PyGILState_Check()); + IValue value = toIValue(pyValue, PyObjectType::get()); + + py::gil_scoped_release release; + fut->markCompleted(std::move(value)); + } + + c10::intrusive_ptr fut; + // unwrap_func works like a callback for the value returned by + // PythonFutureWrapper::wait(). + std::optional unwrap_func; + + private: + std::shared_ptr getPtr() { + return shared_from_this(); + } +}; + +// The PythonAwaitWrapper for ivalue::Await +// +// Expresses delayed function execution with Lazy semantic. +// i.e. Await[W] in eager mode can be used as W. +// When the attribute of W type is requested, Await[W] will return the +// attribute of W, transparently calling wait() beforehand. 
+// No Lazy semantic for script, explicit wait(Await[W]) -> W must be called to +// convert to type W. +// +// The Await object takes shared ownership of specified function and the +// arguments. After first call for wait() it owns the result. Deliberately no +// type inference for eager mode. +struct VISIBILITY_HIDDEN PythonAwaitWrapper + : std::enable_shared_from_this { + explicit PythonAwaitWrapper(c10::intrusive_ptr aw) + : aw_(std::move(aw)) {} + explicit PythonAwaitWrapper(py::handle input) { + args_ = py::tuple(1u); + args_[0] = input; + auto type = PyObjectType::get(); + aw_ = c10::make_intrusive(type); + aw_->markCompleted(toIValue(input, type)); + } + + explicit PythonAwaitWrapper(py::function pf, py::tuple args) + : args_(std::move(args)) { + pyfg_ = std::make_shared(std::move(pf)); + + std::function f = [fg(pyfg_), &args(args_)]() { + pybind11::gil_scoped_acquire ag; + return toIValue(fg->func_(*args), PyObjectType::get()); + }; + aw_ = c10::make_intrusive( + PyObjectType::get(), std::move(f)); + } + + explicit PythonAwaitWrapper(const PythonAwaitWrapper&) = delete; + PythonAwaitWrapper& operator=(const PythonAwaitWrapper&) = delete; + PythonAwaitWrapper(PythonAwaitWrapper&&) = default; + PythonAwaitWrapper& operator=(PythonAwaitWrapper&&) = default; + ~PythonAwaitWrapper() = default; + + py::object wait() { + py::gil_scoped_acquire acquire; + return toPyObject(aw_->wait()); + } + + // Nowait semantic means trivial case when Await is constructed from the + // result + bool is_nowait() { + return pyfg_ == nullptr; + } + + const py::function fn() { + TORCH_CHECK( + pyfg_, "Await constructed as awaitable_nowait does not have fn"); + return pyfg_->func_; + } + + const py::tuple args() { + return args_; + } + + TypePtr type() { + return aw_->type(); + } + + c10::intrusive_ptr aw_; + std::shared_ptr pyfg_; + py::tuple args_; + + private: + std::shared_ptr getPtr() { + return shared_from_this(); + } +}; + +// error reporting: when reporting user-caused errors, 
these functions should +// not use AT_ERROR macros, since these macros add stack trace information +// that is confusing to display to the end user since it always reports +// locations in libtorch code rather than user code. + +inline std::shared_ptr get_python_cu() { + return py::module::import("torch.jit._state") + .attr("_python_cu") + .cast>(); +} + +struct TypedIValue : public std::pair { + using pair::pair; + + IValue& ivalue() { + return this->first; + } + TypePtr& type() { + return this->second; + } +}; + +inline TypedIValue toDictKeyIValue(py::handle key) { + if (py::isinstance(key)) { + return TypedIValue( + ConstantString::create(py::cast(key)), StringType::get()); + } else if (py::isinstance(key)) { + return TypedIValue(py::cast(key), IntType::get()); + } else if (py::isinstance(key)) { + return TypedIValue(py::cast(key), FloatType::get()); + } else { + TORCH_CHECK( + false, "Dictionary inputs may only have string, int, or float keys"); + } +} + +inline std::optional unifyOrInitializeType( + const TypePtr& accum, + const TypePtr& unify) { + if (!accum) { + return unify; + } + return unifyTypes(accum, unify); +} + +using InferredType = c10::InferredType; + +InferredType tryToInferContainerType(py::handle input, bool primitiveTypeOnly); + +// Try to infer the type of a Python object +// The type cannot be inferred if: +// input is an empty container (list, dict) +// input is an list with element types that cannot be unified +// input is an dict with key or value types that cannot be unified +inline InferredType tryToInferType(py::handle input) { + // Try tensor types + if (THPVariable_Check(input.ptr())) { + return InferredType(TensorType::get()); + } + + if (input.is_none()) { + return InferredType(NoneType::get()); + } + + if (py::isinstance(input)) { + auto fn = py::cast(input).function_; + return InferredType(FunctionType::create(fn)); + } + + // Try basic types first + if (py::isinstance(input)) { + return InferredType(BoolType::get()); + // 
NOLINTNEXTLINE(bugprone-branch-clone) + } else if (py::isinstance(input)) { + return InferredType(IntType::get()); + } else if (py::isinstance(input)) { + return InferredType(FloatType::get()); + } else if (PyComplex_CheckExact(input.ptr())) { + return InferredType(ComplexType::get()); + // NOLINTNEXTLINE(bugprone-branch-clone) + } else if (py::isinstance(input)) { + // NOTE: We may need a ByteType in the future + return InferredType(StringType::get()); + } else if (py::isinstance(input)) { + return InferredType(StringType::get()); + } else if (THPLayout_Check(input.ptr())) { + return InferredType(IntType::get()); + } else if (THPDevice_Check(input.ptr())) { + return InferredType(DeviceObjType::get()); + } else if (THPGenerator_Check(input.ptr())) { + return InferredType(GeneratorType::get()); + } else if (THPStream_Check(input.ptr())) { + return InferredType(StreamObjType::get()); + } else if (THPDtype_Check(input.ptr())) { + return InferredType(IntType::get()); + } else if (THPQScheme_Check(input.ptr())) { + return InferredType(IntType::get()); + } else if (THPLayout_Check(input.ptr())) { + return InferredType(IntType::get()); + } + + auto enum_type = py::module::import("enum").attr("Enum"); + py::bool_ isEnumValue = py::isinstance(input, enum_type); + if (py::cast(isEnumValue)) { + auto enum_class = input.attr("__class__"); + auto enum_type = py::cast( + py::module::import("torch.jit.annotations") + .attr("try_ann_to_type")(enum_class, SourceRange())); + return InferredType(std::move(enum_type)); + } + + py::bool_ isClass = + py::module::import("inspect").attr("isclass")(py::type::handle_of(input)); + if (py::cast(isClass)) { + // Assume that the class is compiled already or will compile. Invalidate + // this later if needed. + bool class_compiled = true; + + // Check if the type is already compiled. 
+ py::object existing_ty = + py::module::import("torch.jit._state") + .attr("_get_script_class")(py::type::handle_of(input)); + + if (existing_ty.is_none()) { + // If not, try to compile it. + py::bool_ can_compile = + py::module::import("torch._jit_internal") + .attr("can_compile_class")(py::type::handle_of(input)); + + if (py::cast(can_compile)) { + // Try to compile the class. This is wrapped in a try-catch because + // compilation of class types can raise an Exception and in that case, + // we want to defer to other attempts at type inference below rather + // than fail compilation altogether. + try { + py::module::import("torch.jit._script") + .attr("_recursive_compile_class")( + py::type::handle_of(input), SourceRange()); + } catch (...) { + // Invalidate the assumption that the class compiled so that we don't + // look up and return its JIT type as the type for the input. + class_compiled = false; + } + } + } + + // If the class compiled successfully, look up the existing JIT type by + // qualified name and return it. 
+ if (class_compiled) { + auto script_class = + py::module::import("torch.jit._state") + .attr("_get_script_class")(py::type::handle_of(input)); + + if (!script_class.is_none()) { + auto class_type = py::cast(script_class); + + if (class_type && !class_type->is_module()) { + return InferredType(std::move(class_type)); + } + } + } + } + + if (py::isinstance(input)) { + auto object = py::cast(input); + return InferredType(object.type()); +#ifdef USE_RPC + } else if (py::isinstance(input)) { + auto rref_ivalue = input.cast().toIValue(); + return InferredType(rref_ivalue.type()); +#endif + } + + auto await_type = py::module::import("torch._awaits").attr("_Await"); + py::bool_ is_await = py::isinstance(input, await_type); + if (py::cast(is_await)) { + auto awptr = input.cast>(); + return InferredType(AwaitType::create(awptr->aw_->elementType())); + } + + if (as_module(py::cast(input))) { + return InferredType("Cannot infer type of ScriptModule"); + } + + auto module_type = py::module::import("torch.nn").attr("Module"); + py::bool_ is_module = py::isinstance(input, module_type); + if (py::cast(is_module)) { + return InferredType("Cannot infer concrete type of torch.nn.Module"); + } + + // Try container types + return tryToInferContainerType(input, false); +} + +// This function is similar to tryToInferType, but it only tries to infer +// primitive types (int, float, bool, complex) or nested container of primitive +// types. 
+inline InferredType tryToInferPrimitiveType(py::handle input) { + if (input.is_none()) { + return InferredType(NoneType::get()); + } + + // Only primitive data type + if (py::isinstance(input)) { + return InferredType(BoolType::get()); + // NOLINTNEXTLINE(bugprone-branch-clone) + } else if (py::isinstance(input)) { + return InferredType(IntType::get()); + } else if (py::isinstance(input)) { + return InferredType(FloatType::get()); + } else if (PyComplex_CheckExact(input.ptr())) { + return InferredType(ComplexType::get()); + } + + // Try container types + return tryToInferContainerType(input, true); +} + +inline InferredType tryToInferContainerType( + py::handle input, + bool primitiveTypeOnly = false) { + if (six::isTuple(input)) { + py::tuple tuple = py::cast(input); + std::vector element_types; + element_types.reserve(tuple.size()); + + for (py::handle elem : tuple) { + auto type_match = primitiveTypeOnly ? tryToInferPrimitiveType(elem) + : tryToInferType(elem); + if (type_match.success()) { + element_types.push_back(type_match.type()); + } else { + // Forward error message along + return type_match.reason(); + } + } + return InferredType(TupleType::create(std::move(element_types))); + } else if (PyDict_Check(input.ptr())) { + // Check to make sure we can generate useful input/output types + auto dict = py::cast(input); + size_t len = py::len(dict); + if (!len) { + return InferredType("Dictionary inputs must have entries"); + } + + TypePtr key_type = nullptr; + TypePtr value_type = nullptr; + + for (auto entry : dict) { + // Try to infer the key type and unify it with the existing one + auto entry_key_type_match = primitiveTypeOnly + ? 
tryToInferPrimitiveType(entry.first) + : tryToInferType(entry.first); + if (!entry_key_type_match.success()) { + return entry_key_type_match.reason(); + } + auto unified_key = + unifyOrInitializeType(key_type, entry_key_type_match.type()); + if (!unified_key) { + return InferredType(c10::str( + "Dictionary inputs to traced functions must have consistent type. Found ", + key_type->repr_str(), + " and ", + (entry_key_type_match.type())->repr_str())); + } + + // Try to infer the value type and unify it with the existing one + auto entry_value_type_match = primitiveTypeOnly + ? tryToInferPrimitiveType(entry.second) + : tryToInferType(entry.second); + if (!entry_value_type_match.success()) { + return entry_value_type_match.reason(); + } + auto unified_value = + unifyOrInitializeType(value_type, entry_value_type_match.type()); + if (!unified_value) { + return InferredType(c10::str( + "Dictionary inputs to traced functions must have consistent type. Found ", + value_type->repr_str(), + " and ", + (entry_value_type_match.type())->repr_str())); + } + + key_type = *unified_key; + value_type = *unified_value; + } + return InferredType( + DictType::create(std::move(key_type), std::move(value_type))); + } else if (PyList_Check(input.ptr())) { + auto list = py::cast(input); + size_t len = py::len(list); + if (!len) { + return InferredType("List trace inputs must have elements"); + } + + TypePtr element_type = nullptr; + for (auto elem : list) { + auto element_type_match = primitiveTypeOnly + ? tryToInferPrimitiveType(elem) + : tryToInferType(elem); + if (!element_type_match.success()) { + return InferredType(c10::str( + "Could not infer type of list element: ", + element_type_match.reason())); + } + auto unified_type = + unifyOrInitializeType(element_type, element_type_match.type()); + if (!unified_type) { + return InferredType(c10::str( + "List inputs to traced functions must have consistent element type. 
Found ", + element_type->repr_str(), + " and ", + (element_type_match.type())->repr_str())); + } + element_type = *unified_type; + } + return InferredType(ListType::create(element_type)); + } else { + if (primitiveTypeOnly) { + return InferredType(c10::str( + "Only tuple, list, or dict (possibly nested) of primitive types (bool, float, int, complex)", + "are supported ", + "as inputs or outputs of traced functions", + ", but instead got value of type ", + py::str(py::type::handle_of(input).attr("__name__")), + ".")); + } else { + // TODO: this message is not correct anymore, since this InferredType is + // used from a bunch of circumstances unrelated to tracing. We can reuse + // this instead of the attribute_failure stuff in concreteType + return InferredType(c10::str( + "Only tensors and (possibly nested) tuples of tensors, lists, or dicts ", + "are supported ", + "as inputs or outputs of traced functions", + ", but instead got value of type ", + py::str(py::type::handle_of(input).attr("__name__")), + ".")); + } + } +} + +inline bool isTraceableType(const TypePtr& type) { + if (type->isSubtypeOf(*TensorType::get())) { + return true; + } + + if (auto list_type = type->cast()) { + return isTraceableType(list_type->getElementType()); + } + + if (auto tuple_type = type->cast()) { + return std::all_of( + tuple_type->elements().begin(), + tuple_type->elements().end(), + [](const TypePtr& element_type) { + return isTraceableType(element_type); + }); + } + + if (auto dict_type = type->cast()) { + return isTraceableType(dict_type->getValueType()); + } + + return false; +} + +inline IValue toTypeInferredIValue(py::handle input) { + auto match = tryToInferType(input); + if (!match.success()) { + auto object = py::cast(input); + if (auto mod = as_module(object)) { + // if obj is already a ScriptModule, just return its ivalue + auto ptr = mod.value()._ivalue(); + // explicit copy semantics for strong ownership of the resource. 
+ return c10::intrusive_ptr::reclaim_copy( + ptr.release()); + } + + // Check if the obj is a ScriptObject. + if (auto script_obj = as_object(object)) { + auto ptr = script_obj.value()._ivalue(); + return c10::intrusive_ptr::reclaim_copy( + ptr.release()); + } + TORCH_CHECK( + false, + "Tracer cannot infer type of ", + py::str(input), + "\n:", + match.reason()); + } + return toIValue(input, match.type()); +} + +inline Stack toTraceableStack(const py::tuple& inputs) { + auto info = toTypeInferredIValue(inputs); + TORCH_CHECK( + isTraceableType(info.type()), + "Type '", + info.type()->repr_str(), + "' cannot be traced. Only Tensors and (possibly nested) Lists, Dicts, and" + " Tuples of Tensors can be traced"); + return info.toTupleRef().elements().vec(); +} + +// Serialize the python dictionary into a traceable stack. +inline Stack toTraceableStack(const py::dict& inputs) { + Stack res; + for (auto it = inputs.begin(); it != inputs.end(); it++) { + if (THPVariable_Check(it->second.ptr())) { + res.push_back(toIValue(it->second, tryToInferType(it->second).type())); + } + } + return res; +} + +inline IValue createGenericList(py::handle obj, const TypePtr& elem_type) { + auto elems = c10::impl::GenericList(elem_type); + for (auto elem : obj) { + elems.push_back(toIValue(elem, elem_type)); + } + return IValue(elems); +} + +inline IValue createGenericDict( + const py::dict& obj, + const TypePtr& key_type, + const TypePtr& value_type) { + c10::impl::GenericDict elems(key_type, value_type); + elems.reserve(py::len(obj)); + for (auto& entry : obj) { + elems.insert( + toIValue(entry.first, key_type), toIValue(entry.second, value_type)); + } + return IValue(elems); +} + +template +inline void guardAgainstNamedTensor(const T& var) { + TORCH_CHECK( + !var.has_names(), + "NYI: Named tensors are currently unsupported in TorchScript. 
As a " + "workaround please drop names via `tensor = tensor.rename(None)`."); +} + +// Extract custom class registered with torchbind +template +c10::intrusive_ptr toCustomClass(py::handle obj) { + static_assert( + std::is_base_of_v, "T is not a CustomClass"); + const auto& type = c10::getCustomClassType>(); + c10::IValue ivalue = toIValue(obj, type); + return std::move(ivalue).toCustomClass(); +} + +// Small wrapper around getting the type name string from Python to make +// types easier to interpret, e.g. give the structural type for a NamedTuple +inline std::string friendlyTypeName(py::handle obj) { + if (py::isinstance(obj) && py::hasattr(obj, "_fields")) { + auto field_names = + py::cast>(py::getattr(obj, "_fields")); + std::stringstream ss; + ss << py::str(py::type::handle_of(obj).attr("__name__")); + ss << " (aka NamedTuple("; + bool first = true; + for (auto& field_name : field_names) { + if (!first) { + ss << ", "; + } + ss << field_name; + first = false; + } + ss << "))"; + return ss.str(); + } else { + return py::str(py::type::handle_of(obj).attr("__name__")); + } +} + +// Thrown when trying to create a schema for a list of python +// arguments that cannot be converted. +// Can be caught by the caller to attempt to use other schema +// when there is an overloaded operator. 
+struct schema_match_error : public std::runtime_error { + using std::runtime_error::runtime_error; +}; + +inline IValue argumentToIValue( + const FunctionSchema& schema, + size_t argumentPosition, + py::handle object) { + const auto& argument = schema.arguments().at(argumentPosition); + try { + return toIValue(object, argument.real_type(), argument.N()); + } catch (const py::cast_error& error) { + throw schema_match_error(c10::str( + schema.formatTypeMismatchMsg( + argument, + friendlyTypeName(object), + argumentPosition, + py::repr(object)), + "\nCast error details: ", + error.what())); + } catch (const py::error_already_set& error) { + throw schema_match_error(c10::str( + schema.formatTypeMismatchMsg( + argument, + friendlyTypeName(object), + argumentPosition, + py::repr(object)), + "\n Python error details: ", + error.what())); + } +} + +inline IValue returnToIValue(const TypePtr& type, py::handle object) { + try { + return toIValue(object, type); + } catch (const py::cast_error& error) { + throw std::runtime_error(c10::str( + " expected value of type ", + type->str(), + " for return value but instead got value of type ", + py::str(py::type::handle_of(object).attr("__name__")), + ".", + "\nValue: ", + py::repr(object), + "\nCast error details: ", + error.what())); + } +} + +inline py::object getScriptedClassOrError(const c10::NamedTypePtr& classType) { + auto py_class = + py::module::import("torch.jit._state") + .attr("_get_python_class")(classType->name()->qualifiedName()); + if (py_class.is_none()) { + std::stringstream err; + err << "Unknown reference to ScriptClass "; + err << classType->name()->qualifiedName(); + err << ". 
(Did you forget to import it?)"; + throw std::runtime_error(err.str()); + } + return py_class; +} + +struct VISIBILITY_HIDDEN tuple_slice { + /*implicit*/ tuple_slice(py::tuple tup_) + : tup(std::move(tup_)), b(0), e(static_cast(tup.size())) {} + tuple_slice(py::tuple tup_, int64_t b_) + : tup(std::move(tup_)), b(b_), e(static_cast(tup.size())) {} + tuple_slice(py::tuple tup_, int64_t b_, int64_t e_) + : tup(std::move(tup_)), b(b_), e(e_) {} + py::detail::tuple_iterator begin() const { + return {tup, static_cast(b)}; + } + py::detail::tuple_iterator end() const { + return {tup, static_cast(e)}; + } + size_t size() const { + return e - b; + } + py::detail::tuple_accessor operator[](size_t index) const { + return {tup, static_cast(b + index)}; + } + + private: + py::tuple tup; + int64_t b; + int64_t e; +}; + +inline bool validateFakeScriptObjectSchema( + const c10::FunctionSchema& schema, + size_t argumentPosition, + py::handle object) { + auto argument = schema.arguments().at(argumentPosition); + auto class_type = argument.real_type()->expect(); + auto fake_class_registry = + py::module::import("torch._library.fake_class_registry"); + auto fake_class = fake_class_registry.attr("find_fake_class")( + class_type->name().value().qualifiedName()); + if (!py::isinstance(object.attr("wrapped_obj"), fake_class)) { + throw schema_match_error(c10::str( + schema.formatTypeMismatchMsg( + argument, + friendlyTypeName(object), + argumentPosition, + py::repr(object.attr("wrapped_obj"))), + "\nCast error details: ", + argument.name(), + " is expected to be a FakeScriptObject of ", + class_type->name().value().qualifiedName())); + } + return true; +} + +inline bool matchSchemaAllowFakeScriptObject( + const FunctionSchema& schema, + const tuple_slice& args, + const py::kwargs& kwargs) { + size_t all_arguments = args.size() + kwargs.size(); + if (all_arguments > schema.arguments().size()) { + throw schema_match_error(c10::str( + schema.name(), + "() expected at most ", + 
schema.arguments().size(), + " argument(s) but received ", + all_arguments, + " argument(s). Declaration: ", + schema)); + } + + int64_t arg_idx = 0; + auto fake_class_registry = + py::module::import("torch._library.fake_class_registry"); + + // First push all positional args. + for (const auto& arg : args) { + // ...but refuse to do it if the schema says that this was supposed + // to be keyword only + if (schema.arguments()[arg_idx].kwarg_only()) { + throw schema_match_error(c10::str( + schema.name(), + "() takes ", + arg_idx, + " positional argument(s) but ", + args.size(), + " was/were given. Declaration: ", + schema)); + } + // Use the type information from the schema to convert the PyObject. + const auto& argument = schema.arguments().at(arg_idx); + if (argument.real_type()->kind() == TypeKind::ClassType && + py::isinstance(arg, fake_class_registry.attr("FakeScriptObject"))) { + validateFakeScriptObjectSchema(schema, arg_idx, arg); + } else { + argumentToIValue(schema, arg_idx, arg); + } + + arg_idx++; + } + + // Now for every remaining non-positional argument in the schema, look for it + // in the kwargs dict and push it if found, or use its default value if it + // has one. + size_t consumed_kwargs = 0; + for (size_t i = arg_idx; i < schema.arguments().size(); ++i) { + const auto& arg = schema.arguments()[i]; + if (kwargs.contains(arg.name().c_str())) { + auto cur_kwarg = kwargs[arg.name().c_str()]; + if (arg.real_type()->kind() == TypeKind::ClassType && + py::isinstance( + cur_kwarg, fake_class_registry.attr("FakeScriptObject"))) { + validateFakeScriptObjectSchema(schema, i, cur_kwarg); + } else { + argumentToIValue(schema, i, cur_kwarg); + } + consumed_kwargs += 1; + } else if (arg.default_value()) { + continue; + } else { + throw schema_match_error(c10::str( + schema.name(), + "() is missing value for argument '", + arg.name(), + "'. 
Declaration: ", + schema)); + } + } + + if (consumed_kwargs != kwargs.size()) { + std::vector names; + for (const auto& kwarg : kwargs) { + names.emplace_back(py::cast(kwarg.first)); + } + throw schema_match_error(schema.findErrorInKwargs(names)); + } + + return true; +} + +inline Stack createStackForSchema( + const FunctionSchema& schema, + const tuple_slice& args, + const py::kwargs& kwargs, + std::optional self) { + size_t all_arguments = (self ? 1 : 0) + args.size() + kwargs.size(); + if (all_arguments > schema.arguments().size()) { + throw schema_match_error(c10::str( + schema.name(), + "() expected at most ", + schema.arguments().size(), + " argument(s) but received ", + all_arguments, + " argument(s). Declaration: ", + schema)); + } + Stack stack; + stack.reserve(schema.arguments().size()); + + int64_t arg_idx = 0; + if (self) { + push(stack, std::move(*self)); + arg_idx++; + } + // First push all positional args. + for (const auto& arg : args) { + // ...but refuse to do it if the schema says that this was supposed + // to be keyword only + if (schema.arguments()[arg_idx].kwarg_only()) { + throw schema_match_error(c10::str( + schema.name(), + "() takes ", + arg_idx, + " positional argument(s) but ", + self ? 1 + args.size() : args.size(), + " was/were given. Declaration: ", + schema)); + } + // Use the type information from the schema to convert the PyObject. + push(stack, argumentToIValue(schema, stack.size(), arg)); + arg_idx++; + } + + // Now for every remaining non-positional argument in the schema, look for it + // in the kwargs dict and push it if found, or use its default value if it + // has one. 
+ size_t consumed_kwargs = 0; + for (size_t i = stack.size(); i < schema.arguments().size(); ++i) { + const auto& arg = schema.arguments()[i]; + if (kwargs.contains(arg.name().c_str())) { + push(stack, argumentToIValue(schema, i, kwargs[arg.name().c_str()])); + consumed_kwargs += 1; + } else if (arg.default_value()) { + push(stack, *arg.default_value()); + } else { + throw schema_match_error(c10::str( + schema.name(), + "() is missing value for argument '", + arg.name(), + "'. Declaration: ", + schema)); + } + } + + if (consumed_kwargs != kwargs.size()) { + std::vector names; + for (const auto& kwarg : kwargs) { + names.emplace_back(py::cast(kwarg.first)); + } + throw schema_match_error(schema.findErrorInKwargs(names)); + } + + return stack; +} + +// NOLINTNEXTLINE(cppcoreguidelines-rvalue-reference-param-not-moved) +inline py::object createPyObjectForStack(Stack&& stack) { + if (stack.empty()) { + return py::none(); + } + + // Return a simple value and not a single-element tuple if there is only one + // return value. + if (stack.size() == 1) { + return toPyObject(std::move(stack[0])); + } + + // If there is more than one return value, pop them into a py::tuple. + py::tuple return_values(stack.size()); + for (const auto ret : c10::irange(return_values.size())) { + return_values[ret] = toPyObject(std::move(stack[ret])); + } + +#if defined(__clang__) + return std::move(return_values); +#else + return return_values; +#endif +} + +// TODO: Remove once we clean up the GraphExecutor usage. 
+inline Stack evilDeprecatedBadCreateStackDoNotUse( + const py::tuple& tuple, + at::ArrayRef inputs, + size_t reserve_extra_space = 0) { + if (tuple.size() != inputs.size()) { + TORCH_CHECK( + false, + "expected " + std::to_string(inputs.size()) + " inputs, but got " + + std::to_string(tuple.size())); + } + Stack result; + result.reserve(tuple.size() + reserve_extra_space); + for (const auto i : c10::irange(inputs.size())) { + result.push_back(toIValue(std::move(tuple[i]), inputs[i]->type())); + } + return result; +} + +// Run `callee`, potentially inserting a CallFunction/CallMethod node into the +// tracing graph. +inline py::object runAndInsertCall( + Function& callee, + const tuple_slice& args, + const py::kwargs& kwargs, + std::optional self, + // Lambda that tells this function how to insert `callee` into the graph if + // we're tracing. + const std::function& + callInserter) { + auto stack = + createStackForSchema(callee.getSchema(), args, kwargs, std::move(self)); + const auto& tracing_state = tracer::getTracingState(); + if (!tracing_state) { + pybind11::gil_scoped_release no_gil_guard; + // If we're not tracing, just run the callee as normal. + callee.run(stack); + } else { + // If we are tracing, insert the appropriate CallFunction or CallMethod node + // and then run the callee with tracing disabled. + + // Get the graph `Value`s that represent the input IValues + auto inputs = last(stack, callee.num_inputs()); + auto input_values = + fmap(inputs, [](const IValue& v) { return tracer::getValueTrace(v); }); + TORCH_INTERNAL_ASSERT(callee.getSchema().returns().size() == 1) + auto return_type = callee.getSchema().returns().at(0).type(); + auto graph = tracing_state->graph; + std::vector named_values; + named_values.reserve(input_values.size()); + for (Value* v : input_values) { + named_values.emplace_back(v); + } + + // Add a call node. 
+ MatchedSchema match = matchSchema( + callee.getSchema(), + tracer::getPythonInterpreterSourceRange(), + *graph, + named_values, + {}); + auto output_value = callInserter(*graph, match); + + // Actually run the callee. Pause the tracer so that we don't double-add the + // callee nodes. + { + pybind11::gil_scoped_release no_gil_guard; + ResourceGuard guard(tracer::pauseTracing()); + callee.run(stack); + } + + // Associate the output IValues with the output `Value`s in the graph + tracer::setValueTrace(stack.back(), output_value); + } + + TORCH_CHECK( + !stack.empty(), + "Expected values in the stack after execution but found none"); + return toPyObject(std::move(stack.back())); +} + +inline std::optional maybeTorchFunctionDispatch( + const py::object& callee, + const tuple_slice& args_no_self, + const py::kwargs& kwargs, + const c10::QualifiedName& qualname) { + std::vector args_vec; + for (const auto& arg : args_no_self) { + args_vec.push_back(arg); + } + py::tuple args = py::cast(args_vec); + + // Handle __torch_function__ dispatch + std::vector overloaded_args; + size_t total_arg_num = args.size() + kwargs.size(); + for (const auto& arg : args) { + is_tensor_and_append_overloaded(arg.ptr(), &overloaded_args); + is_tensor_list_and_append_overloaded( + arg.ptr(), + &overloaded_args, + static_cast(total_arg_num), + false /* throw_error */); + } + // NB: for kwargs, we cannot guarantee the order of appending + // is the same as the argument order in operator's schema. + // This is suboptimal, but should be fine. Later when we have + // better schema matching and argument parsing, we could + // match the operator in `operations` first, then the order will + // be guaranteed. 
+ for (auto item : kwargs) { + is_tensor_and_append_overloaded(item.second.ptr(), &overloaded_args); + is_tensor_list_and_append_overloaded( + item.second.ptr(), + &overloaded_args, + total_arg_num, + false /* throw_error */); + } + if (!overloaded_args.empty()) { + return pybind11::reinterpret_steal( + handle_torch_function_no_python_arg_parser( + /*overloaded_args=*/overloaded_args, + /*args=*/args.ptr(), + /*kwargs=*/kwargs.ptr(), + /*func_name=*/qualname.name().c_str(), + /*torch_api_function=*/callee.ptr(), + /*module_name=*/qualname.prefix().c_str())); + } + + return std::nullopt; +} + +inline py::object invokeScriptFunctionFromPython( + Function& callee, + const tuple_slice& args, + const py::kwargs& kwargs) { + // TODO: we could add __torch_function__ dispatch here but I don't know + // the implications of doing so + + return runAndInsertCall( + callee, + args, + kwargs, + /*self=*/std::nullopt, + [&](Graph& graph, const MatchedSchema& match) { + return graph.insertFunctionCall(&callee, match); + }); +} + +inline py::object invokeScriptMethodFromPython( + Method& callee, + const tuple_slice& args, + const py::kwargs& kwargs) { + auto self = callee.owner()._ivalue(); + + if (auto torch_fn_result = maybeTorchFunctionDispatch( + py::cast(callee), args, kwargs, callee.name())) { + return *torch_fn_result; + } + + return runAndInsertCall( + callee.function(), + args, + kwargs, + self, + [&](Graph& graph, const MatchedSchema& match) { + return graph.insertMethodCall(callee.name(), match); + }); +} + +TORCH_PYTHON_API std::pair, Stack> getOpWithStack( + const std::vector>& operations, + const py::args& args, + const py::kwargs& kwargs); + +// Efficient overload (does not require vector allocation) of the +// above for use from C++ code. 
+std::pair, Stack> getOpWithStack( + c10::ArrayRef> operations, + const py::args& args, + const py::kwargs& kwargs); + +TORCH_PYTHON_API py::object invokeOperatorFromPython( + const std::vector>& operations, + const py::args& args, + const py::kwargs& kwargs, + std::optional dk = std::nullopt); + +// Efficient overload (does not require vector allocation) of the +// above for use from C++ code. +py::object invokeOperatorFromPython( + c10::ArrayRef> operations, + const py::args& args, + const py::kwargs& kwargs, + std::optional dk = std::nullopt); + +TORCH_PYTHON_API std::optional _maybe_handle_torch_function( + const std::string& ns, + const std::string& method_name, + const std::string& overload_name, + bool is_overload, + const py::args& args, + const py::kwargs& kwargs); + +TORCH_PYTHON_API bool checkSchemaAllowFakeScriptObject( + const FunctionSchema& schema, + const py::args& args, + const py::kwargs& kwargs); + +TORCH_PYTHON_API py::object _get_operation_for_overload_or_packet( + const std::vector>& operations, + Symbol symbol, + const py::args& args, + const py::kwargs& kwargs, + bool is_overload, + std::optional dk = std::nullopt); + +// Efficient overload (does not require vector allocation) of the +// above for use from C++ code. +py::object _get_operation_for_overload_or_packet( + c10::ArrayRef> operations, + Symbol symbol, + const py::args& args, + const py::kwargs& kwargs, + bool is_overload, + std::optional dk = std::nullopt); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_arg_flatten.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_arg_flatten.h new file mode 100644 index 0000000000000000000000000000000000000000..873901a15199313fc3358fa0013b7333ce570d46 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_arg_flatten.h @@ -0,0 +1,124 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::jit::python { + +struct IODescriptor { + struct VariableMetadata { + VariableMetadata(const autograd::Variable& var) + : sizes(var.sizes().vec()), + type(var.scalar_type()), + device(var.device()), + requires_grad(var.requires_grad()) {} + + bool operator==(const VariableMetadata& o) const { + return std::tie(device, requires_grad, type, sizes) == + std::tie(o.device, o.requires_grad, o.type, o.sizes); + } + + static size_t hash(const VariableMetadata& m) { + return c10::get_hash(m.sizes, m.device, m.requires_grad, m.type); + } + + std::vector sizes; + at::ScalarType type; + at::Device device; + bool requires_grad; + }; + + bool operator==(const IODescriptor& o) const { + return std::tie(structure, metadata, grad_enabled) == + std::tie(o.structure, o.metadata, o.grad_enabled); + } + + static size_t hash(const IODescriptor& o) { + return c10::get_hash(o.structure, o.metadata, o.grad_enabled); + } + + void extend(const autograd::variable_list& list) { + metadata.reserve(metadata.size() + list.size()); + for (auto& var : list) + metadata.emplace_back(var); + } + + // Description of argument structure. 
Variables are replaced with + // different characters, depending on their flags, beginnings and + // ends of tuples and lists are denoted by a pair of parenthesis + // of their corresponding kind. They should always be paired. + // Example desc: (vv[v(v)v]) + // NOTE: if extend() was ever called then metadata.size() can be + // different than the number of 'v's in structure. + std::string structure; + std::vector strings; + std::vector metadata; + bool grad_enabled = false; +}; + +static inline std::ostream& operator<<( + std::ostream& out, + const IODescriptor::VariableMetadata& meta) { + at::Device meta_device = meta.device; + auto& t = at::getDeprecatedTypeProperties( + meta_device.is_cpu() ? at::Backend::CPU : at::Backend::CUDA, meta.type); + out << t << "(requires_grad=" << meta.requires_grad; + if (meta_device.is_cuda()) { + out << ", device=" << meta_device.index(); + } + out << ") {"; + for (const auto i : c10::irange(meta.sizes.size())) { + if (i > 0) + out << ", "; + out << meta.sizes[i]; + } + out << '}'; + return out; +} + +static inline std::ostream& operator<<( + std::ostream& out, + const IODescriptor& desc) { + out << desc.structure << '\n'; + out << " with grad_enabled=" << desc.grad_enabled << '\n'; + for (const auto i : c10::irange(desc.metadata.size())) { + out << " with v" << i << " having type " << desc.metadata[i] << '\n'; + } + return out; +} + +struct ParsedArgs { + // Flat vector of Variables found in arguments + autograd::variable_list vars; + // Metadata describing nesting of objects received from Python and + // metadata of vars and whether grad is enabled. 
+ IODescriptor desc; + + void extend(const autograd::variable_list& list) { + if (list.empty()) + return; + vars.reserve(vars.size() + list.size()); + for (auto& var : list) + vars.emplace_back(var); + desc.extend(list); + } +}; + +ParsedArgs flatten(py::handle obj); +PyObject* unflatten( + at::ArrayRef vars, + const IODescriptor& structure); + +} // namespace torch::jit::python + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_custom_class.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_custom_class.h new file mode 100644 index 0000000000000000000000000000000000000000..50af856c35ee8bf3e0c92676e2b71724631d5a82 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_custom_class.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +void initPythonCustomClassBindings(PyObject* module); + +struct ScriptClass { + ScriptClass(c10::StrongTypePtr class_type) + : class_type_(std::move(class_type)) {} + + py::object __call__(const py::args& args, const py::kwargs& kwargs); + + c10::StrongTypePtr class_type_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_dict.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_dict.h new file mode 100644 index 0000000000000000000000000000000000000000..e698d5ad0ebfa6b500e22be43c8a787e9689883a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_dict.h @@ -0,0 +1,132 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::jit { + +void initScriptDictBindings(PyObject* module); + +/// An iterator over the keys of ScriptDict. This is used to support +/// .keys() and iteration. +class ScriptDictKeyIterator final { + public: + ScriptDictKeyIterator( + c10::impl::GenericDict::iterator iter, + c10::impl::GenericDict::iterator end) + : iter_(std::move(iter)), end_(std::move(end)) {} + at::IValue next(); + + private: + c10::impl::GenericDict::iterator iter_; + c10::impl::GenericDict::iterator end_; +}; + +/// An iterator over the key-value pairs of ScriptDict. This is used to support +/// .items(). +class ScriptDictIterator final { + public: + ScriptDictIterator( + c10::impl::GenericDict::iterator iter, + c10::impl::GenericDict::iterator end) + : iter_(std::move(iter)), end_(std::move(end)) {} + at::IValue next(); + + private: + c10::impl::GenericDict::iterator iter_; + c10::impl::GenericDict::iterator end_; +}; + +/// A wrapper around c10::Dict that can be exposed in Python via pybind +/// with an API identical to the Python dictionary class. This allows +/// dictionaries to have reference semantics across the Python/TorchScript +/// boundary. +class ScriptDict final { + public: + // Constructor. 
+ ScriptDict(const at::IValue& data) + : dict_(at::AnyType::get(), at::AnyType::get()) { + TORCH_INTERNAL_ASSERT(data.isGenericDict()); + dict_ = data.toGenericDict(); + } + + // Get the type of the dictionary. + at::DictTypePtr type() const { + return at::DictType::create(dict_.keyType(), dict_.valueType()); + } + + // Return a string representation that can be used + // to reconstruct the instance. + std::string repr() const { + std::ostringstream s; + s << '{'; + bool f = false; + for (auto const& kv : dict_) { + if (f) { + s << ", "; + } + s << kv.key() << ": " << kv.value(); + f = true; + } + s << '}'; + return s.str(); + } + + // Return an iterator over the keys of the dictionary. + ScriptDictKeyIterator iter() const { + auto begin = dict_.begin(); + auto end = dict_.end(); + return ScriptDictKeyIterator(begin, end); + } + + // Return an iterator over the key-value pairs of the dictionary. + ScriptDictIterator items() const { + auto begin = dict_.begin(); + auto end = dict_.end(); + return ScriptDictIterator(begin, end); + } + + // Interpret the dictionary as a boolean; empty means false, non-empty means + // true. + bool toBool() const { + return !(dict_.empty()); + } + + // Get the value for the given key. Throws std::out_of_range if the key does + // not exist. + at::IValue getItem(const at::IValue& key) { + return dict_.at(key); + } + + // Set the value for the given key. + void setItem(const at::IValue& key, const at::IValue& value) { + dict_.insert_or_assign(key, value); + } + + // Check whether the dictionary contains the given key. + bool contains(const at::IValue& key) { + return dict_.contains(key); + } + + // Delete the given key from the dictionary. + bool delItem(const at::IValue& key) { + return dict_.erase(key); + } + + // Get the size of the dictionary. + int64_t len() const { + return dict_.size(); + } + + // A c10::Dict instance that holds the actual data. 
+ c10::impl::GenericDict dict_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ir.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ir.h new file mode 100644 index 0000000000000000000000000000000000000000..28c757a3d95dc33283441ea815486b67f1ef8eb9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ir.h @@ -0,0 +1,55 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +void initPythonIRBindings(PyObject* module); + +// execute a Python function, used for Ops we can't optimize but that we want to +// optimize around +struct ConcretePythonOp : public PythonOp { + static Symbol Kind; + + ConcretePythonOp(Graph* graph) : PythonOp(graph, ::c10::prim::PythonOp) {} + ConcretePythonOp* init( + THPObjectPtr&& pyobj, + const std::string& cconv, + pyobj_list&& scalar_args) { + this->pyobj = std::move(pyobj); + this->scalar_args = std::move(scalar_args); + this->cconv = cconv; + return this; + } + // The Python object which contains the implementation of this function. + // This is either a class (non-legacy) or an object (legacy). See + // TraceInterpreterState for execution semantics. + THPObjectPtr pyobj; + // The calling convention for the Python function. + // 'c' -- constant argument + // 'd' -- dynamic argument + std::string cconv; + // Scalar arguments to the Python function. Not necessarily passed to + // the function in this order; see cconv for the correct order. 
+ std::vector scalar_args; + + std::string name() const override; + void cloneFrom(Node* other_) override; + Node* allocNewInstance(Graph* g) override { + return new ConcretePythonOp(g); + } + // recover the autograd.Function instance, if this PythonOp's function + // was originally SomeFunction.apply + // used in ONNX for discovering symbolics + std::optional autogradFunction() const override; + void writeScalars(std::ostream& out) const override; + void lint_python() const override; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ivalue.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ivalue.h new file mode 100644 index 0000000000000000000000000000000000000000..fdb65d222f85a8551ae73f3532226a2d82e87804 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_ivalue.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace py = pybind11; + +namespace c10::ivalue { + +// concrete ivalue Holder that hold a py::object +struct C10_EXPORT ConcretePyObjectHolder final : PyObjectHolder { + public: + static c10::intrusive_ptr create(py::object py_obj) { + return c10::make_intrusive(std::move(py_obj)); + } + + static c10::intrusive_ptr create(const py::handle& handle) { + py::gil_scoped_acquire ag; + return c10::make_intrusive( + handle.cast()); + } + + PyObject* getPyObject() override { + return py_obj_.ptr(); + } + + InferredType tryToInferType() override { + pybind11::gil_scoped_acquire ag; + return torch::jit::tryToInferType(py_obj_); + } + + IValue 
toIValue(const TypePtr& type, std::optional N = std::nullopt) + override { + pybind11::gil_scoped_acquire ag; + return torch::jit::toIValue(py_obj_, type, N); + } + + std::string toStr() override { + pybind11::gil_scoped_acquire ag; + return py::str(py_obj_); + } + + std::vector extractTensors() override { + // We could implement this entirely in C++ via pybind11 but it turns out to + // be substantially slower. Namely, the total time taken by markCompleted on + // a CUDAFuture is 21.5us with this implementation, but goes up to 58.7us + // when using C++. The reason is unclear. + try { + pybind11::gil_scoped_acquire ag; + +#if IS_PYBIND_2_13_PLUS + PYBIND11_CONSTINIT static py::gil_safe_call_once_and_store + storage; + auto& extractorFn = + storage + .call_once_and_store_result([]() -> py::object { + return py::module_::import("torch._jit_internal") + .attr("_extract_tensors"); + }) + .get_stored(); +#else + static py::object& extractorFn = *new py::object( + py::module::import("torch._jit_internal").attr("_extract_tensors")); +#endif + + return extractorFn(py_obj_).cast>(); + } catch (py::error_already_set& e) { + auto err = std::runtime_error( + c10::str("Cannot extract tensors from value: ", e.what())); + { + pybind11::gil_scoped_acquire ag; + e.restore(); + PyErr_Clear(); + } + throw std::runtime_error(err); + } + } + + // Note [Destructing py::object] + // ~~~~~~~~~~~~~~~~~~~~~~~~~~ + // + // (1) Why py_obj_ = py::none(); does not work. Because we also need to + // acquire GIL when destructing py::object of None that de-references None. + // https://docs.python.org/3/c-api/none.html#c.Py_RETURN_NONE + // + // https://stackoverflow.com/questions/15287590/why-should-py-increfpy-none-be-required-before-returning-py-none-in-c + // + // (2) Why we need to call dec_ref() explicitly. Because py::object of + // nullptr, on destruction, effectively does nothing because of it calls + // Py_XDECREF(NULL) underlying. 
+ // https://docs.python.org/3/c-api/refcounting.html#c.Py_XDECREF + ~ConcretePyObjectHolder() override { + pybind11::gil_scoped_acquire ag; + py_obj_.dec_ref(); + // explicitly setting PyObject* to nullptr to prevent py::object's dtor to + // decref on the PyObject again. + py_obj_.ptr() = nullptr; + } + + // explicit construction to avoid erroneous implicit conversion and + // copy-initialization + explicit ConcretePyObjectHolder(py::object py_obj) + : py_obj_(std::move(py_obj)) {} + + private: + py::object py_obj_; +}; + +} // namespace c10::ivalue + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_list.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_list.h new file mode 100644 index 0000000000000000000000000000000000000000..a7e53f537833efdf013b13f67ba1b49a6762a10b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_list.h @@ -0,0 +1,233 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +void initScriptListBindings(PyObject* module); + +/// An iterator over the elements of ScriptList. This is used to support +/// __iter__(), . 
+class ScriptListIterator final { + public: + ScriptListIterator( + c10::impl::GenericList::iterator iter, + c10::impl::GenericList::iterator end) + : iter_(iter), end_(end) {} + at::IValue next(); + bool done() const; + + private: + c10::impl::GenericList::iterator iter_; + c10::impl::GenericList::iterator end_; +}; + +/// A wrapper around c10::List that can be exposed in Python via pybind +/// with an API identical to the Python list class. This allows +/// lists to have reference semantics across the Python/TorchScript +/// boundary. +class ScriptList final { + public: + // TODO: Do these make sense? + using size_type = size_t; + using diff_type = ptrdiff_t; + using ssize_t = Py_ssize_t; + + // Constructor for empty lists created during slicing, extending, etc. + ScriptList(const at::TypePtr& type) : list_(at::AnyType::get()) { + auto list_type = type->expect(); + list_ = c10::impl::GenericList(list_type); + } + + // Constructor for instances based on existing lists (e.g. a + // Python instance or a list nested inside another). + ScriptList(const at::IValue& data) : list_(at::AnyType::get()) { + TORCH_INTERNAL_ASSERT(data.isList()); + list_ = data.toList(); + } + + at::ListTypePtr type() const { + return at::ListType::create(list_.elementType()); + } + + // Return a string representation that can be used + // to reconstruct the instance. + std::string repr() const { + std::ostringstream s; + s << '['; + bool f = false; + for (auto const& elem : list_) { + if (f) { + s << ", "; + } + s << at::IValue(elem); + f = true; + } + s << ']'; + return s.str(); + } + + // Return an iterator over the elements of the list. + ScriptListIterator iter() const { + auto begin = list_.begin(); + auto end = list_.end(); + return ScriptListIterator(begin, end); + } + + // Interpret the list as a boolean; empty means false, non-empty means + // true. + bool toBool() const { + return !(list_.empty()); + } + + // Get the value for the given index. 
+ at::IValue getItem(diff_type idx) { + idx = wrap_index(idx); + return list_.get(idx); + } + + // Set the value corresponding to the given index. + void setItem(diff_type idx, const at::IValue& value) { + idx = wrap_index(idx); + return list_.set(idx, value); + } + + // Check whether the list contains the given value. + bool contains(const at::IValue& value) { + for (const auto& elem : list_) { + if (elem == value) { + return true; + } + } + + return false; + } + + // Delete the item at the given index from the list. + void delItem(diff_type idx) { + idx = wrap_index(idx); + auto iter = list_.begin() + idx; + list_.erase(iter); + } + + // Get the size of the list. + ssize_t len() const { + return list_.size(); + } + + // Count the number of times a value appears in the list. + ssize_t count(const at::IValue& value) const { + ssize_t total = 0; + + for (const auto& elem : list_) { + if (elem == value) { + ++total; + } + } + + return total; + } + + // Remove the first occurrence of a value from the list. + void remove(const at::IValue& value) { + auto list = list_; + + int64_t idx = -1, i = 0; + + for (const auto& elem : list) { + if (elem == value) { + idx = i; + break; + } + + ++i; + } + + if (idx == -1) { + throw py::value_error(); + } + + list.erase(list.begin() + idx); + } + + // Append a value to the end of the list. + void append(const at::IValue& value) { + list_.emplace_back(value); + } + + // Clear the contents of the list. + void clear() { + list_.clear(); + } + + // Append the contents of an iterable to the list. + void extend(const at::IValue& iterable) { + list_.append(iterable.toList()); + } + + // Remove and return the element at the specified index from the list. If no + // index is passed, the last element is removed and returned. 
+ at::IValue pop(std::optional idx = std::nullopt) { + at::IValue ret; + + if (idx) { + idx = wrap_index(*idx); + ret = list_.get(*idx); + list_.erase(list_.begin() + *idx); + } else { + ret = list_.get(list_.size() - 1); + list_.pop_back(); + } + + return ret; + } + + // Insert a value before the given index. + void insert(const at::IValue& value, diff_type idx) { + // wrap_index cannot be used; idx == len() is allowed + if (idx < 0) { + idx += len(); + } + + if (idx < 0 || idx > len()) { + throw std::out_of_range("list index out of range"); + } + + list_.insert(list_.begin() + idx, value); + } + + // A c10::List instance that holds the actual data. + c10::impl::GenericList list_; + + private: + // Wrap an index so that it can safely be used to access + // the list. For list of size sz, this function can successfully + // wrap indices in the range [-sz, sz-1] + diff_type wrap_index(diff_type idx) { + auto sz = len(); + if (idx < 0) { + idx += sz; + } + + if (idx < 0 || idx >= sz) { + throw std::out_of_range("list index out of range"); + } + + return idx; + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_sugared_value.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_sugared_value.h new file mode 100644 index 0000000000000000000000000000000000000000..8d907dfd8166b6c8270bee466b87627732652654 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_sugared_value.h @@ -0,0 +1,383 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +std::string typeString(py::handle h); + +inline std::shared_ptr toSimple(Value* v) { + return std::make_shared(v); +} + +// NB: This should be the single entry-point for instantiating a SugaredValue +// from a Python object. If you are adding support for converting a new Python +// type, *add it in this function's implementation*. +std::shared_ptr toSugaredValue( + py::object obj, + GraphFunction& m, + const SourceRange& loc, + bool is_constant = false); + +std::optional as_function(const py::object& obj); + +struct VISIBILITY_HIDDEN PythonValue : public SugaredValue { + PythonValue( + py::object the_self, + std::optional rcb = std::nullopt, + Value* module_self = nullptr) + : self(std::move(the_self)), + rcb(std::move(rcb)), + moduleSelf_(module_self) {} + + FunctionSchema getSchema( + const size_t n_args, + const size_t n_binders, + const SourceRange& loc); + + // call it like a function, e.g. 
`outputs = this(inputs)` + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& m, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override; + + std::string kind() const override; + + std::vector> asTuple( + const SourceRange& loc, + GraphFunction& m, + const std::optional& size_hint = {}) override; + + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + Value* asValue(const SourceRange& loc, GraphFunction& m) override { + throw( + ErrorReport(loc) + << kind() << " cannot be used as a value. " + << "Perhaps it is a closed over global variable? If so, please " + << "consider passing it in as an argument or use a local variable " + << "instead."); + } + + protected: + py::object getattr(const SourceRange& loc, const std::string& name); + + void checkForAddToConstantsError(std::stringstream& ss); + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + py::object self; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::optional rcb; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + Value* moduleSelf_ = nullptr; +}; + +struct VISIBILITY_HIDDEN PythonModuleValue : public PythonValue { + explicit PythonModuleValue(py::object mod) : PythonValue(std::move(mod)) {} + + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; +}; + +// Used for desugaring uses of the torch.cuda module. All the CUDA APIs with +// torch.cuda.* are resolved using CUDAPythonModuleValue. 
+struct VISIBILITY_HIDDEN CUDAPythonModuleValue : public PythonValue { + explicit CUDAPythonModuleValue(py::object mod) + : PythonValue(std::move(mod)) {} + + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; +}; + +// Represents all the parameters of a module as a List[Tensor] +struct VISIBILITY_HIDDEN ConstantParameterList : public SugaredValue { + ConstantParameterList(Value* the_list) : the_list_(the_list) {} + std::string kind() const override { + return "constant parameter list"; + } + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& caller, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override { + return toSimple(the_list_); + } + + private: + Value* the_list_; +}; + +struct VISIBILITY_HIDDEN ModuleDictMethod : public SugaredValue { + explicit ModuleDictMethod(SugaredValuePtr iterable, std::string name) + : iterable_(std::move(iterable)), name_(std::move(name)) {} + + std::string kind() const override { + return name_; + } + + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& f, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override { + if (!args.empty() || !kwargs.empty()) { + throw( + ErrorReport(loc) << name_ << " method does not accept any arguments"); + } + return iterable_; + } + + SugaredValuePtr iterable_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const std::string name_; +}; + +struct SugaredDict; + +// defines how modules/methods behave inside the script subset. +// for now this does not have any interaction with python. +// in the future, we will add the ability to resolve `self.foo` to python +// {functions, modules, constants} so this SugaredValue is defined here +// anticipating we will eventually need to replace Module with a py::object +// holding the actual nn.Module class. 
+ +struct VISIBILITY_HIDDEN ModuleValue : public SugaredValue { + ModuleValue(Value* self, std::shared_ptr concreteType) + : self_(self), concreteType_(std::move(concreteType)) {} + + std::string kind() const override { + return "module"; + } + + Value* asValue(const SourceRange& loc, GraphFunction& m) override; + + SugaredValuePtr asTupleValue(const SourceRange& loc, GraphFunction& m) + override; + + // select an attribute on it, e.g. `this.field` + std::shared_ptr tryGetAttr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field); + + // select an attribute on it, e.g. `this.field` + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + // select an attribute on it, e.g. `this.field` + bool hasAttr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + // call module.forward with pre_hooks and hooks + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& caller, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override; + + std::shared_ptr getSugaredDict( + const SourceRange& loc, + GraphFunction& m); + + std::shared_ptr getSugaredNamedBufferDict( + const SourceRange& loc, + GraphFunction& m); + + std::shared_ptr getSugaredNamedParameterList( + const SourceRange& loc, + GraphFunction& m); + + std::shared_ptr getSugaredNamedParameterDict( + const SourceRange& loc, + GraphFunction& m); + + void setAttr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field, + Value* newValue) override; + + SugaredValuePtr iter(const SourceRange& loc, GraphFunction& m) override; + + std::shared_ptr getitem( + const SourceRange& loc, + GraphFunction& m, + Value* idx, + TypePtr type_hint) override; + + private: + // Check that the type of all submodules is a subtype of ty. If the function + // returns false, more information about why it returns false (e.g. 
which + // submodule's type is not a subtype of ty) is printed it why_not if it is not + // null. + bool areAllSubmodulesSubtypeOf( + const TypePtr& ty, + std::ostream* why_not = nullptr) const; + + Value* self_; + std::shared_ptr concreteType_; +}; + +bool isNamedTupleClass(const py::object& obj); +TypePtr registerNamedTuple( + const py::object& obj, + const SourceRange& loc, + const ResolutionCallback& rcb); + +void recurseThroughNestedModules( + const SourceRange& loc, + GraphFunction& m, + std::vector& keys, + std::vector& values, + std::shared_ptr& self, + const std::string& prefix, + const std::string& field); + +// Used to support named_modules() +struct VISIBILITY_HIDDEN SugaredDict : public SugaredValue { + explicit SugaredDict( + std::shared_ptr self, + std::shared_ptr keys, + std::shared_ptr modules) + : self_(std::move(self)), + keys_(std::move(keys)), + modules_(std::move(modules)) {} + + std::string kind() const override { + return "ModuleDict"; + } + + std::shared_ptr getKeys() { + return keys_; + } + + std::shared_ptr getModules() { + return modules_; + } + + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + SugaredValuePtr iter(const SourceRange& loc, GraphFunction& m) override { + return keys_; + } + + std::shared_ptr self_; + std::shared_ptr keys_; + std::shared_ptr modules_; +}; + +struct VISIBILITY_HIDDEN BooleanDispatchValue : public SugaredValue { + BooleanDispatchValue(py::dict dispatched_fn) + : dispatched_fn_(std::move(dispatched_fn)) {} + + std::string kind() const override { + return "boolean dispatch"; + } + + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& caller, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override; + + private: + py::dict dispatched_fn_; +}; + +struct VISIBILITY_HIDDEN PythonClassValue : public ClassValue { + PythonClassValue(ClassTypePtr type, py::object py_type) + : ClassValue(std::move(type)), 
py_type_(std::move(py_type)) {} + + std::string kind() const override { + return "Python type"; + } + + std::shared_ptr attr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + bool hasAttr( + const SourceRange& loc, + GraphFunction& m, + const std::string& field) override; + + private: + py::object py_type_; +}; + +struct VISIBILITY_HIDDEN PythonExceptionValue : public ExceptionValue { + explicit PythonExceptionValue(const py::object& exception_class) + : ExceptionValue( + py::str(py::getattr(exception_class, "__name__", py::str("")))), + exception_class_qualified_name_( + py::str(py::module::import("torch._jit_internal") + .attr("_qualified_name")( + exception_class, + /*mangle_name=*/false))) {} + + std::string kind() const override { + return "Python exception"; + } + + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& caller, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override; + + private: + std::string exception_class_qualified_name_; +}; + +// Python Slice class. +struct VISIBILITY_HIDDEN PythonSliceClass : public SugaredValue { + explicit PythonSliceClass() = default; + + std::string kind() const override { + return "Python slice class"; + } + + std::shared_ptr call( + const SourceRange& loc, + GraphFunction& caller, + at::ArrayRef args, + at::ArrayRef kwargs, + size_t n_binders) override; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tracer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tracer.h new file mode 100644 index 0000000000000000000000000000000000000000..d6fc5f502fb4820b8d45343d1fa104e4eaabb32e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tracer.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include + +namespace torch::jit { + +struct Module; + +namespace tracer { +void initPythonTracerBindings(PyObject* module); + +SourceRange getPythonInterpreterSourceRange(); + +Node* preRecordPythonTrace( + THPObjectPtr pyobj, + const std::string& arg_types, + at::ArrayRef inputs, + std::vector scalar_args); + +std::pair, Stack> createGraphByTracingWithDict( + const py::function& func, + const py::dict& inputs_dict, + const Stack& inputs, + const py::function& var_name_lookup_fn, + bool strict, + bool force_outplace, + Module* self = nullptr, + const std::vector& argument_names = {}); + +std::pair, Stack> createGraphByTracing( + const py::function& func, + Stack inputs, + const py::function& var_name_lookup_fn, + bool strict, + bool force_outplace, + Module* self = nullptr, + const std::vector& argument_names = {}); +} // namespace tracer +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tree_views.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tree_views.h new file mode 100644 index 0000000000000000000000000000000000000000..922babfff9ddd475c6f6bbf1da32080f71c39a32 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/python_tree_views.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +void initTreeViewBindings(PyObject* module); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/script_init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/script_init.h new file mode 100644 index 0000000000000000000000000000000000000000..29aea54b291cb43ce5c921dd30ebaf98e209d27c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/script_init.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { +void initJitScriptBindings(PyObject* module); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/update_graph_executor_opt.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/update_graph_executor_opt.h new file mode 100644 index 0000000000000000000000000000000000000000..0ee12ba1cfabddba9b497b189fe756d7cdf3d221 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/update_graph_executor_opt.h @@ -0,0 +1,11 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +namespace torch::jit { +TORCH_API void setGraphExecutorOptimize(bool o); +TORCH_API bool getGraphExecutorOptimize(); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/utf8_decoding_ignore.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/utf8_decoding_ignore.h new file mode 100644 index 0000000000000000000000000000000000000000..8e0f57b2114203af5e68b519cf3b2d4ef2baaaf2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/python/utf8_decoding_ignore.h @@ -0,0 +1,11 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +namespace torch::jit { +TORCH_API void setUTF8DecodingIgnore(bool o); +TORCH_API bool getUTF8DecodingIgnore(); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/argument_spec.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/argument_spec.h new file mode 100644 index 0000000000000000000000000000000000000000..1e8083dc7ab5e5f25fcfc0f6f0a5d9e9431d5f17 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/argument_spec.h @@ -0,0 +1,508 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +C10_CLANG_DIAGNOSTIC_PUSH() +#if C10_CLANG_HAS_WARNING("-Wshorten-64-to-32") +C10_CLANG_DIAGNOSTIC_IGNORE("-Wshorten-64-to-32") +#endif + +namespace torch::jit { + +// GraphExecutor creates specializations of Graphs for different +// dimensionalitities and types of inputs. 
+ +struct ArgumentInfo { + friend struct ArgumentSpec; + using plain_data_type = uint64_t; + + bool defined() const { + return defined_; + } + at::Device device() const { + return at::Device(DeviceType(dev_type_), device_); + } + // XXX: It is guaranteed that this will return false when called on non-tensor + // arguments + bool requires_grad() const { + return requires_grad_; + } + int dim() const { + return dim_; + } + at::ScalarType type() const { + return at::ScalarType(type_); + } + TypePtr toType() const { + if (!defined()) + return TensorType::get(); + + return TensorType::create( + type(), device(), std::optional(dim()), requires_grad()); + } + operator TypePtr() const { + return toType(); + } + + private: + unsigned defined_ : 1; + unsigned requires_grad_ : 1; + unsigned : 5; + unsigned dim_ : 8; + unsigned device_ : 8; + unsigned type_ : 8; + unsigned dev_type_ : 16; + unsigned : 16; +}; + +static_assert( + std::is_standard_layout_v, + "ArgumentInfo is to be a POD struct"); +static_assert( + sizeof(ArgumentInfo) == sizeof(ArgumentInfo::plain_data_type), + "ArgumentInfo is expected to be a 32-bit struct"); + +struct ArgumentSpec { + ArgumentSpec(size_t num_flat_tensor_inputs, size_t num_flat_optional_inputs) + : hash_code(c10::hash_combine( + num_flat_tensor_inputs, + num_flat_optional_inputs)) { + tensor_args.reserve(num_flat_tensor_inputs); + optional_presence.reserve(num_flat_optional_inputs); + } + + void addOptional(const IValue& input) { + bool is_present = !input.isNone(); + optional_presence.push_back(is_present); + hash_code = c10::hash_combine(hash_code, is_present); + } + + void addTensor(const IValue& input, bool with_grad) { + AT_ASSERT(input.isTensor(), "Expected Tensor but found ", input.tagKind()); + tensor_args.emplace_back(); + auto& arg = tensor_args.back(); + // Initialize all fields to 0. This is convenient, because e.g. + // requires_grad() can be checked even on tensors AND will make + // padding bits all 0s. 
+ std::memset(&arg, 0, sizeof(ArgumentInfo)); + + // [argspec refcounting] reinterpret the IValue to avoid having to refcount + // the Tensor microbenchmarks + // https://github.com/zdevito/pytorch/commit/21e7200a0a0fc456bea2f10e95b1781f83933d10 + // show overhead in extra refcounting along this path + const at::Tensor* t = reinterpret_cast(&input); + arg.defined_ = t->defined(); + if (arg.defined_) { + arg.requires_grad_ = with_grad && t->requires_grad(); + arg.dim_ = t->dim(); + at::Device device = t->device(); + arg.dev_type_ = + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + static_cast>(device.type()); + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + arg.device_ = device.index(); + arg.type_ = static_cast(t->scalar_type()); + } + combineHash(arg); + } + + void combineHash(const ArgumentInfo& arg) { + ArgumentInfo::plain_data_type arg_data = 0; + std::memcpy(&arg_data, &arg, sizeof(ArgumentInfo)); + hash_code = c10::hash_combine(hash_code, arg_data); + } + + // equality is fast: check ninputs, and then check the raw array data, + // there are no size/stride indirections + // hopefully std::vector has fast equality + bool operator==(const ArgumentSpec& spec) const { + if (optional_presence != spec.optional_presence) { + return false; + } + if (tensor_args.size() != spec.tensor_args.size()) + return false; + // NB: we need to break out early when there are no elements, because + // passing a nullptr to memcmp is UB. 
+ if (tensor_args.empty()) + return true; + return std::memcmp( + tensor_args.data(), + spec.tensor_args.data(), + tensor_args.size() * sizeof(ArgumentInfo)) == 0; + } + bool operator!=(const ArgumentSpec& spec) const { + return !(*this == spec); + } + size_t numTensors() const { + return tensor_args.size(); + } + const ArgumentInfo& tensorAt(size_t i) const { + return tensor_args[i]; + } + size_t numOptionals() const { + return optional_presence.size(); + } + bool isPresent(size_t i) const { + return optional_presence[i]; + } + size_t hashCode() const { + return hash_code; + } + + private: + size_t hash_code; // precomputed on construction + std::vector tensor_args; + std::vector optional_presence; +}; + +namespace { +static constexpr size_t ARG_SPEC_DEPTH_LIMIT = 128; +} + +// ArgumentSpecCreator takes an initial graph and comes up with a set +// of simple instructions to compute the ArgumentSpec given a set of +// input tensors. +struct TORCH_API ArgumentSpecCreator { + // instructs acts on a stack of a list of input IValues + // at the beginning the stack contains a single list of the inputs to the + // function the ENTER_ instructs descend into subobjects and push new lists + // onto the stack + enum Inst : char { + ENTER_TUPLE, // consume a tuple ivalue from the top-most list, and push the + // list of its elements onto the stack as a new list + ENTER_OBJECT, // same as ENTER_TUPLE, but the input is a class + LEAVE, // pop the top-most list from the stack + SKIP, // consume an element from the top-most list, and discard + SPECIALIZE_OPTIONAL_TENSOR, // consume a optional tensor for the top-most + // list, and add it to the ArgSpec key being + // created + SPECIALIZE_TENSOR, // consume a tensor for the top-most + // list, and add it to the ArgSpec key being created + SPECIALIZE_OPTIONAL, + // consume a nontensor optional from the top-most list, + // and add it to the ArgSpec key being created + }; + ArgumentSpecCreator(Graph& graph); + ArgumentSpec create(bool 
with_grad, const Stack& stack) const; + void specializeTypes(Graph& g, const ArgumentSpec& spec) const; + void dump() const; + using WrittenSlots = std::unordered_set; + + private: + void scan( + const TypePtr& typ, + size_t depth, + const WrittenSlots& written_slots); + size_t num_inputs_; + size_t num_tensors_ = 0; + size_t num_optionals_ = 0; + std::vector instructions_; +}; + +// CompleteArgumentSpec represents one particular specialization. +// It is designed so that it can be created, hashed, and compared quickly +// since it is used along the hot-path of the JIT to check if the code +// we have created is valid for the given inputs. + +// COmpleteArgumentInfoPOD is only used internally in CompleteArgumentSpec +// API users should use ArgumentInfo +struct CompleteArgumentInfoPOD { + // total size is 64-bit + unsigned is_tensor : 8; // all other fields are invalid if this is false + unsigned type : 8; // scalar type + unsigned defined : 1; + unsigned requires_grad : 1; + signed device : 14; + unsigned dev_type : 16; + unsigned + total_dims : 16; // all TensorInfoPODs are in CompleteArgumentSpec's + // tensor_info() array. total_dims is the total number of + // dimensions seen so far in all previous members of + // tensor_info(), including this tensor 2*total_dims + // becomes the offset into the sizes_strides list for the + // _next_ tensor in the tensor_info array for tensor 0, + // the offset is always 0 +}; + +static_assert( + sizeof(CompleteArgumentInfoPOD) == sizeof(int64_t), + "CompleteArgumentInfoPOD must be 64-bit struct for CompleteArgumentSpec encoding to work"); + +struct CompleteArgumentInfo; + +struct CompleteArgumentSpec { + CompleteArgumentSpec(bool with_grad, at::ArrayRef inputs) + : ninputs(inputs.size()) { + int64_t all_dims = 0; + const auto num_inputs = inputs.size(); + for (const auto i : c10::irange(num_inputs)) { + if (!inputs[i].isTensor()) + continue; + auto& tensor = inputs[i].toTensor(); + all_dims += tensor.defined() ? 
tensor.ndimension() : 0; + } + // allocate enough room for all TensorPODs and dimensions + data.resize(ninputs + all_dims * 2); + + // and reinterpret our data array as these structs + auto* pods = reinterpret_cast(data.data()); + int64_t* next_dim = sizes_strides(); + int32_t total_dims = 0; + for (const auto i : c10::irange(num_inputs)) { + auto& pod = pods[i]; + pod.is_tensor = static_cast(inputs[i].isTensor()); + if (pod.is_tensor) { + at::Tensor t = inputs[i].toTensor(); + pod.defined = t.defined(); + if (pod.defined) { + pod.type = static_cast(t.scalar_type()); + at::Device device = t.device(); + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + pod.dev_type = + static_cast>(device.type()); + // NOLINTNEXTLINE(bugprone-signed-char-misuse) + pod.device = device.index(); + pod.requires_grad = with_grad && t.requires_grad(); + total_dims += t.ndimension(); + auto sizes = t.sizes(); + std::copy(sizes.begin(), sizes.end(), next_dim); + next_dim += sizes.size(); + auto strides = t.strides(); + std::copy(strides.begin(), strides.end(), next_dim); + next_dim += strides.size(); + } + } + // each POD has a running tally of all dimensions including its own + TORCH_CHECK( + total_dims < std::numeric_limits::max(), + "The number of dims cannot be packed into CompleteArgumentSpec:", + total_dims); + pod.total_dims = total_dims; + } + // we precompute the hash_code to minimize the time inside of hash + // table operations where we may need to hold a compiler cache lock. 
+ hash_code = c10::hash_combine(0, ninputs); + for (auto d : data) { + hash_code = c10::hash_combine(hash_code, d); + } + } + + // equality is fast: check ninputs, and then check the raw array data, + // there are no size/stride indirections + bool operator==(const CompleteArgumentSpec& spec) const { + return ninputs == spec.ninputs && data == spec.data; + } + bool operator!=(const CompleteArgumentSpec& spec) const { + return !(*this == spec); + } + friend struct CompleteArgumentInfo; + CompleteArgumentInfo at(size_t i) const; + size_t size() const { + return ninputs; + } + size_t hashCode() const { + return hash_code; + } + + private: + ArrayRef tensor_info() const { + return ArrayRef( + reinterpret_cast(data.data()), ninputs); + } + // the start of the sizes_strides information, which comes after the + // CompleteArgumentInfoPOD list. + const int64_t* sizes_strides() const { + return data.data() + ninputs; + } + int64_t* sizes_strides() { + return data.data() + ninputs; + } + size_t hash_code{0}; // precomputed on construction + size_t ninputs; + // layout is ninputs of TensorPOD (each 64-bit) followed by their size and + // stride info for 3 tensors: + // [t0POD][t1POD][t2POD]... 
+ // [t0 sizes][t0 strides][t1 sizes][t1 strides][t2 sizes][t2 strides] + std::vector data; +}; + +// public view of compressed CompleteArgumentInfo +struct CompleteArgumentInfo { + CompleteArgumentInfo(const CompleteArgumentSpec& spec, const int i) + : spec(spec), i(i) {} + bool isTensor() const { + return pod(i).is_tensor; + } + at::ScalarType type() const { + return at::ScalarType(pod(i).type); + } + bool defined() const { + return pod(i).defined; + } + bool requires_grad() const { + return pod(i).requires_grad; + } + at::Device device() const { + return at::Device( + DeviceType(pod(i).dev_type), + static_cast(pod(i).device)); + } + int ndimension() const { + // See [valid range], it is always valid to ask for offset for (i + 1) + return (sizes_strides_offset(i + 1) - sizes_strides_offset(i)) / 2; + } + at::IntArrayRef sizes() const { + return at::IntArrayRef( + spec.sizes_strides() + sizes_strides_offset(i), ndimension()); + } + at::IntArrayRef strides() const { + int ndim = ndimension(); + return at::IntArrayRef( + spec.sizes_strides() + sizes_strides_offset(i) + ndim, ndim); + } + operator TypePtr() const { + if (!defined()) + return TensorType::get(); + return TensorType::create( + type(), + device(), + c10::VaryingShape{sizes()}, + c10::VaryingShape{strides()}, + requires_grad()); + } + + private: + // offsetinto sizes_strides() array where the sizes start for tensor j + // [valid range] valid range is [0, ninputs] + // (i.e. 
you can ask for the offset at ninputs, which would be the offset of + // the next tensor if it existed) + int sizes_strides_offset(int j) const { + if (j == 0) + return 0; + return 2 * pod(j - 1).total_dims; + } + const CompleteArgumentInfoPOD& pod(int j) const { + return spec.tensor_info().at(j); + } + const CompleteArgumentSpec& spec; + const int i; +}; + +inline std::ostream& operator<<(std::ostream& out, const ArgumentInfo& info) { + if (!info.defined()) { + return out << ""; + } + out << "Tensor(device=" << info.device() << ", type=" << toString(info.type()) + << ", requires_grad=" << info.requires_grad() << ", dims=" << info.dim() + << ')'; + return out; +} + +inline std::ostream& operator<<(std::ostream& out, const ArgumentSpec& spec) { + out << '{'; + for (const auto i : c10::irange(spec.numTensors())) { + if (i > 0) + out << ", "; + out << spec.tensorAt(i); + } + out << "; "; + for (const auto i : c10::irange(spec.numOptionals())) { + if (i > 0) + out << ", "; + out << spec.isPresent(i); + } + out << '}'; + return out; +} + +inline std::ostream& operator<<( + std::ostream& out, + const CompleteArgumentInfo& info) { + if (!info.defined()) { + return out << ""; + } + out << "Tensor(device=" << info.device() << ", type=" << toString(info.type()) + << ", requires_grad=" << info.requires_grad() + << ", sizes=" << info.sizes() << ", strides=" << info.strides() << ')'; + return out; +} + +inline std::ostream& operator<<( + std::ostream& out, + const CompleteArgumentSpec& spec) { + out << '{'; + for (const auto i : c10::irange(spec.size())) { + if (i > 0) + out << ", "; + out << spec.at(i); + } + out << '}'; + return out; +} + +inline CompleteArgumentInfo CompleteArgumentSpec::at(size_t i) const { + return CompleteArgumentInfo(*this, i); +} + +inline std::optional convertOptional( + std::optional const& from) { + return from ? 
std::optional(static_cast(*from)) + : std::optional{}; +} + +} // namespace torch::jit + +namespace std { + +template +struct hash> { + size_t operator()(const c10::VaryingShape& vs) const { + return c10::get_hash( + vs.size(), + vs.size() ? vs.sizes().value() : std::vector>()); + } +}; + +template <> +struct hash { + size_t operator()(const c10::TensorType& ptt) const { + return c10::get_hash< + std::optional, + c10::VaryingShape, + c10::VaryingShape, + std::optional>( + torch::jit::convertOptional(ptt.scalarType()), + ptt.sizes(), + ptt.strides(), + ptt.requiresGrad()); + } +}; + +template <> +struct hash { + size_t operator()(const torch::jit::ArgumentSpec& spec) const { + return spec.hashCode(); + } +}; +template <> +struct hash { + size_t operator()(const torch::jit::CompleteArgumentSpec& spec) const { + return spec.hashCode(); + } +}; +} // namespace std + +C10_CLANG_DIAGNOSTIC_POP() + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/autodiff.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/autodiff.h new file mode 100644 index 0000000000000000000000000000000000000000..9c47f7a9f08042a997b52e1d579042ad133c02f5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/autodiff.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::jit { + +using value_list = std::vector; +// clang-format off +// Example showcasing how Gradient is constructed: +// +// Let's assume we have a function f, `m` and `n` do not require grad +// (`n` can depend only on `m`): +// y, n = f(x, m) +// +// Now, let's assume that the reverse of f (called f') needs to use values of `x`, `t` and `y`. +// `t` is an intermediate value produced in the body of f, and let's assume that it requires +// grad too. +// +// In this case differentiate(f) will return this: +// y, n, t = f(x, m) // `t` is appended to the output list +// dx = f'(dy, dt, x, t, y) // No `dm` or `dn` because they do not require gradient +// // All needed values from f are prepended to the input list +// +// f_real_outputs = 2 // Only first two outputs were present in f originally +// df_input_vjps = {0, 2} // i.e. connect grad_fn of y and t variables produced by f, +// y t // with y's output_nr = 0 and t's output_nr = 1 +// df_input_captures = {I0, O2, O0} // Order matches the prefix of inputs to df +// x t y +// df_output_vjps = {0} // i.e. connect next_edge[0] of grad_fn to x's (grad_fn, output_nr). 
+// +// Terminology: vjp = vector-jacobian product +// clang-format on + +struct Gradient { + explicit operator bool() const { + return df != nullptr; + } + std::shared_ptr f; + std::shared_ptr df; + + // Describes how to construct outputs of f from what its graph will return. + // This is necessary because some trailing outputs are intermediates produced + // only to be saved for df (and should be ignored). + size_t f_real_outputs = 0; // initialized for safety. + + // df inputs are split into two sections: vjps (aka grad_outputs) and + // captures. VJPs are "seeds" for the gradient computation given for each + // input capture of an Output kind. Captures are values the need to be saved + // when f is run. We handle inputs specially, because this allows us to avoid + // adding extra vjps as df inputs. + + std::vector df_input_vjps; // Offsets into f's outputs. + // capture can come from inputs or outputs + std::vector df_input_captured_inputs; // Offsets into f's inputs + std::vector df_input_captured_outputs; // Offsets into f's outputs + + // df will produce vjps for a subset of inputs of f that required grad. + // df_output_vjps[idx] == inp_idx means that idx-th output of df produces a + // vjp for inp_idx-th input of f. + std::vector df_output_vjps; // Offsets into f's inputs. 
+ + // How to use gradient to implement a differentiable autograd function: + // When running f: + // - Unwrap input Variables + // - Run f's graph + // - Create grad_fn + // - Wrap outputs in Variables (assume we have a tensor_outputs array): + // outputs = map(Variable, tensor_output) + // for i, offset in enumerate(df_input_vjps): + // outputs[offset].set_grad_fn(grad_fn, output_nr=i) + // - Use df_output_vjps to connect next_edges of grad_fn: + // for idx in df_output_vjps: + // grad_fn.add_next_edge(inputs[idx].gradient_edge()) + // - Save captures for df (care needs to be taken to use SavedVariables for + // inputs and outputs that we will actually return) + // - Return outputs[:f_real_outputs] + // + // When running df: + // - Concatenate received vjps and captured Variables + // - Interpret df + // - Wrap outputs of df into Variables (that don't require grad) +}; +TORCH_API Gradient differentiate(std::shared_ptr& graph); + +// can we take a derivative of this node symbolically? +TORCH_API bool isDifferentiable(const Node* n); +TORCH_API bool isDifferentiable(Graph& g); +TORCH_API bool isZero(Value* v); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/calculate_necessary_args.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/calculate_necessary_args.h new file mode 100644 index 0000000000000000000000000000000000000000..4951616fbded8b83d0cf9d75459a29849c65822e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/calculate_necessary_args.h @@ -0,0 +1,74 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +// Calculates the number of args that need to be passed in. +// Less args may be needed if defaults are provided. +// Returns: {number args needed, number of out args} +inline std::pair CalculateNecessaryArgs( + const std::vector& schema_args, + at::ArrayRef actual_inputs, + bool allow_trailing_out_args) { + if (schema_args.empty()) { + return std::make_pair(0, 0); + } + + // count number of out arguments + int64_t schema_idx = static_cast(schema_args.size()) - 1; + if (allow_trailing_out_args) { + // skip over out arguments in the end. 
+ while (schema_idx >= 0) { + const auto& current_arg = schema_args.at(schema_idx); + if (!current_arg.is_out()) { + break; + } + schema_idx--; + } + } + + int64_t num_out = static_cast(schema_args.size()) - schema_idx - 1; + + if (schema_args.size() < actual_inputs.size()) { + return std::make_pair(actual_inputs.size(), num_out); + } + + // if it is the default args, we reset the index to the last element + if (!allow_trailing_out_args) { + schema_idx = schema_args.size() - 1; + } + // keeps track of trailing unnecessary args + while (schema_idx >= 0) { + // this means it is not default argument, so it is necessary + if (!schema_args.at(schema_idx).default_value().has_value()) { + return std::make_pair(schema_idx + 1, num_out); + } else { + auto schema_value = + schema_args.at(schema_idx).default_value().value().toIValue(); + // non-const value will become nullptr here, so will be marked necessary + // non-const would include prim::ListConstruct, prim::DictConstruct as + // well. + auto actual_value = toIValue(actual_inputs[schema_idx]); + if (!actual_value.has_value()) { + return std::make_pair(schema_idx + 1, num_out); + } + // if the IR has same value as default value of the schema, + // it is not necessary argument. + if (schema_value != actual_value.value()) { + return std::make_pair(schema_idx + 1, num_out); + } + } + schema_idx--; + } + return std::make_pair(0, num_out); +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/custom_operator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/custom_operator.h new file mode 100644 index 0000000000000000000000000000000000000000..07b2ca1264bb178b526eb4d2bb6c0daf0a8f7c78 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/custom_operator.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +/// Registration class for new operators. Effectively calls +/// `torch::jit::registerOperator` for every supplied operator, but allows doing +/// so in the global scope when a `RegisterOperators` object is assigned to a +/// static variable. +/// Note: This is *not* the custom operator API. If you want to register custom +/// operators, take a look at torch::RegisterOperators. +struct TORCH_API RegisterOperators { + RegisterOperators() = default; + + /// Registers a vector of already created `Operator`s. + /// The operator element is now optional to filter null ops. It's backward + /// compatible and works for selective operator registration. + explicit RegisterOperators(std::vector> operators) { + for (std::optional& o : operators) { + if (o) { + registerOperator(std::move(o.value())); + } + } + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..1761821aaeb846a379310913ca9bed4e408e6f72 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry.h @@ -0,0 +1,38 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// This file is temporary until native_functions.yaml and derivatives.yaml are +// merged. Ideally this should all go into native_functions.yaml + +#include +#include + +namespace torch::jit { + +TORCH_API std::optional> GetDecomposition( + const FunctionSchema& schema); + +TORCH_API void RegisterDecomposition( + const FunctionSchema& schema, + std::shared_ptr g); + +TORCH_API void RunDecompositions(std::shared_ptr g); + +TORCH_API std::optional GetDecompositionFunction( + const FunctionSchema& schema); + +// For invocation in C++, recommended is to assign to static local variable +TORCH_API Function* GetDecompositionExecutor(const char* schema_literal); + +TORCH_API Function* GetDecompositionExecutor(const FunctionSchema& schema); + +TORCH_API void run_jit_decomposition( + const c10::OperatorHandle& op, + torch::jit::Stack* stack); + +TORCH_API bool has_jit_decomposition(const FunctionSchema& schema); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry_util.h new file mode 100644 index 0000000000000000000000000000000000000000..87da540f6da8ab34bcff01c70817dcc1411d6862 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/decomposition_registry_util.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +TORCH_API const std::string& GetSerializedDecompositions(); + +TORCH_API const OperatorMap& GetDecompositionMapping(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/exception_message.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/exception_message.h new file mode 100644 index 0000000000000000000000000000000000000000..2c88b716dd9d0917b6f07dc2f2ab135d4f459035 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/exception_message.h @@ -0,0 +1,34 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include + +namespace torch::jit { + +struct ExceptionMessage { + ExceptionMessage(const std::exception& e) : e_(e) {} + + private: + const std::exception& e_; + friend std::ostream& operator<<( + std::ostream& out, + const ExceptionMessage& msg); +}; + +inline std::ostream& operator<<( + std::ostream& out, + const ExceptionMessage& msg) { + auto c10_error = dynamic_cast(&msg.e_); + if (c10_error) { + out << c10_error->what_without_backtrace(); + } else { + out << msg.e_.what(); + } + return out; +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..3ed45801284a1f0eae10bff98f09972ff796f781 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor.h @@ -0,0 +1,152 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(torch_jit_enable_new_executor); + +TORCH_DECLARE_bool(torch_jit_execution_plan_reuse_code_graph); + +namespace torch::jit { +struct GraphExecutorState; +struct Code; + +enum ExecutorExecutionMode { + SIMPLE, + PROFILING, +}; + +struct ExecutionPlan { + ExecutionPlan() = default; + ExecutionPlan(std::shared_ptr graph, std::string function_name) + : code(graph, std::move(function_name)), + graph( + FLAGS_torch_jit_execution_plan_reuse_code_graph + ? code.graph() + : std::move(graph)) {} + + operator bool() const { + return static_cast(graph); + } + + Code code; + std::shared_ptr graph; +}; + +// Notice that those structs don't manage lifetime of their members. +// They are only valid only right after you call getDebugState() and should +// never be used again once another GraphExecutor function is called. 
+ +struct GraphExecutorState { + const Graph* graph = nullptr; + ExecutionPlan fallback; // XXX: members of this field are optional + std::unordered_map execution_plans; +}; + +struct TORCH_API EnableProfilingGuard { + EnableProfilingGuard(); + ~EnableProfilingGuard(); + + private: + bool old_executor_mode = false; + bool old_get_optimize = false; +}; + +struct GraphExecutorImplBase; +struct TORCH_API GraphExecutor { + GraphExecutor() = default; + GraphExecutor(const std::shared_ptr& graph, std::string function_name); + + GraphExecutor( + const std::shared_ptr& graph, + std::string function_name, + ExecutorExecutionMode executor_mode); + + void run(Stack& inputs); + c10::intrusive_ptr runAsync( + Stack& stack, + TaskLauncher taskLauncher = at::launch); + + // `remaining_bailout_depth` stands for the maximum number of profiled and + // specialized recompilations allowed for the current `GraphExecutor`. if + // remaining_bailout_depth is equal to 0, `GraphExecutor` won't perform any + // profiling and specialization. This is also equivalent to the + // SIMPLE_EXECUTOR mode. if remaining_bailout_depth is greater than 0, + // `GraphExecutor` will profile and specialize its input graph based on the + // profiled information whenever a bailout check is failed/triggered, a new + // `GraphExecutor` will be created. This new `GraphExecutor`'s + // remaining_bailout_depth will be reduced by 1. + // If no bailout depth is passed, the depth will be initialized from the + // current global fusion strategy settings. 
+ const ExecutionPlan& getPlanFor( + Stack& inputs, + std::optional remaining_bailout_depth = std::nullopt); + GraphExecutorState getDebugState(); + + void debugFlushCompilationCache(); + + bool isOptimized() const; + + private: + std::shared_ptr pImpl; +}; + +TORCH_API Node* replaceBlockWithFallbackGraph( + Block* b, + ArrayRef inputs); + +// These passes need to run before it is valid to pass to the interpreter +// regardless of whether sizes have been specialized or not. +TORCH_API void runRequiredPasses(const std::shared_ptr& g); + +TORCH_API void debugSetFusionGroupInlining(bool state); +TORCH_API bool getFusionGroupInlining(); + +TORCH_API void debugSetAutodiffSubgraphInlining(bool state); +TORCH_API std::shared_ptr lastExecutedOptimizedGraph(); + +TORCH_API std::atomic& getProfilingMode(); +TORCH_API std::atomic& getExecutorMode(); +TORCH_API std::atomic& getNumProfiledRuns(); +TORCH_API size_t getBailoutDepth(); +TORCH_API bool IsNewExecutorEnabled(); + +struct TORCH_API GraphOptimizerEnabledGuard { + GraphOptimizerEnabledGuard(bool state) + : old_state_(getGraphExecutorOptimize()) { + setGraphExecutorOptimize(state); + } + + ~GraphOptimizerEnabledGuard() { + setGraphExecutorOptimize(old_state_); + } + + bool old_state_; +}; + +namespace detail { + +GraphExecutor* getGradExecutor(Operation& op); + +GraphExecutor* getDifferentiableGraphOpExecutor(Operation& op); + +// for debugging information we expose a way to get the last actually +// run graph. Previous approaches allowed querying the GraphExecutor +// for what graph it would run in certain circumstances (graphFor), but +// this is fragile because we sometimes change how these decisions are made. +// This interface still allows our tests to look at optimized graphs, but +// with less plumbing. +} // namespace detail + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..c9129b02b3fdc278fde42caeeda587895e33b79e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_executor_impl.h @@ -0,0 +1,118 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +void packGradient(const Gradient& gradient, Node* dnode); +bool needsGradient(const std::shared_ptr& graph); +void runOptimization( + std::shared_ptr& graph, + bool unroll_non_constant_loops = true, + bool const_prop_user_classes = true); +void runNondiffOptimization( + std::shared_ptr& graph, + bool strict_fuser_check = false); +void debugSetAutodiffSubgraphInlining(bool state); +bool TORCH_API getAutodiffSubgraphInlining(); + +void debugSetFusionGroupInlining(bool state); +bool getFusionGroupInlining(); + +// Tunable parameters for deciding when to create/keep subgraphs of +// differentiable code +const size_t autodiffSubgraphNodeThreshold = 2; +const size_t autodiffSubgraphInlineThreshold = 5; + +// a Graph can be created via tracing, or via a language-based frontend +// GraphExecutor runs it. It can run the same graph on many different sizes +// and different requires_grad states, and handles specializations for each +// situation. 
GraphExecutor is completely unaware of tracing or module +// parameters to keep the tracing concerns separated. +struct GraphExecutorImplBase { + static std::shared_ptr prepareGraph( + const std::shared_ptr& graph) { + auto copy = graph->copy(); + EraseShapeInformation(copy); + return copy; + } + + GraphExecutorImplBase( + const std::shared_ptr& graph, + std::string function_name) + : graph(prepareGraph(graph)), + function_name_(std::move(function_name)), + num_inputs(this->graph->inputs().size()), + num_outputs(this->graph->outputs().size()) {} + + // entry point where execution begins + void run(Stack& stack); + c10::intrusive_ptr runAsync( + Stack& stack, + TaskLauncher taskLauncher = at::launch); + + virtual const ExecutionPlan& getPlanFor( + Stack& stack, + std::optional remaining_bailout_depth = std::nullopt) = 0; + virtual GraphExecutorState getDebugState() = 0; + virtual ~GraphExecutorImplBase() = default; + + virtual bool isOptimized() const { + return false; + } + + protected: + friend struct GraphExecutor; + + // The unoptimized starting graph. This field is effectively const, but we + // can't make it so because Graph::copy() is not const (and making it const is + // not that easy at this point). + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::shared_ptr graph; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::string function_name_; + + // If false, we'll run the graph as we get it, without any optimizations. + // Useful for debugging. + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const size_t num_inputs; + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + const size_t num_outputs; + + // GraphExecutors can be accessed from multiple threads, so this thread needs + // to be held every time we access the fallback or plan_cache. 
+ // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + std::mutex compile_mutex; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_iterator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_iterator.h new file mode 100644 index 0000000000000000000000000000000000000000..4d0db56a19b489224168b58ebdc854f4d3ed574c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/graph_iterator.h @@ -0,0 +1,152 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace torch::jit { + +// This class facilitates depth-first iteration over all nodes in a graph. +class DepthFirstGraphNodeIterator { + Node* current_; + + public: + // Constructor. + explicit DepthFirstGraphNodeIterator(std::shared_ptr& graph) + : current_(*(graph->block()->nodes().begin())) {} + + // Moves up and to the next node (may move up recursively). + void move_up() { + if (current_ == nullptr) { + return; + } + // Basically we start from the child block (which is current_) + // and we try to find the block that owns it. Now we need to check + // if that block is the graph root block, or if it is an If/Loop/etc + // block. + // + // If it's the graph root block we can stop because there is no "up" + // but if it is a node (e.g. If/Loop/etc) we need to apply logic + // based on where we are coming from to move to the next block. + // This might mean that we need to traverse up again (e.g. if we've + // reached the end of the else clause in an if block we need to go) + // up to the parent block that contains the if. 
+ // + // Similarly if we've reached the end of the parent block containing + // the else clause we might need to go up again so this is a recursive + // function. + // + // BlockNode (if/loop/with) + // | + // [Block1] ... [Block2] + // | + // [ Node1, Node2, Node3, FromNode] + // + auto parent_block = current_->owningBlock(); + TORCH_INTERNAL_ASSERT(parent_block, "Every node must be owned by a block"); + + // Get the node that owns the parent block. This node has to be an if, + // loop, or with. + auto parent_node = parent_block->owningNode(); + if (parent_node == nullptr) { + // If there's no node that owns this current block then we're at the + // top of the graph and since we're trying to move up we have reached + // the end of the traversal. + current_ = nullptr; + return; + } + + // Check the type of node this root is. + if (parent_node->kind() == prim::If) { + // Need to check if we came from the `then` branch or the `else` branch. + auto* then_block = parent_node->blocks().at(0); + auto* else_block = parent_node->blocks().at(1); + + if (parent_block == else_block) { + // If else block then we move to the next node in the parent block. + current_ = parent_node->next(); + if (current_->kind() == prim::Return) { + move_up(); + } + } else { + // If then block then move to the else block if it is not empty. + TORCH_INTERNAL_ASSERT(parent_block == then_block); + bool else_block_empty = + else_block->nodes().begin() == else_block->nodes().end(); + + if (!else_block_empty) { + current_ = *(else_block->nodes().begin()); + } else { + // Since it's empty we move to the next node. 
+ current_ = parent_node->next(); + if (current_->kind() == prim::Return) { + move_up(); + } + } + } + } else if ( + parent_node->kind() == prim::Loop || + parent_node->kind() == prim::With) { + current_ = parent_node->next(); + if (current_->kind() == prim::Return) { + move_up(); + } + } else { + TORCH_INTERNAL_ASSERT( + false, "Only if/loop/with nodes should have child blocks"); + } + } + + // Moves to the next adjacent node or up in to the parent if that is not + // possible. + void move_next() { + if (current_ == nullptr) { + return; + } + + // Increment to the next node in the current block. + current_ = current_->next(); + + // Check if we're at the end of the block. If so we need + // to move upwards (if it makes sense to). + if (current_->kind() == prim::Return) { + move_up(); + } + } + + // Moves to the next node in the graph into children if it can. + void move_into() { + if (current_ == nullptr) { + return; + } + + // Check if we're currently on a node that contains sub-nodes. + if (current_->kind() == prim::If || current_->kind() == prim::Loop || + current_->kind() == prim::With) { + auto* first_block = current_->blocks().at(0); + current_ = first_block->param_node(); + // Move next will move up and out of the current node if the block is + // empty. `move_up` which is called by `move_next` will handle the + // difference between If, Loop, and With blocks appropriately. + move_next(); + } else { + move_next(); + } + } + + // Get the next Node in the graph. \returns nullptr if there are no nodes + // left. + Node* next() { + auto result = current_; + + // Try move into the existing node to set the next node to be returned. + // This will move to the next node if not possible, or move upwards and + // to the next. + move_into(); + + return result; + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/instruction.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/instruction.h new file mode 100644 index 0000000000000000000000000000000000000000..080937ce6d88af4b64ec03ecc9b86eed935756d3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/instruction.h @@ -0,0 +1,104 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { +// instruction look like: +// op_code X, N +// meaning of X, N depend on the op: +// O - index into operator table +// R - index into register table +// I - literal integer +// C - index into constant table +// P - jump offset relative to beginning of current instruction +// F - index into function table +// T - index into the type table, used for guard instructions +// S - index into object slots +// C - index into code table + +#define FORALL_OPCODES(_) \ + _(OP, "O") /* invoke operator X */ \ + _(OPN, "OI") /* invoke vararg operator X with N arguments */ \ + _(LOAD, "R") /* push a value from a register X */ \ + _(MOVE, "R") /* push a value from register X, clearing the register */ \ + _(STOREN, "RI") /* store N values to registers [X, X+N) */ \ + _(STORE, "R") /* store 1 value to registers X */ \ + _(DROP, "") /* drop 1 value from the top of the stack */ \ + _(DROPR, "R") /* clear register X */ \ + _(LOADC, "C") /* push the constant X */ \ + _(JF, "P") /* pop the top of the stack, if false, branch to P */ \ + _(JMP, "P") /* unconditional branch to X */ \ + _(LOOP, "PI") /* perform a loop, X is where to branch if cond is false */ \ + _(RET, "") /* exit execution */ \ + _(WAIT, "") /* wait for a future to be complete */ \ + _(CALL, "F") /* call function X */ \ + _(GUARD, 
"T") /* check a guard against type_table, true if passes */ \ + _(TYPECHECK, "TN") /* check each type of input[i] against type_table[X+N] */ \ + _(FAIL_GUARD, "T") /* fail a guard, patch back to GUARD */ \ + _(PROFILE_OP, "F") /* get a callback from profile_function_table at X */ \ + _(TAIL_CALL, "F") /* replace current frame with function F */ \ + _(INTERFACE_CALL, "CI") /* call method X on the first argument (of N) */ \ + _(GET_ATTR, "S") /* get attribute from slot X in an Object */ \ + _(SET_ATTR, "S") /* set attribute to slot X in an Object */ \ + _(LIST_UNPACK, "I") /* unpack list expecting length I */ \ + _(TUPLE_CONSTRUCT, "I") /* construct a tuple using X inputs */ \ + _(NAMED_TUPLE_CONSTRUCT, \ + "TI") /* construct a tuple of type X, using N inputs */ \ + _(LIST_CONSTRUCT, "TI") /* construct a list of type X, using N inputs */ \ + _(DICT_CONSTRUCT, "TI") /* construct a dict of type X, using N inputs */ \ + _(CREATE_OBJECT, "T") /* create an object of type X */ \ + _(ISINSTANCE, "TI") /* check object is one of types[X:X+N] */ \ + _(TUPLE_SLICE, "II") /* slice tup[X:(X+N)] */ \ + _(TUPLE_INDEX, "") /* get the value from a tuple at that index */ \ + _(RAISE_EXCEPTION, "") /* throws the exception from Python */ \ + _(DICT_INDEX, "") /* gets the value from the dict for given key */ \ + _(UNCHECKED_CAST, "") /* perform an unchecked cast operation */ \ + _(__IS__, "") /* performs `is` operator from Python */ \ + _(UN_INITIALIZED, \ + "") /* sets default values to variables that are uninitialized */ \ + _(__ISNOT__, "") /* performs `is not` operator from Python */ \ + _(FORMAT, "I") /* performs string format function `f strings` or `{}.format` \ + the number of inputs in stored in X */ \ + _(DEVICE, "") /* invokes aten::device for a Tensor */ \ + _(DTYPE, "") /* invokes aten::dtype for a Tensor */ \ + _(DIM, "") /* invokes aten::dim for a Tensor */ \ + _(__NOT__, "") /* performs `not` operator from Python */ \ + _(TO_LIST, "") /* convert the input to a list */ \ + 
_(NUM_TO_TENSOR, \ + "") /* performs the conversion of a number/scalar to Tensor */ \ + _(IS_CUDA, "") /* invokes aten::is_cuda for a Tensor */ \ + _(FORK, "CN") /* launch a thread to run code entry x with N inputs */ \ + _(WARN, "I") /* emit a warning with line information */ \ + _(ENTER, "EN") /* enter scope of a contextmanager */ \ + _(EXIT, "EX") /* exit the last entered contextmanager */ \ + _(AWAITABLE, "CN") /* initialize await for code entry x with N inputs */ + +enum OpCode : uint8_t { +#define DEFINE_OP(op, _) op, + FORALL_OPCODES(DEFINE_OP) +#undef DEFINE_OP +}; + +struct Instruction { + OpCode op; + uint8_t unused; + uint16_t N; + int32_t X; + // TODO: check for overflow + Instruction(OpCode op, int32_t X, uint16_t N) + : op(op), unused(0), N(N), X(X) {} +}; +std::ostream& operator<<(std::ostream& out, Instruction inst); + +bool isOpSupportedInMobile(OpCode op); +char const* toString(OpCode op); +OpCode parseOpCode(const char* str); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter.h new file mode 100644 index 0000000000000000000000000000000000000000..b5b5b3da8f19a94d8cf666148595ac0736826769 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter.h @@ -0,0 +1,165 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(torch_jit_disable_warning_prints); +TORCH_DECLARE_bool(torch_jit_enable_rethrow_caught_exception); + +namespace at { +class Tensor; +TORCH_API void launch(std::function func); +} // namespace at +namespace c10 { +struct IValue; +struct OperatorName; +} // namespace c10 + +namespace torch::jit { + +// The interpreter run Graphs with Tensor inputs and Tensor outputs +// a separate component in the autograd handles unwrapping and wrapping +// variable objects for use in the interpreter. 
+namespace interpreter { +struct CodeImpl; +} + +struct Node; +struct GraphExecutor; +struct InterpreterStateImpl; +struct Graph; +struct Node; +struct Instruction; +using Stack = std::vector; +using c10::ivalue::Future; +using TaskLauncher = std::function)>; + +bool TORCH_API in_torchscript_runtime(); + +struct TORCH_API Code { + Code() = default; + explicit Code(interpreter::CodeImpl* pImpl); + // remaining_bailout_depth is irrelevant in a `Code` object unless the `Code` + // is directly created by `GraphExecutor` in which case it's likely to contain + // `prim::BailOut`s to control the maximum depth of bailout chains + explicit Code( + const std::shared_ptr& graph, + std::string function_name, + size_t remaining_bailout_depth = 0); + + const std::vector& grad_executors(); + const std::vector& diff_graph_op_executors(); + + explicit operator bool() const { + return pImpl != nullptr; + } + size_t num_inputs() const; + size_t num_outputs() const; + size_t num_bailouts() const; + const std::vector& constant_table() const; + const std::vector& type_table() const; + const std::vector& instructions() const; + const std::unordered_map& op_to_num_specified_args() + const; + const std::vector& instructions_source() const; + void request_bailout(size_t index); + size_t register_size() const; + std::shared_ptr graph() const; + + private: + std::shared_ptr pImpl; + friend struct InterpreterStateImpl; + friend std::ostream& operator<<(std::ostream& out, const Code& code); +}; + +struct TORCH_API MobileCode : Code { + explicit MobileCode( + const std::shared_ptr& graph, + std::string function_name, + bool emit_default_input_instructions = true, + bool support_default_args_before_out = true, + bool emit_promoted_ops = true, + size_t remaining_bailout_depth = 0); +}; + +struct InterpreterState { + TORCH_API InterpreterState( + const Code& code, + TaskLauncher taskLauncher = at::launch); + TORCH_API void run(Stack& stack); + TORCH_API c10::intrusive_ptr runAsync(Stack& stack); + 
c10::intrusive_ptr getFuture(); + + private: + InterpreterState(c10::intrusive_ptr pImpl); + // Ideally we should use c10::intrusive_ptr for pImpl; + // but intrusive_ptr requires full definition of InterpreterStateImpl, + // which we need to hide in the header. + c10::intrusive_ptr pImpl; + friend struct InterpreterStateImpl; +}; + +// Created by wait() +struct Suspend : public std::exception { + const char* what() const noexcept override { + return "Suspend"; + } + + explicit Suspend(c10::intrusive_ptr future_) + : future(std::move(future_)) {} + + c10::intrusive_ptr future; +}; + +// InterpreterContinuation propagates dist_autograd_context_id +// through (and only through) the forward pass manually, other +// thread local settings are propagated with ThreadLocalState +struct InterpreterContinuation { + InterpreterContinuation( + InterpreterState state_, + Stack stack_, + int64_t dist_autograd_context_id = 0, + std::optional tls_state = std::nullopt) + : state(std::move(state_)), + stack(std::move(stack_)), + tls_state_(std::move(tls_state)) +#ifdef USE_DISTRIBUTED + , + dist_autograd_context_id_(dist_autograd_context_id) +#endif + { + } + + void operator()(); + + private: + InterpreterState state; + Stack stack; + std::optional tls_state_ = std::nullopt; +#ifdef USE_DISTRIBUTED + int64_t dist_autograd_context_id_; +#endif +}; + +// what is the tensors type, including state from the current execution context +// that modifies how the tensor behaves. For instance if no_grad is enabled +// this will cause the TensorType to have requires_grad=False. +TORCH_API at::TensorTypePtr tensorTypeInCurrentExecutionContext( + const at::Tensor& t); + +// current (TLS) TorchScript interpreter callstack +TORCH_API std::vector currentCallstack(); +TORCH_API std::vector currentModuleHierarchy(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/can_emit_inline.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/can_emit_inline.h new file mode 100644 index 0000000000000000000000000000000000000000..65479623bac2d18d83a213a1c95163844462da08 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/can_emit_inline.h @@ -0,0 +1,111 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::jit::interpreter { +/* +This is an optimization that reduces the number of store/load/move nodes needed +by recognizing that parts of the graph are simple trees like a*x + b*y. When +this happens it is possible to work directly off of the stack by emitting the +tree in a depth-first left-to-right manner: + load a + load x + mul # stack now is a*x + load b + load y + mul # stack now is a*x, b*y + add + +can_emit_inline_[node] == true means that this node participates as a non-root +member of one of these trees. The code emitter will not emit this node when +it is encountered in the node. Instead the node is emitted in a depth first +traversal from where it is used in a tree. + +To participate in a tree a node must have a single use (otherwise it is not +tree-like) and output a single value (for simplicity.) If our IR was functional, +these would be the only constraints. However, many nodes have side effects, so +we must ensure that emitting the nodes in depth first order from the tree's root +_does not reorder the emission of the nodes_. To ensure this, we work backward +from the root of a potential tree, visiting its inputs in reverse depth first +order, while scanning the node list backward (with the block_point node). 
When +these traversal line up we know it is safe to emit the tree in this way. We +ignore constant nodes, which do not have side effects. +*/ +struct CanEmitInline { + explicit CanEmitInline(Graph& graph) { + scanBlock(graph.block()); + } + bool canInline(Value* v) { + return v->node()->kind() != prim::Param && + // without this a BailOut may float downstream past some later + // BailOut + // and receive a higher jf_index. Then a GUARD instruction + // we generated for the floated BailOut will get popped up from the + // instruction stack + // by the later BailOut in createBailoutBlock and its jf_index + // will become invalid. + v->node()->kind() != prim::TensorExprGroup && + v->node()->kind() != prim::TensorExprDynamicGroup && + v->node()->kind() != prim::StaticSubgraph && + v->node()->kind() != prim::CudaFusionGroup && + v->node()->kind() != prim::FusionGroup && + v->node()->kind() != prim::BailOut && v->uses().size() == 1 && + v->node()->outputs().size() == 1; + } + + Node* previousNonConstant(Node* n) { + do { + n = n->prev(); + } while (n->kind() == prim::Constant); + return n; + } + + Node* scanValue(Node* block_point, Value* v) { + // this node is a candidate for inline, if our reverse scan of the + // node list lines up with the use of v, we know it will be emitted in + // tree order, and we can inlining. Scan continues for further nodes. + if (v->node() == block_point && canInline(v)) { + // since we inlined this node, we may be able to recursively inline + // its inputs, so we continue scanning it + block_point = scanNode(v->node()); + can_emit_inline_[v->node()] = true; + } + // if it does not line up, we can't inline 'v', and will just generate + // a load/move for it. However, other inputs may still appear in tree + // order so we continue the scan of the inputs. 
+ return block_point; + } + + Node* scanNode(Node* n) { + // don't bother to scan nodes we have already determined to be inline + if (can_emit_inline_.count(n)) { + return nullptr; + } + for (auto b : n->blocks()) { + scanBlock(b); + } + Node* block_point = previousNonConstant(n); + for (auto it = n->inputs().rbegin(), end = n->inputs().rend(); it != end; + ++it) { + block_point = scanValue(block_point, *it); + } + return block_point; + } + + void scanBlock(Block* b) { + scanNode(b->return_node()); + for (auto node : b->nodes().reverse()) { + scanNode(node); + } + } + std::unordered_map can_emit_inline_; +}; + +} // namespace torch::jit::interpreter + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/code_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/code_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..8b3563f42b919259fd3e682f25f6470ebac6325d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/code_impl.h @@ -0,0 +1,1066 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(torch_jit_enable_expanded_stacks); +TORCH_DECLARE_bool(torch_jit_expanded_stacks_mangled); + +namespace torch::jit::interpreter { + +template +Ttarget safe_narrow_cast(Tsource v) { + Ttarget res = static_cast(v); + // Casting it back to check whether it overflew. 
+ if (static_cast(res) != v) { + TORCH_WARN( + "ATTENTION: your model computation is overflowing, safe_narrow_cast<>() failed"); + return v; + } + return res; +} + +// BailoutBlocks are used to temporarily store +// instructions (typically, argument LOADs and TAIL_CALL) +// generated for prim::BailOut nodes +// before they are merged back into +// CodeImpl._instructions_ by insertBailoutBlocks +struct BailoutBlock { + size_t jf_instruction_index; // this node gets patched to jump here on failure + std::vector instructions; // ends in a TAIL_CALL + + explicit BailoutBlock(size_t jf_index) : jf_instruction_index(jf_index) {} +}; + +// for keeping track of the current node +struct WithCurrentNode { + WithCurrentNode(Node** loc, Node* new_value) : loc_(loc), old_value_(*loc_) { + *loc = new_value; + } + ~WithCurrentNode() { + *loc_ = old_value_; + } + + private: + Node** loc_; + Node* old_value_; +}; + +struct NodeSourceInfo { + const char* func_name_{nullptr}; + const char* file_name_{nullptr}; + size_t line_{0}; + NodeSourceInfo() = default; +}; + +struct CodeImpl { + friend struct InterpreterState; + std::vector instructions_; + + const c10::unique_t node_stack_attr_symbol_ = + static_cast(attr::node_stack_idx); + // Expanded inlined stacks as pointers to values in inlined call stack. + std::vector> expanded_node_stacks_; + + // same length as instructions. + // what node in the graph cause this + // instruction to be emitted? 
+ std::vector instructions_source_; + std::vector constant_table_; + std::vector operator_table_; +#ifndef NDEBUG + std::vector full_operator_table_; +#endif + // map<(op name, num inputs), index in operator table>, to avoid duplicates, + // not including vararg operators + std::unordered_map< + std::pair, + int, + std::function& p)>> + operator_table_inv_; + std::vector function_table_; + std::vector> forked_functions_; + std::vector> awaited_functions_; + std::vector type_table_; + std::vector&)>> + profile_function_table_; + + int register_size_ = 0; + size_t n_outputs; + size_t n_inputs; + TypePtr return_type_; + std::string function_name_; + + // We MUST hold onto graph here because some Operators stored in the + // instruction lists have dependencies on meta-data stored in the graph + // that would be dead otherwise. + // It is also very useful for debugging interpreter problems to + // keep this around. + std::shared_ptr graph_; + std::optional> grad_executors_; + std::optional> forward_executors_; + PreprocessGraph preprocess_; + + // map from unique of nodes to register in register table + std::unordered_map value_to_reg_; + + // map from operator name to specified arguments + // Example: for a schema of aten::foo.str + // aten::foo.str(arg0: str="default", arg1: int=0, + // arg2: bool=False, arg3: float=0.0) + // If the usages in a graph is: + // aten::foo("somestr", arg1=0, arg2=True, arg3=0.0) + // aten::foo("somestr", arg1=1, arg2=False, arg3=0.0) + // op_to_num_specified_args_["aten::foo.str"] = 3 + // This is because for all usages, at most 3 args are used. + std::unordered_map op_to_num_specified_args_; + + std::unordered_map op_to_num_out_args_; + + // running count of uses as we emit. When we reach use_count_[v] = + // v.uses().size() we know it is the final use and we can move rather than + // load. 
+ std::unordered_map use_count_; + + Node* current_node_; // used in creation of code to keep track + // of node being emitted + Node* last_inserted_op_ = nullptr; + + // out-of-line jumps for bailouts that are patched in at the end + std::vector bailout_blocks_; + std::vector> bailout_functions_; + size_t remaining_bailout_depth_; + + CodeImpl( + const std::shared_ptr& graph, + std::string function_name, + size_t remaining_bailout_depth, + bool emit_instructions = true) + : operator_table_inv_( + 0, + [](const std::pair& p) { + return std::hash()(p.first) ^ + std::hash()(p.second); + }), + function_name_(std::move(function_name)), + preprocess_(*graph), + current_node_(preprocess_.graph->return_node()), + remaining_bailout_depth_(remaining_bailout_depth) { + graph_ = preprocess_.graph; + n_outputs = graph_->outputs().size(); + if (n_outputs == 1) { + return_type_ = graph->outputs().at(0)->type(); + } else { + return_type_ = TupleType::create( + fmap(graph->outputs(), [](const Value* v) { return v->type(); })); + } + n_inputs = graph_->inputs().size(); + if (emit_instructions) { + // NOLINTNEXTLINE(clang-analyzer-optin.cplusplus.VirtualCall) + run(); + } + } + + virtual ~CodeImpl() = default; + + // since subclass of CodeImpl needs to populate + // op_to_num_specified_args, we separate the calls + // that changes internals of CodeImpl into a separate + // function. 
+ virtual void run() { + emitCodeForBlock(graph_->block()); + insertInstruction(RET); + // we deferred the emission of bailout blocks so they appear at the end + // emit them now and patch up the jumps + insertBailoutBlocks(); + } + + const std::vector& constant_table() const { + return constant_table_; + } + + void request_bailout(size_t index) { + auto count = index; + for (const auto instr_index : c10::irange(instructions_.size())) { + if (instructions_[instr_index].op == GUARD || + instructions_[instr_index].op == FAIL_GUARD) { + if (count-- == 0) { + // patching GUARD to FAIL_GUARD + instructions_[instr_index].op = FAIL_GUARD; + GRAPH_DEBUG( + "Added a bailout request for ", + index, + " at instruction ", + instr_index); + break; + } + } + } + } + + const std::vector& instructions() const { + return instructions_; + } + + const std::unordered_map& op_to_num_specified_args() + const { + return op_to_num_specified_args_; + } + + const std::vector& instructions_source() const { + return instructions_source_; + } + + NodeSourceInfo getSourceInfoFromSourceRange(const SourceRange& range) { + NodeSourceInfo nodeSource; + SourceRange r = range; + if (!FLAGS_torch_jit_expanded_stacks_mangled && range.source()) { + if (auto orig = range.source()->findSourceRangeThatGenerated(r)) { + r = *orig; + } + } + if (r.source()) { + auto lineno = r.source()->lineno_for_offset(r.start()); + nodeSource.line_ = r.source()->lineno_to_source_lineno(lineno); + if (r.source()->filename()) { + nodeSource.file_name_ = r.source()->filename().value().c_str(); + } + } + return nodeSource; + } + + void expandInlinedNodeStack( + const InlinedCallStackPtr& cs, + std::vector* expandedstack) { + auto nodeSourceInfo = getSourceInfoFromSourceRange(cs->source_range()); + nodeSourceInfo.func_name_ = cs->function_name().c_str(); + expandedstack->emplace_back(nodeSourceInfo); + + if (cs->callee()) { + expandInlinedNodeStack(cs->callee().value(), expandedstack); + } + } + + void getNodeStack( + const 
Node* node, + std::vector* expandedstack) { + if (current_node_->callstack()) { + expandInlinedNodeStack(current_node_->callstack().value(), expandedstack); + } + auto nodeSourceInfo = getSourceInfoFromSourceRange(node->sourceRange()); + expandedstack->emplace_back(nodeSourceInfo); + } + + void insertInstruction(OpCode op, int64_t X = 0, uint64_t N = 0) { + instructions_.emplace_back( + op, + safe_narrow_cast(X), + safe_narrow_cast(N)); + instructions_source_.emplace_back(current_node_); + + if (FLAGS_torch_jit_enable_expanded_stacks && + !current_node_->hasAttribute(attr::node_stack_idx)) { + std::vector expandedStack; + getNodeStack(current_node_, &expandedStack); + auto insertIdx = expanded_node_stacks_.size(); + expanded_node_stacks_.emplace_back(expandedStack); + current_node_->i_(attr::node_stack_idx, insertIdx); + } + + // check that we didn't accidentally emit nodes out of topological order + if (op == OP) { + if (last_inserted_op_ != nullptr && current_node_ != last_inserted_op_ && + current_node_->owningBlock() == last_inserted_op_->owningBlock()) { + TORCH_INTERNAL_ASSERT( + current_node_->isAfter(last_inserted_op_), + *current_node_, + " is not after ", + *last_inserted_op_); + } + last_inserted_op_ = current_node_; + } + } + + void truncateInstructions(size_t size) { + while (instructions_.size() > size) { + instructions_.pop_back(); + instructions_source_.pop_back(); + } + } + + void createBailoutBlock(size_t jf_index) { + bailout_blocks_.emplace_back(jf_index); + auto& bailout_instructions = bailout_blocks_.back().instructions; + + bailout_instructions.insert( + bailout_instructions.end(), + instructions_.begin() + jf_index + 1, + instructions_.end()); + truncateInstructions(jf_index + 1); + } + + int allocRegs(at::ArrayRef vs) { + int result = register_size_ + 1; + for (Value* v : vs) { + AT_ASSERT(value_to_reg_.count(v) == 0); + value_to_reg_[v] = ++register_size_; + } + return result; + } + + int registerFor(Value* v) { + return 
value_to_reg_.at(v); + } + + void emitUse(Value* input, bool drop) { + // drop - if true, we are not actually going to use this thing + // and we can short circuit doing many instructions here + // by either clearing the register (DROPR) or just popping the stack + // (DROP) + if (preprocess_.can_emit_inline[input->node()]) { + emitNode(input->node()); + if (drop) { + insertInstruction(DROP); + } + } else { + int reg = registerFor(input); + bool moved = input->uses().size() == ++use_count_[input]; + + OpCode op{}; + if (input->node()->kind() == prim::Constant) { + op = LOADC; + } else if (moved) { + op = MOVE; + } else { + op = LOAD; + } + + if (drop) { + op = DROPR; + } + insertInstruction(op, reg); + } + } + + void emitLoadInputs(at::ArrayRef inputs) { + for (Value* input : inputs) { + emitUse(input, false); + } + } + + void emitLoadInputs(at::ArrayRef inputs, int num_include) { + int count = 0; + for (Value* input : inputs) { + if (count < num_include) { + emitUse(input, false); + count++; + } + } + } + + void emitLoadInputs(at::ArrayRef inputs, size_t start, size_t end) { + for (size_t i = start; i < end; i++) { + emitUse(inputs[i], false); + } + } + + virtual void emitOperator(Node* node) { + emitLoadInputs(node->inputs()); + const Operator& op = node->getOperator(); + int num_inputs = node->inputs().size(); + bool is_vararg = op.schema().is_vararg(); + + int operation_index = add_to_operator_table( + op, + node, + c10::toString(op.schema().operator_name()), + num_inputs, + is_vararg); + + if (op.hasOperation() && is_vararg) { + insertInstruction(OPN, operation_index, num_inputs); + } else { + insertInstruction(OP, operation_index); + } + } + + void emitWait(Node* node) { + emitLoadInputs(node->inputs()); + insertInstruction(WAIT); + } + + void emitDrop(at::ArrayRef to_drop) { + for (Value* input : to_drop) { + emitUse(input, true); + } + } + + void emitStoreOutputs(Node* node) { + size_t N = node->outputs().size(); + if (N == 0) { + return; + } + int regs = 
allocRegs(node->outputs()); + if (N == 1) { + insertInstruction(STORE, regs); + } else { + insertInstruction(STOREN, regs, node->outputs().size()); + } + } + + int insertConstant(IValue value) { + int result = constant_table_.size(); + constant_table_.emplace_back(std::move(value)); + return result; + } + + virtual void emitOperatorOrInstruction( + Node* node, + OpCode op, + int64_t X = 0, + uint64_t N = 0, + bool emit_inputs = true) { + if (emit_inputs) { + emitLoadInputs(node->inputs()); + } + insertInstruction(op, X, N); + } + + void emitFormat(Node* node) { + emitOperatorOrInstruction(node, FORMAT, node->inputs().size(), 0); + } + + void checkNodeAndEmit(Node* node) { + // check if the node should be emitted as instruction or operator + const Operator& op = node->getOperator(); + std::string unique_op_name = c10::toString(op.schema().operator_name()); + if (unique_op_name.find("aten::__getitem__.Dict") == 0) { + // __get_item__ overloaded operator for Dict + // needs to be emitted an instruction + emitOperatorOrInstruction(node, DICT_INDEX); + } else { + emitOperator(node); + } + } + + void emitConstant(Node* node) { + if (node->output()->type()->kind() == FunctionType::Kind) { + return; + } + // constants are just put in the constant table + value_to_reg_[node->output()] = + insertConstant(toIValue(node->output()).value()); + } + + void emitIf(Node* node) { + emitLoadInputs(node->inputs()); + size_t start_if = instructions_.size(); + insertInstruction(JF, 0); // dummy offset to be filled in + emitCodeForBlock(node->blocks().at(0)); + insertInstruction(JMP, 0); // dummy offset + size_t start_else = instructions_.size(); + instructions_[start_if].X = start_else - start_if; + emitCodeForBlock(node->blocks().at(1)); + instructions_[start_else - 1].X = instructions_.size() - (start_else - 1); + } + + void emitLoop(Node* loop) { + insertInstruction(LOADC, insertConstant(0)); + emitLoadInputs(loop->inputs()); + size_t start = instructions_.size(); + 
insertInstruction(LOOP, 0, loop->inputs().size()); // dummy offset + emitCodeForBlock(loop->blocks().at(0)); + insertInstruction(JMP, start - instructions_.size()); + instructions_[start].X = instructions_.size() - start; + } + + void emitCall(Function* func, at::ArrayRef inputs) { + emitLoadInputs(inputs); + insertInstruction(CALL, function_table_.size()); + function_table_.emplace_back(func); + } + + void emitNodeAtBlockLevel(Node* node) { + WithCurrentNode guard(¤t_node_, node); + switch (node->kind()) { + case prim::Constant: + emitConstant(node); + break; + case prim::Return: + emitLoadInputs(node->inputs()); + break; + default: + if (!preprocess_.can_emit_inline[node]) { + emitNode(node); + emitStoreOutputs(node); + } + break; + } + } + + size_t emitType(TypePtr t) { + size_t r = type_table_.size(); + type_table_.emplace_back(std::move(t)); + return r; + } + + void emitTypeCheck(Node* node) { + auto num_inputs = node->inputs().size(); + + // Check that TypeCheck has at least one input. + TORCH_INTERNAL_ASSERT( + num_inputs && num_inputs + 1 == node->outputs().size()); + emitLoadInputs(node->inputs()); + + // Emit the expected type. 
+ size_t types_start = type_table_.size(); + auto types = node->tys(attr::types); + for (const auto i : c10::irange(num_inputs)) { + emitType(types[i]); + } + insertInstruction(TYPECHECK, types_start, num_inputs); + } + + size_t emitGuard(Node* node) { + // unoptimized graph is at index 0 + // guarded input is at index 1 + // the rest of args follow + emitLoadInputs(node->inputs().slice(1, 1)); + insertInstruction(GUARD, emitType(node->outputs().at(0)->type())); + insertInstruction(JF, 0 /* to be patched */); + return instructions_.size() - 1; + } + + void emitBailOut(Node* node) { + auto jf_index = emitGuard(node); + auto unoptimized_graph = node->inputs().at(0)->node()->g(attr::Subgraph); + // note, guaded input is already loaded onto the stack + // for GUARD instruction + emitLoadInputs(node->inputs().slice(2)); + insertInstruction(TAIL_CALL, function_table_.size()); + TORCH_INTERNAL_ASSERT(node->kind() == prim::BailOut); + auto bailout_index = node->i(attr::index); + TORCH_INTERNAL_ASSERT(bailout_index >= 0); + + auto build_bailout_graph = [bailout_index, + unoptimized_graph](GraphFunction& func) { + BuildBailOutGraphFrom(bailout_index, unoptimized_graph, func.graph()); + }; + + auto empty_graph = std::make_shared(); + auto func = std::make_unique( + "bailout", empty_graph, build_bailout_graph); + function_table_.emplace_back(func.get()); + bailout_functions_.emplace_back(std::move(func)); + createBailoutBlock(jf_index); + } + + void emitProfile(Node* node) { + emitLoadInputs(node->inputs()); + insertInstruction(PROFILE_OP, profile_function_table_.size()); + if (node->cast()) { + profile_function_table_.push_back(node->cast()->getCallback()); + } else if (node->cast()) { + profile_function_table_.push_back( + node->cast()->getCallback()); + } else { + TORCH_INTERNAL_ASSERT(false); + } + } + + void emitGetAttr(Node* node) { + emitLoadInputs(node->inputs()); + const auto type = node->input()->type()->expect(); + const auto& field = node->s(attr::name); + const 
auto slot = type->getAttributeSlot(field); + insertInstruction(GET_ATTR, slot); + } + + void emitSetAttr(Node* node) { + emitLoadInputs(node->inputs()); + const auto type = node->inputs().at(0)->type()->expect(); + const auto& field = node->s(attr::name); + const auto slot = type->getAttributeSlot(field); + insertInstruction(SET_ATTR, slot); + } + + void insertBailoutBlocks() { + for (const BailoutBlock& block : bailout_blocks_) { + TORCH_INTERNAL_ASSERT(instructions_[block.jf_instruction_index].op == JF) + instructions_[block.jf_instruction_index].X = + instructions_.size() - block.jf_instruction_index; + instructions_.insert( + instructions_.end(), + block.instructions.begin(), + block.instructions.end()); + instructions_source_.insert( + instructions_source_.end(), + block.instructions.size(), + instructions_source_[block.jf_instruction_index]); + } + } + void emitInterfaceCall( + std::string method_name_str, + c10::ArrayRef inputs) { + emitLoadInputs(inputs); + auto method_name = insertConstant(std::move(method_name_str)); + insertInstruction(INTERFACE_CALL, method_name, inputs.size()); + } + + void emitListUnpack(Node* node) { + emitLoadInputs(node->inputs()); + insertInstruction(LIST_UNPACK, node->outputs().size()); + } + + void emitTupleConstruct(Node* node) { + bool named = + node->output()->type()->expectRef().name().has_value(); + if (named) { + emitContainerConstruct(NAMED_TUPLE_CONSTRUCT, node); + } else { + emitLoadInputs(node->inputs()); + insertInstruction(TUPLE_CONSTRUCT, node->inputs().size()); + } + } + + void emitContainerConstruct(OpCode op, Node* node) { + emitLoadInputs(node->inputs()); + insertInstruction( + op, emitType(node->output()->type()), node->inputs().size()); + } + + void emitCreateObject(Node* node) { + insertInstruction(CREATE_OBJECT, emitType(node->output()->type())); + } + void emitIsinstance(Node* node) { + emitLoadInputs(node->inputs()); + std::vector types = node->tys(attr::types); + size_t types_start = type_table_.size(); + 
for (const auto& typ : types) { + emitType(typ); + } + insertInstruction(ISINSTANCE, types_start, types.size()); + } + + void emitTupleSlice(Node* node) { + emitLoadInputs(node->inputs()); + int64_t beg_ind = node->i(attr::beg); + int64_t end_ind = node->i(attr::end); + insertInstruction(TUPLE_SLICE, beg_ind, end_ind - beg_ind); + } + + void emitFork(Node* node) { + emitLoadInputs(node->inputs()); + auto forked_fn = std::make_unique( + "", node->g(attr::Subgraph), nullptr); + forked_functions_.emplace_back(std::move(forked_fn)); + function_table_.emplace_back(forked_functions_.back().get()); + insertInstruction(FORK, function_table_.size() - 1, node->inputs().size()); + } + + void emitAwaitable(Node* node) { + emitLoadInputs(node->inputs()); + auto await_fn = std::make_unique( + "", node->g(attr::Subgraph), nullptr); + awaited_functions_.emplace_back(std::move(await_fn)); + function_table_.emplace_back(awaited_functions_.back().get()); + insertInstruction( + AWAITABLE, function_table_.size() - 1, node->inputs().size()); + } + + void emitWarn(Node* node) { + if (FLAGS_torch_jit_disable_warning_prints) { + return; + } + + emitLoadInputs(node->inputs()); + int32_t idx = -1; + if (node->hasAttribute(attr::warn_id)) { + idx = static_cast(node->i(attr::warn_id)); + } + insertInstruction(WARN, idx); + } + + void emitEnter(Node* node) { + emitLoadInputs(node->inputs()); + insertInstruction(ENTER); + } + + void emitExit(Node* /* node */) { + insertInstruction(EXIT); + } + + void emitNode(Node* node) { + WithCurrentNode guard(¤t_node_, node); + switch (node->kind()) { + default: + // NOLINTNEXTLINE(clang-analyzer-optin.cplusplus.VirtualCall) + checkNodeAndEmit(node); + // emitOperator(node); + break; + case prim::RaiseException: + emitOperatorOrInstruction(node, RAISE_EXCEPTION); + break; + case prim::TupleIndex: + emitOperatorOrInstruction(node, TUPLE_INDEX); + break; + case prim::Drop: + emitDrop(node->inputs()); + break; + case prim::Constant: + emitConstant(node); + 
break; + case prim::If: + emitIf(node); + break; + case prim::Loop: + emitLoop(node); + break; + case aten::wait: + emitWait(node); + break; + case prim::Param: + break; + case prim::CallFunction: + emitCall( + node->inputs().at(0)->type()->expectRef().function(), + node->inputs().slice(1)); + break; + case prim::CallMethod: + if (auto class_type = node->inputs().at(0)->type()->cast()) { + emitCall(&class_type->getMethod(node->s(attr::name)), node->inputs()); + } else { + emitInterfaceCall(node->s(attr::name), node->inputs()); + } + break; + case prim::TypeCheck: + emitTypeCheck(node); + break; + case prim::BailOut: + emitBailOut(node); + break; + case prim::profile_ivalue: + case prim::profile: + emitProfile(node); + break; + case prim::GetAttr: + emitGetAttr(node); + break; + case prim::SetAttr: + emitSetAttr(node); + break; + case prim::ListUnpack: + emitListUnpack(node); + break; + case prim::TupleConstruct: + emitTupleConstruct(node); + break; + case prim::ListConstruct: + emitContainerConstruct(LIST_CONSTRUCT, node); + break; + case prim::DictConstruct: + emitContainerConstruct(DICT_CONSTRUCT, node); + break; + case prim::CreateObject: + emitCreateObject(node); + break; + case prim::isinstance: + emitIsinstance(node); + break; + case prim::TupleSlice: + emitTupleSlice(node); + break; + case prim::fork: + emitFork(node); + break; + case prim::awaitable: + emitAwaitable(node); + break; + case aten::warn: + emitWarn(node); + break; + case prim::Enter: + emitEnter(node); + break; + case prim::Exit: + emitExit(node); + break; + case prim::Uninitialized: + emitOperatorOrInstruction(node, UN_INITIALIZED, 0, 0, false); + break; + case prim::dtype: + emitOperatorOrInstruction(node, DTYPE); + break; + case prim::device: + emitOperatorOrInstruction(node, DEVICE); + break; + case aten::dim: + emitOperatorOrInstruction(node, DIM); + break; + case prim::is_cuda: + emitOperatorOrInstruction(node, IS_CUDA); + break; + case aten::__not__: + emitOperatorOrInstruction(node, 
__NOT__); + break; + case aten::format: + emitFormat(node); + break; + case aten::__is__: + emitOperatorOrInstruction(node, __IS__); + break; + case aten::__isnot__: + emitOperatorOrInstruction(node, __ISNOT__); + break; + case prim::NumToTensor: + emitOperatorOrInstruction(node, NUM_TO_TENSOR); + break; + case prim::tolist: + emitOperatorOrInstruction(node, TO_LIST); + break; + } + } + + void emitCodeForBlock(Block* block) { + emitNodeAtBlockLevel(block->param_node()); + for (auto node : block->nodes()) { + emitNodeAtBlockLevel(node); + } + emitNodeAtBlockLevel(block->return_node()); + } + + const std::vector& grad_executors() { + if (!grad_executors_) { + grad_executors_.emplace(); + for (Operation& op : operator_table_) { + if (auto executor = detail::getGradExecutor(op)) { + grad_executors_->push_back(executor); + } + } + } + return *grad_executors_; + } + + const std::vector& diff_graph_op_executors() { + if (!forward_executors_) { + forward_executors_.emplace(); + for (Operation& op : operator_table_) { + if (auto executor = detail::getDifferentiableGraphOpExecutor(op)) { + forward_executors_->push_back(executor); + } + } + } + return *forward_executors_; + } + + void dump(std::ostream& out, size_t i) const { + out << i << ' ' << instructions_[i]; + if (instructions_[i].op == OP || instructions_[i].op == CALL || + instructions_[i].op == OPN) { + out << " # " << *instructions_source_[i]; + } else { + out << '\n'; + } + } + + void dump(std::ostream& out) const { + out << *graph_ << '\n'; + for (const auto i : c10::irange(instructions_.size())) { + dump(out, i); + } + } + + /** + * Add an operation to operator_table_ if not a duplicate and return its index + */ + int add_to_operator_table( + const Operator& op, + const Node* node, + const std::string& op_name, + const int num_inputs, + const bool is_vararg) { + int size = operator_table_.size(); + + const Operation& oper = op.getOperation(node); + + if (!is_vararg) { + std::pair key(op_name, num_inputs); + auto 
found = operator_table_inv_.find(key); + + if (found != operator_table_inv_.end()) { + return found->second; + } + + operator_table_inv_.emplace(key, size); + } + + operator_table_.emplace_back(oper); +#ifndef NDEBUG + full_operator_table_.emplace_back(op); +#endif + return size; + } + + inline void assert_stack_size( + int32_t instruction_index, + size_t init_size, + size_t actual_size) const { +#ifndef NDEBUG + const auto& schema = full_operator_table_[instruction_index].schema(); + int64_t expected_size = static_cast(init_size) - + static_cast(schema.arguments().size()) + + static_cast(schema.returns().size()); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY( + static_cast(expected_size) == actual_size || + schema.is_varret() || schema.is_vararg(), + "Expected to find ", + expected_size, + " values on the stack, but found ", + actual_size, + " on the stack after ", + toString(full_operator_table_[instruction_index].schema())); +#endif + } +}; + +struct MobileCodeImpl : CodeImpl { + MobileCodeImpl( + const std::shared_ptr& graph, + std::string function_name, + bool emit_default_input_instructions, + bool support_default_args_before_out, + bool emit_promoted_ops, + size_t remaining_bailout_depth) + : CodeImpl( + graph, + std::move(function_name), + remaining_bailout_depth, + false), + emit_default_input_instructions_(emit_default_input_instructions), + support_default_args_before_out_(support_default_args_before_out), + emit_promoted_ops_(emit_promoted_ops) { + // NOLINTNEXTLINE(clang-analyzer-optin.cplusplus.VirtualCall) + run(); + } + + void run() override { + process_ops_for_mobile(); + emitCodeForBlock(graph_->block()); + insertInstruction(RET); + // we deferred the emission of bailout blocks so they appear at the end + // emit them now and patch up the jumps + insertBailoutBlocks(); + } + + void process_ops_for_mobile() { + DepthFirstGraphNodeIterator graph_it(graph_); + Node* node = graph_it.next(); + while (node) { + if (node->maybeOperator()) { + auto op_schema = 
node->getOperator().schema(); + // skip if schema has vararg + if (!op_schema.is_vararg()) { + auto specifiedArgs = CalculateNecessaryArgs( + op_schema.arguments(), + node->inputs(), + support_default_args_before_out_); + + size_t numInclude = specifiedArgs.first + + (support_default_args_before_out_ ? specifiedArgs.second : 0); + auto unique_name = !op_schema.overload_name().empty() + ? op_schema.name() + "." + op_schema.overload_name() + : op_schema.name(); + auto it = op_to_num_specified_args_.insert( + std::pair(unique_name, 0)); + op_to_num_out_args_.insert(std::pair( + unique_name, specifiedArgs.second)); + auto prev_value = it.first->second; + it.first->second = std::max(numInclude, prev_value); + } + } + node = graph_it.next(); + } + } + + private: + void emitOperator(Node* node) override { + if (emit_default_input_instructions_) { + CodeImpl::emitOperator(node); + } else { + const Operator& op = node->getOperator(); + std::string unique_op_name = c10::toString(op.schema().operator_name()); + int num_inputs = node->inputs().size(); + bool is_vararg = op.schema().is_vararg(); + + if (op.hasOperation() && is_vararg) { + emitLoadInputs(node->inputs()); + int operation_index = add_to_operator_table( + op, + node, + unique_op_name, + num_inputs, + /* is_vararg */ true); + insertInstruction(OPN, operation_index, num_inputs); + } else { + auto num_include = num_inputs; + auto it = op_to_num_specified_args_.find(unique_op_name); + if (it != op_to_num_specified_args_.end()) { + num_include = it->second; + } + if (support_default_args_before_out_) { + auto num_out = op_to_num_out_args_.find(unique_op_name)->second; + auto num_specified_before_out = num_include - num_out; + emitLoadInputs(node->inputs(), 0, num_specified_before_out); + emitLoadInputs( + node->inputs(), + node->inputs().size() - num_out, + node->inputs().size()); + } else { + emitLoadInputs(node->inputs(), num_include); + } + int operation_index = add_to_operator_table( + op, node, unique_op_name, 
num_inputs, is_vararg); + insertInstruction(OP, operation_index); + } + } + } + + void emitOperatorOrInstruction( + Node* node, + OpCode op, + int64_t X = 0, + uint64_t N = 0, + bool emit_inputs = true) override { + if (emit_promoted_ops_) { + CodeImpl::emitOperatorOrInstruction(node, op, X, N, emit_inputs); + } else { + CodeImpl::emitOperator(node); + } + } + + // To support forward compatibility for bytecode version bump from v5 to v6 + bool emit_default_input_instructions_; + // To support forward compatibility for bytecode version bump from v6 to v7 + bool support_default_args_before_out_; + // To support forward compatibility for bytecode version bump from v7 to v8 + bool emit_promoted_ops_; +}; + +} // namespace torch::jit::interpreter + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/frame.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/frame.h new file mode 100644 index 0000000000000000000000000000000000000000..16b4efc0497fdf3b943e52075ae8c2129ddc1caf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/frame.h @@ -0,0 +1,45 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include + +namespace torch::jit::interpreter { + +// A Frame captures function's state +// (e.g. 
`pc` and `base_pointer`) +// Each Frame corresponds to a call to a `Frame::function` +// which has not yet returned +// The arguments for `Frame::function` +// are located at [base_pointer + arg_number] +struct Frame { + std::shared_ptr function; + // program counter corresponds to the index + // of the currently executed instruction + size_t pc; + // marks the start index of the frame + // base_pointer is used by TAIL_CALL + // to replace the current frame + // with a frame of a bailout graph + size_t base_pointer; + + // unique to every frame with prim::profile across all threads + std::optional id; + + // RecordFunction object associated with this frame + std::unique_ptr record_function; + + // symbol table for a frame + ShapeSymbolTable symbols2dims; + + static size_t genId(); +}; + +} // namespace torch::jit::interpreter + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/preprocess_graph.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/preprocess_graph.h new file mode 100644 index 0000000000000000000000000000000000000000..62cb9e5367ba77f9c9e364f0cd775cca48039952 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/interpreter/preprocess_graph.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::jit::interpreter { + +// pre-processing that happens once per graph +struct PreprocessGraph { + explicit PreprocessGraph(Graph& g); + + // Outputs of the preprocessing: + std::shared_ptr graph; + std::unordered_map can_emit_inline; +}; + +} // namespace torch::jit::interpreter + +#else 
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_exception.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_exception.h new file mode 100644 index 0000000000000000000000000000000000000000..1ce95c6271f6f2ec91155d36793e2769f1da0fa4 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_exception.h @@ -0,0 +1,43 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include + +namespace torch::jit { + +struct TORCH_API JITException : public std::runtime_error { + explicit JITException( + const std::string& msg, + std::optional python_class_name = std::nullopt, + std::optional original_msg = std::nullopt); + + std::optional getPythonClassName() const { + return python_class_name_; + } + + // the original msg if this is from a python exception. The interpreter has + // changed the original message by adding "The following operation failed in + // the TorchScript interpreter." in front of it in the handleError function. + std::optional getOriginalMsg() const { + return original_msg_; + } + + static const std::string& getCaughtOriginalMsg(); + static const std::string& getCaughtPythonClassName(); + static void setCaughtOriginalMsg(const std::string& msg); + static void setCaughtPythonClassName(const std::string& pythonClassName); + + private: + std::optional python_class_name_; + std::optional original_msg_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_trace.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_trace.h new file mode 100644 index 0000000000000000000000000000000000000000..d6c7d15e94c0785223a7e96b5e02981c5cc4c3c2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/jit_trace.h @@ -0,0 +1,13 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include +#include + +namespace torch::jit { +TORCH_API std::shared_ptr TraceGraph( + const std::shared_ptr& graph, + Stack& stack); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/logging.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/logging.h new file mode 100644 index 0000000000000000000000000000000000000000..b56c20b3ba24fdedd04c9e31b3700b8d98057680 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/logging.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::jit::logging { + +class LoggerBase { + public: + TORCH_API virtual void addStatValue( + const std::string& stat_name, + int64_t val) = 0; + virtual ~LoggerBase() = default; +}; + +TORCH_API LoggerBase* getLogger(); +TORCH_API LoggerBase* setLogger(LoggerBase* logger); + +// No-op logger. This is the default and is meant to incur almost no runtime +// overhead. 
+ +class NoopLogger : public LoggerBase { + public: + void addStatValue( + const std::string& stat_name [[maybe_unused]], + int64_t val [[maybe_unused]]) override {} + ~NoopLogger() override = default; +}; + +// Trivial locking logger. Pass in an instance of this to setLogger() to use it. +// This keeps track of the sum of all statistics. +// +// NOTE: this is not written in a scalable way and should probably only be used +// in the single-threaded case or for testing. +class TORCH_API LockingLogger : public LoggerBase { + public: + void addStatValue(const std::string& stat_name, int64_t val) override; + virtual int64_t getCounterValue(const std::string& name) const; + enum class AggregationType { SUM = 0, AVG = 1 }; + void setAggregationType(const std::string& stat_name, AggregationType type); + ~LockingLogger() override = default; + + private: + mutable std::mutex m; + struct RawCounter { + RawCounter() = default; + int64_t sum{0}; + size_t count{0}; + }; + std::unordered_map raw_counters; + std::unordered_map agg_types; +}; + +// Make this struct so the timer internals are opaque to the user. 
+struct JITTimePoint { + std::chrono::time_point point; +}; + +TORCH_API JITTimePoint timePoint(); +TORCH_API void recordDurationSince( + const std::string& name, + const JITTimePoint& tp); + +namespace runtime_counters { +constexpr const char* GRAPH_EXECUTORS_CONSTRUCTED = + "pytorch_runtime.graph_executors_constructed"; +constexpr const char* GRAPH_EXECUTOR_INVOCATIONS = + "pytorch_runtime.graph_executor_invocations"; +constexpr const char* EXECUTION_PLAN_CACHE_HIT = + "pytorch_runtime.execution_plan_cache_hit"; +constexpr const char* EXECUTION_PLAN_CACHE_MISS = + "pytorch_runtime.execution_plan_cache_miss"; + +inline std::vector allRuntimeCounters() { + return { + GRAPH_EXECUTORS_CONSTRUCTED, + GRAPH_EXECUTOR_INVOCATIONS, + EXECUTION_PLAN_CACHE_HIT, + EXECUTION_PLAN_CACHE_MISS}; +} + +} // namespace runtime_counters + +} // namespace torch::jit::logging + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator.h new file mode 100644 index 0000000000000000000000000000000000000000..619965550581c0ebe99a436aea2de536b0c2868c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator.h @@ -0,0 +1,348 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// in memory description of all ATen Ops similar to Caffe2 schema +// once C10 exists this can be removed, or stubbed out, but we need +// it now to implement correct semantic checking for script +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include + +#include +#include +#include +#include +#include +#include 
+#include +#include + +namespace torch::jit { + +struct Node; +using ::c10::Argument; +using ::c10::FunctionSchema; +using ::c10::Symbol; + +using OperationCreator = Operation (*)(const Node*); + +namespace { +const std::array kJitOnlyOperatorTags = { + at::Tag::pt2_compliant_tag}; +} + +/* + * Note: JIT relies on Operator instances having static lifetime, because + * it for example stores a non-owning FunctionSchema* pointer in the Node class, + * which points to the function schema stored in the Operator instance. + * Also, jit::Operator is meant to store more operator related information like + * symbolic derivatives, which also requires them to have static lifetime + * so that changes to symbolic derivatives are remembered. + * + * Currently, the JIT operator library contains a jit::Operator instance + * with a wrapper for each c10 operator. The c10 operator library registers + * those wrappers using listeners in register_c10_ops.cpp. + * TODO Instead of doing it this way, we should only have pure-jit ops in + * the jit library but have the JIT operator lookup look into the c10 library + * too. + */ + +// An Operator is a thin wrapper around either a pure JIT operator (e.g. prim +// ops) or a c10 operator, allowing some common operations and abstracting away +// the concrete operator nature. +struct TORCH_API Operator { + private: + struct C10Operator final { + c10::OperatorHandle handle_; + Operation op_; + }; + struct UnparsedFunctionSchema final { + std::string schema_string_; + mutable std::optional alias_analysis_; + }; + struct JitOnlyOperator final { + // The only valid transition for schema_ is from right->left, i.e. + // when the schema gets parsed. 
+ mutable std::variant schema_; + + std::variant op_; + }; + + public: + Operator(c10::OperatorHandle opHandle, Operation operation) + : op_(C10Operator{std::move(opHandle), std::move(operation)}) {} + + Operator( + std::string schema, + Operation op, + c10::AliasAnalysisKind alias_analysis) + : op_(JitOnlyOperator{ + UnparsedFunctionSchema{std::move(schema), alias_analysis}, + Operation(std::move(op))}) {} + + Operator( + std::string name, + std::string overload_name, + std::vector arguments, + std::vector returns, + Operation op, + c10::AliasAnalysisKind alias_analysis) + : op_(JitOnlyOperator{ + FunctionSchema(varArgSchemaWithName( + std::move(name), + std::move(overload_name), + std::move(arguments), + std::move(returns), + alias_analysis)), + std::move(op)}) {} + + Operator( + std::string schema, + OperationCreator op_creator, + c10::AliasAnalysisKind alias_analysis) + : op_(JitOnlyOperator{ + UnparsedFunctionSchema{std::move(schema), alias_analysis}, + op_creator}) {} + + // Helper constructor to register `op` to run + // run for _every_ IR Node where n.kind() == name, regardless of arguments. + // This is accomplished by marking the schema varargs and having no required + // arguments. + Operator( + Symbol name, + OperationCreator op_creator, + c10::AliasAnalysisKind alias_analysis) + : op_(JitOnlyOperator{ + FunctionSchema(varArgSchemaWithName(name, alias_analysis)), + op_creator}) {} + + Operation getOperation(const Node* node = nullptr) const { + return std::visit( + c10::overloaded( + [](const C10Operator& op) { return op.op_; }, + [node](const JitOnlyOperator& op) { + return std::visit( + c10::overloaded( + [](const Operation& op) { return op; }, + [node](const OperationCreator& op_creator) { + return op_creator(node); + }), + op.op_); + }), + op_); + } + + Operation getOperationForDispatchKey(c10::DispatchKey dk) const { + // TODO: some sort of caching mechanism? 
+ return std::visit( + c10::overloaded( + [dk](const C10Operator& op) { + return Operation([op, dk](Stack& stack) { + op.handle_.callBoxedForDispatchKey(dk, stack); + }); + }, + [](const JitOnlyOperator& op) { + TORCH_CHECK( + false, + "calling a JIT operator for dispatch key is not supported"); + return Operation(nullptr); + }), + op_); + } + + const FunctionSchema& schema() const { + return std::visit( + c10::overloaded( + [](const C10Operator& op) -> const FunctionSchema& { + return op.handle_.schema(); + }, + [](const JitOnlyOperator& op) -> const FunctionSchema& { + // we lazily parse schema initialized from strings so that + // we do less work during static operator registration + if (op.schema_.index() == 1) { + auto& unmaterializedSchema = + std::get(op.schema_); + FunctionSchema schema = + parseSchema(unmaterializedSchema.schema_string_); + if (unmaterializedSchema.alias_analysis_.has_value()) { + // TODO What if it gets set later? + schema.setAliasAnalysis( + *unmaterializedSchema.alias_analysis_); + } + op.schema_ = std::move(schema); + } + return std::get(op.schema_); + }), + op_); + } + + c10::ArrayRef getTags() const { + return std::visit( + c10::overloaded( + [](const C10Operator& op) { return op.handle_.getTags(); }, + [](const JitOnlyOperator& op) { + // JitOnlyOperators don't have an c10::OperatorHandle or a way to + // specify tags. We're grandfathering them all into + // pt2_compliant_tag, but for anything else, please just stop + // using JitOnlyOperator. 
+ return c10::ArrayRef(kJitOnlyOperatorTags); + }), + op_); + } + + bool isC10Op() const { + return op_.index() == 0; + } + + c10::AliasAnalysisKind aliasAnalysisKind() const { + const FunctionSchema& schemaRef = schema(); + c10::AliasAnalysisKind alias_analysis = schemaRef.aliasAnalysis(); + + TORCH_CHECK( + alias_analysis == AliasAnalysisKind::FROM_SCHEMA || + !schemaRef.hasAnyAliasInfo(), + "In operator registration: Tried to register operator ", + schemaRef, + " with aliasing information in the schema but without AliasAnalysisKind::FROM_SCHEMA."); + return alias_analysis; + } + + bool hasOperation() const { + return std::visit( + c10::overloaded( + [](const C10Operator&) { return true; }, + [](const JitOnlyOperator& op) { return op.op_.index() == 0; }), + op_); + } + + private: + static FunctionSchema varArgSchemaWithName( + Symbol name, + AliasAnalysisKind alias_analysis) { + auto result = FunctionSchema( + name, + "", + {}, + {}, + /*is_vararg*/ true, + /*is_varret*/ true); + result.setAliasAnalysis(alias_analysis); + return result; + } + + static FunctionSchema varArgSchemaWithName( + std::string name, + std::string overload_name, + std::vector arguments, + std::vector returns, + AliasAnalysisKind alias_analysis) { + auto result = FunctionSchema( + std::move(name), + std::move(overload_name), + std::move(arguments), + std::move(returns), + /*is_vararg*/ false, + /*is_varret*/ false); + result.setAliasAnalysis(alias_analysis); + return result; + } + + std::variant op_; +}; + +TORCH_API std::string canonicalSchemaString(const FunctionSchema& schema); + +TORCH_API const std::vector> getAllOperators(); +TORCH_API const std::vector>& getAllOperatorsFor( + Symbol name); +// Returns operators in the order which OpOverloadPacket resolves them. +TORCH_API std::vector> getAllSortedOperatorsFor( + Symbol name); + +// given a operator with an overload name, find the specific operator related to +// it, may return nullptr if no operator exists. 
+TORCH_API std::shared_ptr findOperatorFor( + const c10::OperatorName& full_name); + +TORCH_API std::vector findSimilarOperators(Symbol input_op); + +TORCH_API void registerOperator(Operator&& op); +TORCH_API void deregisterOperator(const FunctionSchema& schema); + +// XXX: this function is meant to be used with string literals only! +TORCH_API std::shared_ptr getOperatorForLiteral( + const char* signature); + +// Ensure the thing that registers c10 ops is defined. +// Otherwise, our registry will not have c10 ops. You can run into this +// scenario if you're querying registered ops during static init. +// +// This fn is defined in register_c10_ops.cpp +TORCH_API void ensure_c10_registerer_defined(); + +// Used to assert that unschematized operators have an analysis method written +TORCH_API bool aliasAnalysisHasSpecialCaseFor(c10::Symbol sym); + +// A factory function to generate an optional operator. It has two +// instantiations depending on the template bool arg value. The arg can be a +// compile-time function for the selective op registration based on schema +// string. 
+template +std::optional OperatorGenerator( + const char* schema_str, + Func&& op, + AliasAnalysisKind alias_analysis) { + return std::optional(Operator( + std::string(schema_str), std::forward(op), alias_analysis)); +} + +template +std::optional OperatorGenerator( + torch::detail::SelectiveStr schema_str, + Func&& op, + AliasAnalysisKind alias_analysis) { + return OperatorGenerator( + static_cast(schema_str), + std::forward(op), + alias_analysis); +} + +template +std::optional OperatorGenerator( + torch::detail::SelectiveStr schema_str, + Func&& op, + AliasAnalysisKind alias_analysis) { + return std::nullopt; +} + +template +std::optional OperatorGenerator( + const std::string name, + const std::string overload_name, + const std::vector arguments, + const std::vector returns, + Func&& op, + AliasAnalysisKind alias_analysis) { + return std::optional(Operator( + name, + overload_name, + arguments, + returns, + std::forward(op), + alias_analysis)); +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator_options.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator_options.h new file mode 100644 index 0000000000000000000000000000000000000000..ade935e052b07e79dcbd722407422538c2ea3c68 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/operator_options.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +using AliasAnalysisKind = c10::AliasAnalysisKind; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/print_handler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/print_handler.h new file mode 100644 index 0000000000000000000000000000000000000000..911e9bc905ce609ce146090b24c684e370d08660 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/print_handler.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::jit { + +using PrintHandler = void (*)(const std::string&); + +TORCH_API PrintHandler getDefaultPrintHandler(); +TORCH_API PrintHandler getPrintHandler(); +TORCH_API void setPrintHandler(PrintHandler ph); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_graph_executor_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_graph_executor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..f9d0e095fda6c885785b7e819a04ed871d13ab5f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_graph_executor_impl.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +TORCH_DECLARE_bool(torch_jit_static_then_dynamic); + +TORCH_DECLARE_bool(torch_jit_always_dynamic); + +C10_DECLARE_bool(torch_jit_release_profiling_graph_after_optimization); +C10_DECLARE_int32(torch_jit_release_profiling_graph_delay_in_seconds); +C10_DECLARE_int64(torch_jit_num_profiled_runs); +C10_DECLARE_int64(torch_jit_bailout_depth); + +namespace torch::jit { + +TORCH_API void runNooptPassPipeline(std::shared_ptr& graph); + +struct TORCH_API ProfilingGraphExecutorImpl : public GraphExecutorImplBase { + ProfilingGraphExecutorImpl( + const std::shared_ptr& graph, + std::string function_name); + + const ExecutionPlan& getPlanFor( + Stack& stack, + std::optional remaining_bailout_depth) override; + GraphExecutorState getDebugState() override; + ~ProfilingGraphExecutorImpl() override = default; + + void debugFlushCompilationCache(); + + bool isOptimized() const override { + return optimized_plan_.has_value(); + } + + private: + const ExecutionPlan& getOptimizedPlanFor( + Stack& stack, + std::optional remaining_bailout_depth); + void runProfilingInsensitiveOptimizations(std::shared_ptr& graph); + void runProfilingOptimizations( + std::shared_ptr& graph, + size_t remaining_depth); + void replaceFallbackGraphWithFallbackFunction(Block* b); + 
FusionBehavior getCurrentBehavior(size_t remaining_depth); + size_t getInstantiatedBailoutDepth(); + void runNoGradOptimizations( + std::shared_ptr& graph, + size_t remaining_bailout_depth); + void runFinalOptimizations(std::shared_ptr& graph); + + void clearTheGraphCompilationIntermediateGraphs(); + + std::unique_ptr pr_; + std::optional + profiling_plan_; // plan to run in order to profiling the code + std::optional optimized_plan_; + FusionStrategy fusion_strategy_; + + // this plan is used if getGraphExecutorOptimize is unset + std::optional fallback_plan_; + // fallback functions are inserted for tensorexpr fusion groups + // and by specialize_autogradzero. Whenever, at runtime, input + // tensor don't match profiled properties, fallback functions are called + // They are the deoptimized version of the logic in fusion groups + // and/or autograd. + // The fallback functions are owned by a GraphExecutor instance + // They only exist in the optimized graph which is a private property + // of the GraphExecutor and only shared with InterpreterState + std::vector> fallback_functions_; + std::optional remaining_bailout_depth_; + // The time the optimized_plan_ is created. + int32_t time_optimized_plan_created_ = 0; + // Has the extra memory used by the graph for profiling is released? + bool is_graph_extra_memory_released_ = false; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_record.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_record.h new file mode 100644 index 0000000000000000000000000000000000000000..a72e67bd31d3b530b6118251c78b7fdea7f69564 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/profiling_record.h @@ -0,0 +1,211 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + +// We would like to assign each position/axis of a tensor an abstract size +// * For each `tensor` we have a profiled `Value` of a `TensorType` describing +// the properties of the `tensor`. +// * `TensorType` has a property called `symbolic_sizes_` to describe observed +// `tensor.sizes()` +// * `symbolic_sizes_` is a vector of abstract sizes (or +// `std::vector`) where +// * `ShapeSymbol`at `symbolic_sizes_[i]` describes the size value +// (`Dimension`) at `tensor.sizes()[i]` +// * We may see the same `Dimension` at different positions `i` in +// `tensor.sizes()` or even in different `tensor` +// * First, we would like associate the same `ShapeSymbol` to the same +// `Dimension` across **one** profiling execution or run of a TorchScript +// function. +// * The same `ShapeSymbol`s in different positions of `symbolic_shapes_` in +// possibly different `TensorType`s (i.e. `TensorType`s for different +// profiled values) form an implicit set. The elements of such a set are +// called *dimension locations*. 
+// * These sets allow us to track how the shapes of input arguments of some +// operation relate to operation's output shapes as the input and output +// shapes might share the same `ShapeSymbol`s +// * For **every** profiling run, we would like to maintain the invariant that +// *the same `ShapeSymbol` is always associated with the same `Dimension`*. +// * To maintain this invariant we merge the profiling information from all +// profiling runs, +// * For every two runs, we iterate over all `symbic_shapes_` and compare +// their `ShapeSymbol`s in the same position. +// * if we observe that for every dimension location that has +// the`ShapeSymbol S1` in run #1 there is **only one** `ShapeSymbol S2` in +// the same dimension location in run #2, we conclude that the invariant +// holds. +// * However, if we observe some dimension locations in run #2 have +// `ShapeSymbol S2` and the other ones have `ShapeSymbol S3` we would like +// to partition the virtual set of dimension locations associated with +// `ShapeSymbol S1` into two new subsets, so the invariant holds. +// * The partitioning works by assigning a new symbol to the dimension +// locations (associated with `ShapeSymbol S1`) that have `ShapeSymbol S2` +// and another new symbol to the dimension locations that have `ShapeSymbol +// S3`. In other words, +// * Subset #1 will consist of the dimension locations that in run #2 have +// `ShapeSymbol S2` and will have `ShapeSymbol S4` in those dimension +// locations +// * Subset #2 will consist of the dimension locations that in run #2 have +// `ShapeSymbol S4` and will have `ShapeSymbol S5` in those dimension +// locations +// * The effective result of merging the profiling information from two runs +// is new `TensorTypes` whose `symbolic_sizes_` /dimension locations have +// either `ShapeSymbol S4` or `ShapeSymbol S5`. 
+// * Partitioning can be done even before we have seen all the dimension +// locations associated with `ShapeSymbol S1` +// * We use `getSymbolInSet` of `ShapeSymbolTable` to remember all +// `ShapeSymbols` from run #2 we observed in the dimension locations +// associated with `ShapeSymbol S1` . +// * For every `ShapeSymbol` from run #2 in the dimension location +// associated with `ShapeSymbol S1` `getSymbolInSet` returns a symbol +// that we assign to the dimension location in a new TensorType. +// * It's important to point out that the same `ShapeSymbol S2` from run +// #2 in two dimension locations that have different `ShapeSymbol`s in +// run #1 are different! These dimension locations will belong to +// different subsets and have different `ShapeSymbol`s after merge. +// * On the other hand, for the same `ShapeSymbol S2` in two dimension +// locations that have `ShapeSymbol S1` in run #1`getSymbolInSet` will +// return the same symbol. + +namespace torch::jit { + +using ::c10::TensorTypePtr; +using Dimension = int64_t; + +TORCH_API void RegisterProfilingNode( + const std::function& /*func*/); + +struct ProfilingRecord; + +// `SetPartitioningHelper` is used to maintain the following invariant: +// For **every** profiling run, *the same `ShapeSymbol` is always associated +// with the same `Dimension`*. +// while merging the profiling information from multiple runs. +struct SetPartitioningHelper { + std::map> + sets2subsets_; + + // `partitionSetByDimension` partitions a virtual set + // of dimension locations associated with ShapeSymbol `symbol` into subsets. + // Partitioning is equivalent to giving (or renaming) a particular + // dimension location a new `ShapeSymbol`. + // The same `Dimension` value in different dimension locations + // that used to have `symbol` will receive the same + // new `ShapeSymbol`, effectively forming a new set. 
+ c10::ShapeSymbol partitionSetByDimension( + Dimension new_size, + c10::ShapeSymbol symbol) { + auto& dims2symbols = getSetForSymbol(symbol); + + if (dims2symbols.count(new_size) == 0) { + auto new_sym = c10::ShapeSymbol::newSymbol(); + dims2symbols[new_size] = new_sym; + return new_sym; + } + + return dims2symbols[new_size]; + } + + private: + std::map& getSetForSymbol(c10::ShapeSymbol s) { + auto& set = sets2subsets_[s]; + // N.B. adding a mapping { s.static_size(), s } + // makes sure we preserve the fact that + // some dimension values remain the same + // across all profiled runs + if (s.is_static()) { + set.insert({s.static_size(), s}); + } + return set; + } +}; + +// ShapeSymbolTable is used by Interpreter +// to assign dimension values to ShapeSymbols +// and fail a guard if the same symbol +// is assigned more than one dimension value. +struct ShapeSymbolTable { + // N.B. we treat static symbols as always assigned + // to themselves + bool isBound(c10::ShapeSymbol s) { + if (s.is_static()) { + return true; + } + return data_.count(s) != 0; + } + + // N.B. we treat static symbols as always assigned + // to themselves + Dimension getValue(c10::ShapeSymbol s) { + if (s.is_static()) { + return s.static_size(); + } + return data_[s]; + } + void assign(c10::ShapeSymbol s, Dimension v) { + TORCH_INTERNAL_ASSERT(!s.is_static()); + data_[s] = v; + } + std::map data_; + // Tries to assign dimension values from `new_sizes` to + // `ShapeSymbol`s `sym_shapes`. + // Returns `true` if every dimension value from `new_sizes` + // can be assigned to the corresponding `ShapeSymbol` from + // `sym_shapes` + // A dimension value can be assigned to a `ShapeSymbol` + // * if the symbol isn't assigned yet any dimension value + // * if the symbol is assigned and its value is equal to + // the dimension value from `new_sizes` + bool bindSymbolicShapes( + at::IntArrayRef new_sizes, + const c10::SymbolicShape& sym_shapes); +}; + +struct ProfilingRecord { + // N.B. 
ProfilingRecord's copy and move c-tor are disabled, so we won't + // end up accidentally copying or moving ProfilingRecords whose addresses + // are captured in callbacks_ + ProfilingRecord(const ProfilingRecord&) = delete; + ProfilingRecord(ProfilingRecord&&) noexcept = delete; + TORCH_API static std::unique_ptr instrumentGraph( + const std::shared_ptr& graph); + TORCH_API static void removeProfilingNodes(Block* b); + TORCH_API static void removeProfileCounter(Block* b); + + std::shared_ptr profiled_graph_; + mutable std::mutex mutex_; + size_t profiling_count_; + + bool ready() const; + + std::shared_ptr graph() const { + return profiled_graph_; + } + + TORCH_API ProfileIValueOp* createProfileIValueNode(Value* in_val); + TORCH_API ProfileIValueOp* createProfileIValueNode(ArrayRef inputs); + + private: + ProfileOp* createProfileNode( + const std::function& fp, + at::ArrayRef inputs); + void instrumentBlock(Block* block); + void insertShapeProfile(Node* n, size_t offset, const TypePtr& input_type); + ProfilingRecord(std::shared_ptr g); +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/register_ops_utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/register_ops_utils.h new file mode 100644 index 0000000000000000000000000000000000000000..4c6c441a5a9cd0ec0e90e6b13c035f35e9a3ea67 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/register_ops_utils.h @@ -0,0 +1,888 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { +constexpr inline c10::AliasAnalysisKind aliasAnalysisFromSchema() { + return c10::AliasAnalysisKind::FROM_SCHEMA; +} + +constexpr inline c10::AliasAnalysisKind aliasAnalysisConservative() { + return c10::AliasAnalysisKind::CONSERVATIVE; +} + +constexpr inline c10::AliasAnalysisKind aliasAnalysisSpecialCase() { + return c10::AliasAnalysisKind::INTERNAL_SPECIAL_CASE; +} + +template +c10::List make_result_list(const TypePtr& elemType) { + return c10::List(); +} + +template <> +c10::impl::GenericList make_result_list(const TypePtr& elemType); + +// As described in https://docs.python.org/3/library/functions.html#round +// When a number is exactly halfway between two integers, python builtin round +// function will round to even number. We use round(x/2)*2 to handle the +// special halfway case. 
For positive 'x', round(x/2)*2 = +// round((x_e + x_r)/2)*2 = x_e + round(x_r/2)*2, where x_e is an even integer, +// x_r is either 0.5 of 1.5, round(x_r/2)*2 results a 0 or 2, so the final +// result will always be a even number. Due to symmetricity, it also applies to +// negative cases. +inline double round_to_even(double a) { + return a - std::floor(a) == 0.5 ? (std::round(a * 0.5) * 2.0) : std::round(a); +} + +// using the rules from python_arg_parser FunctionParameter::check +// tensor cannot have grad set, tensor must be 0 dim, +// and if the dest is an int the source must be integral type +void checkImplicitTensorToNum(const at::Tensor& t, bool toInt); + +[[maybe_unused]] static int64_t floordiv(int64_t a, int64_t b) { + if (b == 0) { + throw std::runtime_error("division by 0"); + } + if ((a > 0) == (b > 0)) { + // simple case, both have same sign + return a / b; + } else { + // in python division rounds down, it doesn't not truncate like in c++ + auto r = lldiv(a, b); + return (r.rem) ? 
r.quot - 1 : r.quot; + } +} +TORCH_API void checkDoubleInRange(double a); +[[maybe_unused]] static int64_t floor(double a) { + checkDoubleInRange(a); + return std::floor(a); +} +[[maybe_unused]] static int64_t ceil(double a) { + checkDoubleInRange(a); + return std::ceil(a); +} + +[[maybe_unused]] static int64_t gcd(int64_t a, int64_t b) { + while (b != 0) { + int64_t r = a % b; + a = b; + b = r; + } + // in python gcd returns non-negative values + return std::abs(a); +} + +int64_t partProduct(int n, int m); + +void loop(int n, int64_t& p, int64_t& r); + +int nminussumofbits(int v); + +int64_t factorial(int n); +static const double degToRad = std::acos(-1.0) / 180.0; +static const double radToDeg = 180.0 / std::acos(-1.0); +double degrees(double x); +double radians(double x); + +// Convert an python index (which may be negative) into an index usable for a +// C++ container + +// Equivalent to list.at(idx) +template +auto getItem(const c10::List& list, int64_t idx) { + const int64_t list_size = list.size(); + const int64_t normalized_idx = normalizeIndex(idx, list_size); + if (normalized_idx < 0 || normalized_idx >= list_size) { + throw std::out_of_range("list index out of range"); + } + return list.get(normalized_idx); +} + +template +void setItem(const c10::List& list, int64_t idx, T&& value) { + const int64_t list_size = list.size(); + const int64_t normalized_idx = normalizeIndex(idx, list_size); + if (normalized_idx < 0 || normalized_idx >= list_size) { + throw std::out_of_range("list index out of range"); + } + list.set(normalized_idx, std::forward(value)); +} + +void listAppend(Stack& stack); + +void listReverse(Stack& stack); + +template +void minList(Stack& stack) { + c10::List a = pop(stack).to>(); + c10::List b = pop(stack).to>(); + + size_t min_size = std::min(a.size(), b.size()); + for (const auto i : c10::irange(min_size)) { + if (a[i] == b[i]) { + continue; + } + + push(stack, a[i] < b[i] ? a : b); + return; + } + + push(stack, b.size() < a.size() ? 
b : a); +} + +template +void maxList(Stack& stack) { + c10::List a = pop(stack).to>(); + c10::List b = pop(stack).to>(); + + size_t min_size = std::min(a.size(), b.size()); + for (const auto i : c10::irange(min_size)) { + if (a[i] == b[i]) { + continue; + } + + push(stack, a[i] > b[i] ? a : b); + return; + } + + push(stack, b.size() > a.size() ? b : a); +} + +void listPopImpl(Stack& stack, const char* empty_message); + +void listPop(Stack& stack); + +void listClear(Stack& stack); + +void listDelete(Stack& stack); + +void listInsert(Stack& stack); + +template +void listRemove(Stack& stack) { + T elem = pop(stack).to(); + c10::List list = pop(stack).to>(); + + auto pos = std::find(list.begin(), list.end(), elem); + + if (pos != list.end()) { + list.erase(pos); + } else { + TORCH_CHECK(false, "list.remove(x): x not in list"); + } +} + +template +void listMin(Stack& stack) { + c10::List list = pop(stack).to>(); + size_t list_size = list.size(); + if (list_size == 0) { + throw std::runtime_error("min() arg is an empty sequence"); + } + + T min_elem = list[0]; + for (const auto i : c10::irange(1, list_size)) { + T elem = list[i]; + min_elem = elem < min_elem ? elem : min_elem; + } + + stack.push_back(min_elem); +} + +template +void listMax(Stack& stack) { + c10::List list = pop(stack).to>(); + size_t list_size = list.size(); + if (list_size == 0) { + throw std::runtime_error("max() arg is an empty sequence"); + } + + T max_elem = list[0]; + for (const auto i : c10::irange(1, list_size)) { + T elem = list[i]; + max_elem = elem > max_elem ? 
elem : max_elem; + } + + stack.push_back(max_elem); +} + +template <> +void listRemove(Stack& stack); + +template +void listIndex(Stack& stack) { + T elem = pop(stack).to(); + c10::List list = pop(stack).to>(); + + auto pos = std::find(list.begin(), list.end(), elem); + + if (pos != list.end()) { + push(stack, static_cast(std::distance(list.begin(), pos))); + } else { + TORCH_CHECK(false, "'", elem, "' is not in list"); + } +} + +template <> +void listIndex(Stack& stack); + +template +void listCount(Stack& stack) { + T elem = pop(stack).to(); + c10::List list = pop(stack).to>(); + + const int64_t count = std::count(list.begin(), list.end(), elem); + push(stack, count); +} + +template <> +void listCount(Stack& stack); + +void listExtend(Stack& stack); + +void listCopy(Stack& stack); + +void listSelect(Stack& stack); + +void listLen(Stack& stack); + +template +void listEq(Stack& stack) { + c10::List b = pop(stack).to>(); + c10::List a = pop(stack).to>(); + push(stack, a == b); +} + +template +void listNe(Stack& stack) { + c10::List b = pop(stack).to>(); + c10::List a = pop(stack).to>(); + push(stack, a != b); +} + +inline bool tensor_list_equal( + const c10::List& a, + const c10::List& b) { + if (a.size() != b.size()) { + return false; + } + + for (const auto i : c10::irange(a.size())) { + const at::Tensor& a_element = a[i]; + const at::Tensor& b_element = b[i]; + // This preserves Python's semantics, which uses eq() to compare two + // elements, then passes the result to bool(). 
+ // see: https://docs.python.org/3.4/reference/datamodel.html#object.__ge__ + const auto cmp_result = a_element.eq(b_element); + if (!at::native::is_nonzero(cmp_result)) { + return false; + } + } + + return true; +} + +// Specialization for at::Tensor, since it doesn't define operator== +template <> +void listEq(Stack& stack); + +// Specialization for at::Tensor, since it doesn't define operator== +template <> +void listNe(Stack& stack); + +void listList(Stack& stack); + +template +void listContains(Stack& stack) { + auto key = pop(stack).to(); + auto list = pop(stack).to>(); + // NOLINTNEXTLINE(performance-implicit-conversion-in-loop) + for (const T& item : list) { + if (item == key) { + push(stack, true); + return; + } + } + push(stack, false); +} + +void listAdd(Stack& stack); + +void listInplaceAdd(Stack& stack); + +void listMulIntLeftInPlace(Stack& stack); + +void listMulIntLeft(Stack& stack); + +void listMulIntRight(Stack& stack); + +void listSlice(Stack& stack); + +template +void listSort(Stack& stack) { + bool reverse = pop(stack).toBool(); + c10::List list = pop(stack).to>(); + std::sort(list.begin(), list.end(), [reverse](const T& a, const T& b) { + // FBCode errors without this check - "strict weak ordering" + // TODO: remove when possible, since it just slows down + // sorting and doesn't do anything useful + if (a == b) { + return false; + } + return (a < b) != reverse; + }); +} + +// Specialization for at::Tensor +template <> +void listSort(Stack& stack); + +template +void listCopyAndSort(Stack& stack) { + c10::List list = pop(stack).to>(); + auto list_copied = list.copy(); + std::sort(list_copied.begin(), list_copied.end(), [](const T& a, const T& b) { + // "strict weak ordering" issue - see other sort + if (a == b) { + return false; + } + return a < b; + }); + push(stack, list_copied); +} + +// Specialization for at::Tensor +template <> +void listCopyAndSort(Stack& stack); + +void listSetItem(Stack& stack); + +struct OperatorGeneratorArgs { + const 
char* schema_str; + bool isOperationCreator; + union { + void (*operation)(Stack&); + OperationCreator operationCreator; + }; + AliasAnalysisKind aliasAnalysis; + + explicit constexpr OperatorGeneratorArgs( + torch::detail::SelectiveStr schema_str, + void (*op)(Stack&), + AliasAnalysisKind aa) + : schema_str(schema_str), + isOperationCreator(false), + operation(op), + aliasAnalysis(aa) {} + + explicit constexpr OperatorGeneratorArgs( + torch::detail::SelectiveStr schema_str, + OperationCreator opCreator, + AliasAnalysisKind aa) + : schema_str(schema_str), + isOperationCreator(true), + operationCreator(opCreator), + aliasAnalysis(aa) {} + + template + explicit constexpr OperatorGeneratorArgs( + torch::detail::SelectiveStr /*unused*/, + Args... /*unused*/) + : schema_str(nullptr), + isOperationCreator(false), + operation(nullptr), + aliasAnalysis(AliasAnalysisKind::INTERNAL_SPECIAL_CASE) {} +}; + +#define DEFINE_GENERIC_BINARY_OP( \ + aten_op, op, int_float_result, complex_result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op \ + ".int_int(int a, int b) -> " #int_float_result), \ + [](Stack& stack) { \ + int64_t a, b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op \ + ".float_float(float a, float b) -> " #int_float_result), \ + [](Stack& stack) { \ + double a, b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op \ + ".complex_complex(complex a, complex b) -> " #complex_result), \ + [](Stack& stack) { \ + c10::complex a, b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +// define implementations for primitive number ops +#define DEFINE_GENERIC_OP(aten_op, int_op, float_op, int_result, float_result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".int(int a, int b) -> " #int_result), \ + [](Stack& 
stack) { \ + int64_t a, b; \ + pop(stack, a, b); \ + push(stack, int_op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".float(float a, float b) -> " #float_result), \ + [](Stack& stack) { \ + double a, b; \ + pop(stack, a, b); \ + push(stack, float_op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_INT_FLOAT_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op \ + ".int_float(int a, float b) -> " #result), \ + [](Stack& stack) { \ + int64_t a; \ + double b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op \ + ".float_int(float a, int b) -> " #result), \ + [](Stack& stack) { \ + double a; \ + int64_t b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_INT_OP(aten_op, op) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".int(int a, int b) -> int"), \ + [](Stack& stack) { \ + int64_t a, b; \ + pop(stack, a, b); \ + push(stack, op); /* NOLINT(hicpp-signed-bitwise) */ \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_STR_CMP_OP(aten_op, op) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".str(str a, str b) -> bool"), \ + [](Stack& stack) { \ + auto b = pop(stack).toStringRef(); \ + auto a = pop(stack).toStringRef(); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +// define a primitive op over Scalar operands. 
+// it's necessary to register this overload following +// int/float variations to avoid trapping Scalar args +// in unintended implicit conversions +#define DEFINE_SCALAR_BINARY_OP_AVOID_COLLISION_GENERIC( \ + aten_op, int_op, float_op, result, string_val) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op string_val \ + "(Scalar a, Scalar b) -> " #result), \ + [](Stack& stack) { \ + IValue x, y; \ + pop(stack, x, y); \ + if (x.isDouble()) { \ + if (y.isDouble()) { \ + double a = x.toDouble(); \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else { \ + double a = x.toDouble(); \ + int64_t b = y.toInt(); \ + push(stack, float_op); \ + } \ + } else { \ + if (y.isDouble()) { \ + int64_t a = x.toInt(); \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else { \ + int64_t a = x.toInt(); \ + int64_t b = y.toInt(); \ + push(stack, int_op); \ + } \ + } \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_SCALAR_BINARY_OP(aten_op, int_op, float_op, result) \ + DEFINE_SCALAR_BINARY_OP_AVOID_COLLISION_GENERIC( \ + aten_op, int_op, float_op, result, "") + +#define DEFINE_SCALAR_BINARY_OP_AVOID_COLLISION( \ + aten_op, int_op, float_op, result) \ + DEFINE_SCALAR_BINARY_OP_AVOID_COLLISION_GENERIC( \ + aten_op, int_op, float_op, result, ".Scalar_Scalar") + +#define DEFINE_BINARY_OP(aten_op, op) \ + DEFINE_GENERIC_OP(aten_op, op, op, int, float), \ + DEFINE_INT_FLOAT_OP(aten_op, op, float), \ + DEFINE_SCALAR_BINARY_OP(aten_op, op, op, Scalar) + +#define DEFINE_BINARY_FLOAT_OP(aten_op, op) \ + DEFINE_GENERIC_OP(aten_op, op, op, float, float), \ + DEFINE_INT_FLOAT_OP(aten_op, op, float), \ + DEFINE_SCALAR_BINARY_OP(aten_op, op, op, float) + +#define DEFINE_COMPARISON_OP(aten_op, op) \ + DEFINE_GENERIC_OP(aten_op, op, op, bool, bool), \ + DEFINE_INT_FLOAT_OP(aten_op, op, bool), \ + DEFINE_SCALAR_BINARY_OP(aten_op, op, op, bool), \ + DEFINE_STR_CMP_OP(aten_op, op) + +#define DEFINE_UNARY_INT_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + 
TORCH_SELECTIVE_SCHEMA(#aten_op ".int(int a) -> " #result), \ + [](Stack& stack) { \ + int64_t a; \ + pop(stack, a); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_UNARY_FLOAT_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".float(float a) -> " #result), \ + [](Stack& stack) { \ + double a; \ + pop(stack, a); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_UNARY_OP(aten_op, op, int_result, float_result) \ + DEFINE_UNARY_INT_OP(aten_op, op, int_result), \ + DEFINE_UNARY_FLOAT_OP(aten_op, op, float_result), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".Scalar(Scalar a) -> Scalar"), \ + [](Stack& stack) { \ + IValue x; \ + pop(stack, x); \ + if (x.isDouble()) { \ + double a = x.toDouble(); \ + push(stack, static_cast(op)); \ + } else { \ + int64_t a = x.toInt(); \ + push(stack, static_cast(op)); \ + } \ + }, \ + aliasAnalysisFromSchema()) +#define DEFINE_BOOL_OP(aten_op, op) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".bool(bool a, bool b) -> bool"), \ + [](Stack& stack) { \ + bool a, b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) +#define DEFINE_STRING_OP(op_name, string_op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#op_name ".str(str a, str b) ->" #result), \ + [](Stack& stack) { \ + auto b = pop(stack).toStringRef(); \ + auto a = pop(stack).toStringRef(); \ + push(stack, string_op); \ + }, \ + aliasAnalysisFromSchema()) + +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +//----------------------------------------------------------------------------- +#define DEFINE_UNARY_COMPLEX_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".complex(complex 
a) -> " #result), \ + [](Stack& stack) { \ + c10::complex a; \ + pop(stack, a); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +// Some complex unary ops (like abs, angle) return real valued output, but most +// other unary ops return complex valued output. So, this macro is used in the +// former case where we can explicitly pass complex_result_cast argument, which +// is set to c10::complex in the macro `DEFINE_UNARY_OP_WITH_COMPLEX` +// defined below. +#define DEFINE_UNARY_OP_WITH_COMPLEX_CAST( \ + aten_op, \ + op, \ + int_result, \ + float_result, \ + complex_result, \ + complex_result_cast) \ + DEFINE_UNARY_INT_OP(aten_op, op, int_result), \ + DEFINE_UNARY_FLOAT_OP(aten_op, op, float_result), \ + DEFINE_UNARY_COMPLEX_OP(aten_op, op, complex_result), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".Scalar(Scalar a) -> Scalar"), \ + [](Stack& stack) { \ + IValue x; \ + pop(stack, x); \ + if (x.isDouble()) { \ + double a = x.toDouble(); \ + push(stack, static_cast(op)); \ + } else if (x.isComplexDouble()) { \ + c10::complex a = x.toComplexDouble(); \ + push(stack, static_cast(op)); \ + } else { \ + int64_t a = x.toInt(); \ + push(stack, static_cast(op)); \ + } \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_UNARY_OP_WITH_COMPLEX(aten_op, op, int_result, float_result) \ + DEFINE_UNARY_OP_WITH_COMPLEX_CAST( \ + aten_op, op, int_result, float_result, complex, c10::complex) + +#define DEFINE_GENERIC_OP_WITH_COMPLEX( \ + aten_op, \ + int_op, \ + float_op, \ + complex_op, \ + int_result, \ + float_result, \ + complex_result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op ".int(int a, int b) -> " #int_result), \ + [](Stack& stack) { \ + int64_t a, b; \ + pop(stack, a, b); \ + push(stack, int_op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".complex(complex a, complex b) -> " #complex_result), \ + [](Stack& stack) { \ + c10::complex a, b; \ + pop(stack, 
a, b); \ + push(stack, complex_op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".float(float a, float b) -> " #float_result), \ + [](Stack& stack) { \ + double a, b; \ + pop(stack, a, b); \ + push(stack, float_op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_INT_COMPLEX_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op \ + ".int_complex(int a, complex b) -> " #result), \ + [](Stack& stack) { \ + int64_t a; \ + c10::complex b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".complex_int(complex a, int b) -> " #result), \ + [](Stack& stack) { \ + c10::complex a; \ + int64_t b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_FLOAT_COMPLEX_OP(aten_op, op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".float_complex(float a, complex b) -> " #result), \ + [](Stack& stack) { \ + double a; \ + c10::complex b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()), \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA( \ + #aten_op ".complex_float(complex a, float b) -> " #result), \ + [](Stack& stack) { \ + c10::complex a; \ + double b; \ + pop(stack, a, b); \ + push(stack, op); \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX_AVOID_COLLISION_GENERIC( \ + aten_op, int_op, float_op, complex_op, result, string_val) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op string_val \ + "(Scalar a, Scalar b) -> " #result), \ + [](Stack& stack) { \ + IValue x, y; \ + pop(stack, x, y); \ + if (x.isComplexDouble()) { \ + c10::complex a = x.toComplexDouble(); \ + if (y.isComplexDouble()) { \ + c10::complex b = y.toComplexDouble(); \ + push(stack, complex_op); \ + } else if (y.isDouble()) { \ + double b = y.toDouble(); \ 
+ push(stack, complex_op); \ + } else { \ + int64_t b = y.toInt(); \ + push(stack, complex_op); \ + } \ + } else if (x.isDouble()) { \ + double a = x.toDouble(); \ + if (y.isComplexDouble()) { \ + c10::complex b = y.toComplexDouble(); \ + push(stack, complex_op); \ + } else if (y.isDouble()) { \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else { \ + int64_t b = y.toInt(); \ + push(stack, float_op); \ + } \ + } else { \ + int64_t a = x.toInt(); \ + if (y.isComplexDouble()) { \ + c10::complex b = y.toComplexDouble(); \ + push(stack, complex_op); \ + } else if (y.isDouble()) { \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else { \ + int64_t b = y.toInt(); \ + push(stack, int_op); \ + } \ + } \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX_WITHOUT_INT_COMPLEX_PAIR( \ + aten_op, int_op, float_op, complex_op, result) \ + OperatorGeneratorArgs( \ + TORCH_SELECTIVE_SCHEMA(#aten_op "(Scalar a, Scalar b) -> " #result), \ + [](Stack& stack) { \ + IValue x, y; \ + pop(stack, x, y); \ + if (x.isComplexDouble()) { \ + c10::complex a = x.toComplexDouble(); \ + if (y.isComplexDouble()) { \ + c10::complex b = y.toComplexDouble(); \ + push(stack, complex_op); \ + } else if (y.isDouble()) { \ + double b = y.toDouble(); \ + push(stack, complex_op); \ + } \ + } else if (x.isDouble()) { \ + double a = x.toDouble(); \ + if (y.isComplexDouble()) { \ + c10::complex b = y.toComplexDouble(); \ + push(stack, complex_op); \ + } else if (y.isDouble()) { \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else { \ + int64_t b = y.toInt(); \ + push(stack, float_op); \ + } \ + } else { \ + int64_t a = x.toInt(); \ + if (y.isDouble()) { \ + double b = y.toDouble(); \ + push(stack, float_op); \ + } else if (y.isInt()) { \ + int64_t b = y.toInt(); \ + push(stack, int_op); \ + } \ + } \ + }, \ + aliasAnalysisFromSchema()) + +#define DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX( \ + aten_op, int_op, float_op, complex_op, result) \ 
+ DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX_AVOID_COLLISION_GENERIC( \ + aten_op, int_op, float_op, complex_op, result, "") + +#define DEFINE_BINARY_OP_WITH_COMPLEX(aten_op, op) \ + DEFINE_GENERIC_OP_WITH_COMPLEX(aten_op, op, op, op, int, float, complex), \ + DEFINE_INT_COMPLEX_OP(aten_op, op, complex), \ + DEFINE_FLOAT_COMPLEX_OP(aten_op, op, complex), \ + DEFINE_INT_FLOAT_OP(aten_op, op, float), \ + DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX(aten_op, op, op, op, Scalar) + +#define DEFINE_COMPARISON_OP_WITH_COMPLEX(aten_op, op) \ + DEFINE_GENERIC_OP_WITH_COMPLEX(aten_op, op, op, op, bool, bool, bool), \ + DEFINE_INT_FLOAT_OP(aten_op, op, bool), \ + DEFINE_FLOAT_COMPLEX_OP(aten_op, op, bool), \ + DEFINE_SCALAR_BINARY_OP_WITH_COMPLEX_WITHOUT_INT_COMPLEX_PAIR( \ + aten_op, op, op, op, bool), \ + DEFINE_STR_CMP_OP(aten_op, op) + +TORCH_API at::Generator make_generator_for_device( + c10::Device device, + std::optional seed = std::nullopt); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/script_profile.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/script_profile.h new file mode 100644 index 0000000000000000000000000000000000000000..04db7ab64ca4c0a2849ca29da6c4b7ad823d4eef --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/script_profile.h @@ -0,0 +1,108 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include +#include + +namespace torch::jit { +namespace profiling { + +struct Datapoint { + using Timepoint = std::chrono::time_point; + SourceRange sourceRange; + Timepoint start; + Timepoint end; + + explicit Datapoint(SourceRange sr) + : sourceRange(std::move(sr)), start(std::chrono::steady_clock::now()) {} +}; + +class TORCH_API InstructionSpan { + public: + explicit InstructionSpan(Node& /*node*/); + ~InstructionSpan(); + InstructionSpan(InstructionSpan&&) = delete; + InstructionSpan& operator=(InstructionSpan&&) = delete; + + private: + std::unique_ptr datapoint_; +}; + +bool TORCH_API isProfilingOngoing(); + +} // namespace profiling + +struct TORCH_API InstructionStats : public CustomClassHolder { + int64_t count{0}; + std::chrono::nanoseconds duration{0}; +}; + +class TORCH_API SourceStats : public CustomClassHolder { + public: + using LineMap = c10::Dict>; + + SourceStats(SourceRef source, const LineMap& lineMap) + : source_(std::move(source)), lineMap_(lineMap) {} + + const SourceRef& getSourceRef() const { + return source_; + } + + const LineMap& getLineMap() const { + return lineMap_; + } + + private: + SourceRef source_; + LineMap lineMap_; +}; + +/** + * ScriptProfile is an underlying C++ implementation for TorchScript profiling. 
+ * The profiling section is specified by calling enable() and disable(): + * + * ... + * scriptProfile.enable(); + * ... + * (scripts) + * ... + * scriptProfile.disable(); + * ... + * + * NOTE: you cannot attach the profiler while the script is running. + * + * To retrieve collected runtime data, users may call dumpStats() and do + * arbitrary filtering on the data they want. Note that dumpStats() should + * not be called inside a profiling section. + * In general, stats are aggregated per source function body, and then by line + * number. + */ +class TORCH_API ScriptProfile : public CustomClassHolder { + // Aggregates datapoints by function source id, then by line number. + using LineMap = std::map; + using SourceMap = std::map>; + + public: + void enable(); + void disable(); + const SourceMap& dumpStats(); + void addDatapoint(std::shared_ptr /*datapoint*/); + ~ScriptProfile() override; + + private: + bool enabled_{false}; + std::vector> datapoints_; + SourceMap sourceMap_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/serialized_shape_function_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/serialized_shape_function_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..d690b7e1a4eedb8ddd14ef9f52780ea6e8940602 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/serialized_shape_function_registry.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +TORCH_API const std::string& GetSerializedShapeFunctions(); + +TORCH_API const OperatorMap& GetShapeFunctionMappings(); + +TORCH_API const OperatorMap>& +GetBoundedShapeMappings(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/shape_function_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/shape_function_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..c2f8ed5907bea4a2a241b9187236203ae04965f9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/shape_function_registry.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +TORCH_API const std::string& GetSerializedFuncs(); + +TORCH_API const OperatorMap& GetFuncMapping(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/simple_graph_executor_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/simple_graph_executor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..81244822f5a6521b01968b0789070827fad76820 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/simple_graph_executor_impl.h @@ -0,0 +1,28 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +namespace torch::jit { + +struct TORCH_API SimpleGraphExecutorImpl : public GraphExecutorImplBase { + SimpleGraphExecutorImpl( + const std::shared_ptr& graph, + std::string function_name); + + const ExecutionPlan& getPlanFor( + Stack& stack, + std::optional remaining_bailout_depth) override; + 
GraphExecutorState getDebugState() override; + ~SimpleGraphExecutorImpl() override = default; + + private: + std::optional execution_plan_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/slice_indices_adjust.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/slice_indices_adjust.h new file mode 100644 index 0000000000000000000000000000000000000000..5c386e8728c02f2a83fd9cd97efc78663f989463 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/slice_indices_adjust.h @@ -0,0 +1,31 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit { + +// Copyright (c) 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, +// 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019, 2020 Python Software +// Foundation; All Rights Reserved +// +// Stolen (with appropriate modifications) by @agolynski +// (https://github.com/pytorch/pytorch/pull/33019) from cpython repo +// Objects/sliceobject.c with comment: this is harder to get right than you +// might think +// +// This adjusts indexes according to python list semantics and returns number +// of elements in the resulting list. +TORCH_API int64_t slice_indices_adjust( + int64_t length, + int64_t* start, + int64_t* stop, + int64_t step); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ProcessedNodeInputs.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ProcessedNodeInputs.h new file mode 100644 index 0000000000000000000000000000000000000000..17742bab4f2e11bd6ddd909c3406a9c5a759bbf0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ProcessedNodeInputs.h @@ -0,0 +1,246 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#include +#include + +/** + * Packed representation of input indices for ProcessedNode. + */ +class ProcessedNodeInputs { + private: + // This keeps the size usage for inputs + outputs down to 16 bytes; + // we use 12 bytes, and then two 2-byte integers are used to store + // the outputs. 
+ static constexpr size_t kMaxInlineInputs = 5; + + public: + ProcessedNodeInputs() : ProcessedNodeInputs(0) {} + + explicit ProcessedNodeInputs(size_t size) { + TORCH_DCHECK_LT(size, (1 << 16)); + if (size <= kMaxInlineInputs) { + repr_.inline_repr_.size = size; + } else { + new (&repr_.outline_repr_) HeapArrayPtr(size); + } + } + + uint16_t operator[](uint16_t idx) const { + // NOLINTNEXTLINE(*const-cast*) + return (*const_cast(this))[idx]; + } + + uint16_t& operator[](uint16_t idx) { + if (C10_LIKELY(repr_.is_inline())) { + TORCH_DCHECK_LT(idx, repr_.inline_repr_.size); + return repr_.inline_repr_.inputs[idx]; + } else { + return repr_.outline_repr_[idx]; + } + } + + [[nodiscard]] uint16_t size() const { + if (C10_LIKELY(repr_.is_inline())) { + return repr_.inline_repr_.size; + } else { + return repr_.outline_repr_.size(); + } + } + + [[nodiscard]] bool empty() const { + return size() == 0; + } + + private: + class HeapArrayPtr { + public: + HeapArrayPtr() = default; + ~HeapArrayPtr() = default; + + explicit HeapArrayPtr(uint16_t size) : array_(alloc(size)) {} + + HeapArrayPtr(const HeapArrayPtr& rhs) : array_(alloc(rhs.size())) { + if (rhs.array_) { + std::memcpy( + array_.get(), + rhs.array_.get(), + (rhs.size() + 1) * sizeof(uint16_t)); + } + } + + HeapArrayPtr& operator=(const HeapArrayPtr& rhs) { + if (&rhs == this) { + return *this; + } + + if (size() != rhs.size()) { + array_ = alloc(rhs.size()); + } + + if (rhs.array_) { + std::memcpy( + array_.get(), + rhs.array_.get(), + (rhs.size() + 1) * sizeof(uint16_t)); + } + return *this; + } + + HeapArrayPtr(HeapArrayPtr&&) noexcept = default; + HeapArrayPtr& operator=(HeapArrayPtr&&) noexcept = default; + + [[nodiscard]] bool empty() const { + return size() != 0; + } + + [[nodiscard]] uint16_t size() const { + return array_ ? 
array_[0] : 0; + } + + uint16_t operator[](uint16_t idx) const { + TORCH_DCHECK_LT(idx, size()); + return array_[idx + 1]; + } + + uint16_t& operator[](uint16_t idx) { + TORCH_DCHECK_LT(idx, size()); + return array_[idx + 1]; + } + + private: + // NOLINTNEXTLINE(modernize-avoid-c-arrays) + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + std::unique_ptr array_; + + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + // NOLINTNEXTLINE(modernize-avoid-c-arrays) + static std::unique_ptr alloc(uint16_t num_elts) { + if (num_elts) { + auto result = std::make_unique(num_elts + 1); + result[0] = num_elts; + return result; + } else { + return nullptr; + } + } + }; + + // We want ProcessedNode to be able to pack two more `uint16_t` + // fields after its ProcessedNodeInputs, and we'll end up being + // aligned to an 8-byte boundary anyway. We could avoid this pragma + // at the cost of having to move ProcessedNode::outputs_offset_ and + // ProcessedNode::num_outputs_ into this class, which would be + // awkward. +#pragma pack(push, 2) + union Repr { + [[nodiscard]] bool is_inline() const { + uint8_t tag = 0; + // Use of reinterpret_cast to pointer to char or unsigned char + // is defined behavior; see + // https://en.cppreference.com/w/cpp/language/reinterpret_cast . + std::memcpy(&tag, reinterpret_cast(this), 1); + // HeapArrayPtr will be represented as a plain old pointer, + // which will have alignment to at least a 2-byte boundary + // (because it's uint16_t*) and more likely an 8- or 16-byte + // boundary because malloc will tend to just align everything to + // one of those. So, we just set tag to 1 when inline_repr_ is + // active so as to be able to differentiate the two. 
+ return (tag & 1) != 0; + } + + // NOLINTNEXTLINE(modernize-use-equals-default) + Repr() {} + + ~Repr() { + destroyIfOutline(); + } + + Repr(const Repr& rhs) { + if (rhs.is_inline()) { + std::memcpy(&inline_repr_, &rhs.inline_repr_, sizeof(inline_repr_)); + } else { + new (&outline_repr_) OutlineRepr(rhs.outline_repr_); + } + } + + Repr& operator=(const Repr& rhs) { + if (&rhs == this) { + return *this; + } + if (rhs.is_inline()) { + destroyIfOutline(); + new (&inline_repr_) InlineRepr(); + std::memcpy(&inline_repr_, &rhs.inline_repr_, sizeof(inline_repr_)); + } else { + if (is_inline()) { + new (&outline_repr_) OutlineRepr(rhs.outline_repr_); + } else { + outline_repr_ = rhs.outline_repr_; + } + } + return *this; + } + + Repr(Repr&& rhs) noexcept { + if (rhs.is_inline()) { + std::memcpy(&inline_repr_, &rhs.inline_repr_, sizeof(inline_repr_)); + } else { + new (&outline_repr_) OutlineRepr(std::move(rhs.outline_repr_)); + } + } + + Repr& operator=(Repr&& rhs) noexcept { + if (&rhs == this) { + return *this; + } + + if (rhs.is_inline()) { + destroyIfOutline(); + new (&inline_repr_) InlineRepr(); + std::memcpy(&inline_repr_, &rhs.inline_repr_, sizeof(inline_repr_)); + } else { + if (is_inline()) { + new (&outline_repr_) OutlineRepr(std::move(rhs.outline_repr_)); + } else { + outline_repr_ = std::move(rhs.outline_repr_); + } + } + + return *this; + } + + struct InlineRepr { + uint8_t tag = 0x1; + uint8_t size{}; + uint16_t inputs[kMaxInlineInputs]{}; + }; + + using OutlineRepr = HeapArrayPtr; + + InlineRepr inline_repr_{}; + OutlineRepr outline_repr_; + + private: + void destroyIfOutline() { + if (!is_inline()) { + outline_repr_.~OutlineRepr(); + } + } + } repr_; +#pragma pack(pop) +}; + +static_assert( + sizeof(ProcessedNodeInputs) == 12, + "ProcessedNodeInputs has the wrong size!"); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/fusion.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/fusion.h new file mode 100644 index 0000000000000000000000000000000000000000..3009ca031cf304074424eb64b52e07fa0937978c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/fusion.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +TORCH_API void fuseStaticSubgraphs( + std::shared_ptr graph, + size_t min_size); + +TORCH_API void performTensorExprFusion( + std::shared_ptr graph, + std::vector sample_inputs); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/impl.h new file mode 100644 index 0000000000000000000000000000000000000000..7ace18127095dba25390c7bda2b448e2b55fd504 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/impl.h @@ -0,0 +1,1153 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef FBCODE_CAFFE2 +#include +#include +#endif + +namespace torch::jit { + +TORCH_API bool canEnableStaticRuntime( + const std::shared_ptr& graph); + +TORCH_API std::string dumpValueSet( + const c10::FastSet& value_set, + const char* set_name = ""); + +inline bool doesNotHeapAllocateWhenStoredInIValue(const Type& type) { + switch (type.kind()) { + // NOTE: NumberType may allocate because it includes complex. 
+ case TypeKind::NoneType: + case TypeKind::IntType: + case TypeKind::FloatType: + case TypeKind::BoolType: + case TypeKind::DeviceObjType: + case TypeKind::StreamObjType: + return true; + default: + return false; + } +} + +inline c10::Symbol getStaticRuntimeMetadataSymbol() { + return Symbol::attr("static_runtime::metadata"); +} + +inline bool borrowsOutputs(c10::Symbol kind) { + static const std::array symbols_with_borrowed_outputs = { + c10::Symbol::fromQualString("static_runtime::select_tensor"), + c10::Symbol::fromQualString("static_runtime::dict_unpack"), + c10::Symbol::fromQualString("static_runtime::VarTupleUnpack"), + c10::Symbol::fromQualString("prim::IfThenElse"), + }; + return std::find( + symbols_with_borrowed_outputs.begin(), + symbols_with_borrowed_outputs.end(), + kind) != symbols_with_borrowed_outputs.end(); +} + +// Group values used by `graph` into three categories: +// +// - output_aliases: +// values that are either outputs or contain aliases of outputs +// - external_aliases: +// values that are inputs, constants, or their aliases. +// The output aliases that end up here are as a result of aliasDb failing to +// recognize them as outputs due to collection object (e.g., Tuple) aliasing +// inputs. +// Values that don't show up in output_aliases or external_aliases are created +// and consumed within the graph. 
+class ValueGroup { + public: + explicit ValueGroup() = default; + void init(const Block& block, const AliasDb& db); + + bool isExternalAlias(const Value* value) const { + return external_aliases_.find(value) != external_aliases_.end(); + } + + bool isOutputAlias(const Value* value) const { + return output_aliases_.find(value) != output_aliases_.end(); + } + + bool isAlwaysAlive(const Value* value) const { + return isExternalAlias(value) || isOutputAlias(value); + } + + std::string toString() const { + return c10::str( + dumpValueSet(output_aliases_, "ValueGroup::output_aliases_"), + "\n", + dumpValueSet(external_aliases_, "ValueGroup::external_aliases_")); + } + + private: + c10::FastSet output_aliases_; + c10::FastSet external_aliases_; +}; + +class TORCH_API ManagedTensorRanges { + public: + ManagedTensorRanges() = default; + ManagedTensorRanges( + Block& block, + const AliasDb& alias_db, + const c10::FastSet& managed_tensor_values); + + // If true, then this node is the last use of at least one + // managed tensor. availableTensorValuesAfterNode(node) will return a vector + // of the managed tensors that are available for reuse + // in the nodes following this one. + bool nodeFreesManagedTensors(Node* node) const; + const std::vector& availableTensorValuesAfterNode( + Node* node) const; + + // For testing. True if v1 and v2 are both mutable types and have lifetimes + // that overlap. + bool lifetimesOverlap(const Value* v1, const Value* v2) const; + + private: + struct Lifetime { + Lifetime(size_t start_, size_t end_) : start(start_), end(end_) {} + size_t start; + size_t end; + }; + + // Returns nullptr if we are not tracking the lifetime of value + Lifetime* getLifetime(const Value* value); + const Lifetime* getLifetime(const Value* value) const; + // Collect all values in the input that have tracked lifetimes. 
+ // A value's lifetime may not be tracked if it is a graph input + // or immutable type (containers with at least one mutable + // type are mutable) + std::vector collectValuesWithTrackedLifetimes( + at::ArrayRef values); + void extendLifetime(Value* input, size_t new_end); + void extendInputLifetime(Node* node, size_t new_end); + + // Maps Node* to the set of managed tensors that are now available + // for reuse after this node. + c10::FastMap> node_to_newly_free_tensors_; + // Maps each Value* to its lifetime (start node index, end node index) + c10::FastMap value_lifetimes_; +}; + +struct TORCH_API StaticModuleOptions { + // enabling out variant allows Static Runtime to do memory planning + bool enable_out_variant{true}; + // to reuse tensor storage for tensors whose live-range do not overlap to + // reduce memory footprint (enable_out_variant must be true) + bool optimize_memory{true}; + // to batch allocate tensor storage for output tensors of the + // graph, where storage is deallocated outside static runtime + // (enable_out_variant must be true) + bool manage_output_tensors{false}; + // Gates the ReplaceWithCopy pass, which replaces ops that + // sometimes alias their outputs with out variants that + // always copy (so the output may participate in memory planning). + // Since replacing with copies is done after TensorExpr fusion, the + // resulting graph does not conform to the assumptions made in the fuser. + // So, even if this flag is turned on, the ReplaceWithCopy pass will not + // be executed if TensorExpr fusion is enabled. + bool use_copy_variants{true}; + // Gates the ReplaceWithMaybeCopy pass, which replaces ops that + // sometimes alias their outputs with subgraphs that include an out + // variant. + // For the same reason as `use_copy_variants`, the ReplaceWithMaybeCopy pass + // will not be executed if TensorExpr fusion is enabled, even if this flag + // is turned on. 
+ bool use_maybe_copy_variants{true}; + // enable TensorExpr fusion of ops at model loading time + bool enable_tensorexpr_fusion{false}; +}; + +/* + Responsible for plugging StaticRuntime metadata onto the + IR nodes. StaticRuntimeMetdata extends CustomClassHolder + which can be casted to IValue and attached to IR node. + This is needed to pass parent graph metadata to forked + graph in presence of prim::fork operator +*/ +class TORCH_API StaticRuntimeMetadata : public torch::CustomClassHolder { + public: + explicit StaticRuntimeMetadata(const StaticModuleOptions& opts) + : opts_(opts) {} + + const StaticModuleOptions& get_opts() { + return opts_; + } + + private: + StaticModuleOptions opts_; +}; + +/// The static runime supports two execution modes. +/// +/// Mode 1: single-threaded with no parallelism except for intra-op parallelism +/// For this mode, you can do either: +/// @code +/// // m is a TorchScript module +/// auto module = StaticModule(m, opts); +/// auto output = module(args, kwargs); +/// @endcode +/// +/// or +/// +/// @code +/// // g is the TorchScript graph +/// auto module = StaticModule(g, opts); +/// auto output = module(args, kwargs); +/// @endcode +/// +/// Mode 2: similar to data parallelism, run the same model for different inputs +/// on different threads at the same time. +/// You should have one StaticModule per model, and one StaticRuntime instance +/// per running thread. To avoiding creating StaticRuntimes on the fly, use a +/// synchronized stack (i.e. boost::lockfree::stack) to cache all the +/// StaticRuntime instances in your code. +/// @code +/// // initialization +/// auto module = std::make_shared(m, opts); +/// +/// // 128 is good for most cases. 
Pick a number that works for you +/// boost::lockfree::stack, +/// boost::lockfree::fixed_sized> pool(128); +/// +/// // inference +/// std::shared_ptr runtime = nullptr; +/// pool.pop(runtime); +/// if (!runtime) { +/// // holds a reference to the underlying module +/// // but does its own memory management +/// runtime = std::make_shared(*module); +/// } +/// auto output = runtime(args, kwargs); +/// pool.push(runtime); +/// @endcode +/// +class MemoryPlanner; +class StaticNodeInfo; +class ProcessedNode; +class StaticRuntime; + +using SROperator = std::function; + +#ifdef FBCODE_CAFFE2 +struct TORCH_API SROperatorObserver { + using OperatorCallback = void (*)(const Node*); + OperatorCallback startCb = nullptr; + OperatorCallback endCb = nullptr; + + static void setCurrentThreadObserver(SROperatorObserver* observer); + static SROperatorObserver* getCurrentThreadObserver(); + static void onStart(const Node* name); + static void onEnd(const Node* name); +}; +#endif + +class TORCH_API ProcessedFunction { + public: + ProcessedFunction( + Node* node, + bool enable_out_variant, + bool check_memory_overlap); + + enum class Kind : uint8_t { + kOutVariant, + kNativeFunction, + kInterpreterFallback, + }; + + void run(ProcessedNode* pnode) const { + return f_(pnode); + } + + Kind kind() const { + return kind_; + } + + bool checkMemoryOverlap() const { + return check_memory_overlap_; + } + + size_t num_outputs() const { + return num_outputs_; + } + + private: + SROperator f_; + Kind kind_{ProcessedFunction::Kind::kOutVariant}; + bool check_memory_overlap_{false}; + size_t num_outputs_{0}; +}; + +// A `BlockInfo` instance stores all of the shared state that each +// `BlockRunner` will need to access. Most of this information is +// read-only and shared between threads. +// - Each `BlockInfo` corresponds to one block in the graph. +// - Each `BlockInfo` may be used by multiple block runners (when there are many +// threads). 
+// - All of the `BlockInfo`s are stored in a vector in the `StaticModule` and +// are initialized during `StaticModule` construction. +// - Most of the information stored is used to initialize the block's memory +// planner. +class BlockInfo { + public: + BlockInfo(uint32_t input_idx, Block& block); + + void set_nodes( + std::vector nodes, + const c10::FastMap& node_has_out_variant); + + const std::vector& nodes() const { + return nodes_; + } + + size_t num_nodes() const; + + size_t num_inputs() const { + return block_.inputs().size(); + } + + size_t num_outputs() const { + return block_.outputs().size(); + } + + graph_node_list node_ptrs() const { + return block_.nodes(); + } + + void set_output_indices(std::vector indices) { + output_indices_ = std::move(indices); + } + + const std::vector& block_output_indices() const { + return output_indices_; + } + + auto block_inputs_idx() const { + return input_idx_; + } + + bool node_is_optimizable_container_type(const Node* node) const { + return node_is_optimizable_container_type_.find(node) != + node_is_optimizable_container_type_.end(); + } + + bool value_is_managed_tensor(const Value* value) const { + return managed_tensor_values_.find(value) != managed_tensor_values_.end(); + } + + bool value_is_leaked_container(const Value* value) const { + return leaked_values_.find(value) != leaked_values_.end(); + } + + const ValueGroup& value_group() const { + return value_group_; + } + + const ManagedTensorRanges& managed_tensor_ranges() const { + return managed_tensor_ranges_; + } + + void init_value_group(const AliasDb& alias_db) { + value_group_.init(block_, alias_db); + } + + void prepare_for_memory_planner( + const AliasDb& alias_db, + const StaticModuleOptions& opt); + + const auto& managed_output_tensor_values() const { + return managed_output_tensor_values_; + } + + const auto& managed_tensor_values() const { + return managed_tensor_values_; + } + + const auto& leaked_values() const { + return leaked_values_; + } + + 
private: + std::vector nodes_; + + ValueGroup value_group_; + + c10::FastSet node_is_optimizable_container_type_; + c10::FastSet managed_tensor_values_; + c10::FastSet managed_output_tensor_values_; + c10::FastSet leaked_values_; + + ManagedTensorRanges managed_tensor_ranges_; + + // The index of this block's inputs in the shared values_ array. + const uint16_t input_idx_; + // The indices of this block's outputs in the shared values_ array. + std::vector output_indices_; + Block& block_; +}; + +class TORCH_API StaticModule { + public: + explicit StaticModule( + const std::shared_ptr& g, + const StaticModuleOptions& opts = StaticModuleOptions(), + std::vector sample_inputs = {}); + + explicit StaticModule( + const torch::jit::Module& m, + bool is_frozen = false, + const StaticModuleOptions& opts = StaticModuleOptions(), + std::vector sample_inputs = {}); + + private: + explicit StaticModule( + std::pair, std::optional> + graph_and_module, + const StaticModuleOptions& opts); + + public: + using KeywordArgs = std::unordered_map; + c10::IValue operator()( + const std::vector& args, + const KeywordArgs& kwargs = KeywordArgs()); + c10::IValue operator()( + std::vector&& args, + const KeywordArgs& kwargs = KeywordArgs()); + + const Graph& graph() const { + return *graph_; + } + + const Module& module() const { + DCHECK(module_.has_value()); + return *module_; + } + + const StaticModuleOptions& opts() const; + + size_t num_inputs() const; + size_t num_outputs() const; + + size_t num_constants() const { + return constants_.size(); + } + + size_t num_intermediate_values() const { + return num_intermediate_values_; + } + + size_t total_num_values() const { + return num_inputs() + num_constants() + num_intermediate_values(); + } + + [[nodiscard]] const std::vector& output_indices() const { + return output_indices_; + } + + const std::vector& constants() const { + return constants_; + } + + const BlockInfo& block_info(Block* block) const { + return block_infos_.at(block); + } 
+ + Block* root_block() const { + return graph_->block(); + } + + private: + friend class StaticRuntime; + friend class BlockRunner; + + public: + auto num_nodes() const { + return std::accumulate( + block_infos_.begin(), + block_infos_.end(), + 0, + [](size_t sum, const auto& block_and_info) { + auto& block_info = block_and_info.second; + return sum + block_info.num_nodes(); + }); + } + + [[nodiscard]] Node* findNodeWithKindForTesting(const std::string& kind) const; + + const std::optional& schema() const { + return schema_; + } + + bool first_input_is_self() const { + return module_.has_value(); + } + + StaticRuntime& runtime(); + + // See [Shared values array] + size_t value_buffer_size() const { + return value_buffer_size_; + } + + private: + // Recursively prepares the BlockInfo array. + // - Populates `value_to_index` with the indices of each intermediate value + // - Returns the number of Value* processed, including sub-blocks. + size_t prepareBlockInfo( + Block* block, + const size_t start_idx, + c10::FastMap& value_to_index); + + void prepareFunctionsAndConstants( + Block* block, + const AliasDb& alias_db, + c10::FastMap& value_to_index); + + // Recursively traverse the graph and attach SR metadata + // to the prim::fork nodes as additional attributes + void attachNodeMetadata(Block* block); + + // Recurses on sub-blocks and populates the array of ProcessedNodes + // Returns (number of nodes processed, number of blocks processed) + size_t prepareStaticNodeInfos( + Block* block, + const c10::FastMap& value_to_index, + const AliasDb& alias_db, + size_t node_idx = 0); + + // Initialize various attributes that the memory planner will need. + // To be called at the tail of the ctor. 
+ void prepareForMemoryPlanner(); + + StaticModuleOptions opts_; + // metadata that is stored in IR nodes as attribute + at::intrusive_ptr sr_metadata_; + std::shared_ptr graph_; + std::optional module_; + std::optional schema_; + std::unique_ptr cached_runtime_; + + // Bookkeeping for creating new StaticRuntime instances + // IValue table (defined by prim::Constant nodes) + std::vector constants_; + // The functions to be called by corresponding ProcessedNode. + std::vector functions_; + // A list of pre-processed nodes from which ProcessedNode are created per + // StaticRuntime instance. + std::vector nodes_; + // Indices of graph outputs in the single values array. + std::vector output_indices_; + + size_t num_intermediate_values_ = 0; + + // Includes self if module_ != std::nullopt. + // Note that we might have num_inputs_ == 0 even if the schema has a `self` + // argument. In this case, `self` isn't used in the graph, but the schema + // includes it anyways to be consistent with the JIT interpreter. + size_t num_inputs_; + // See `BlockInfo` definition. The blocks are stored in depth-first order. + c10::FastMap block_infos_; + size_t value_buffer_size_ = 0; +}; + +// `BlockRunner` contains the core runtime logic. Each block runner +// corresponds to one block in the graph and has its own memory planner. +// `StaticRuntime` will initialize all `BlockRunner`s +// upon construction. Each block runner only directly executes nodes from its +// block. Special ops with sub-blocks like `prim::If` may have +// `BlockRunner`s stored in their `ProcessedNode`s; these +// sub-blocks get executed in the op's implementation. +// `StaticRuntime` stores a vector of IValues that all +// `BlockRunner`s share. This vector is used to store all +// constants, inputs, and intermediate tensors. 
+class TORCH_API BlockRunner { + public: + BlockRunner( + const StaticModule& sm, + IValue* values, + Block* block, + torch::jit::TaskLauncher* launcher, + bool is_root_block = false); + BlockRunner(BlockRunner&&) noexcept; + BlockRunner& operator=(BlockRunner&&) = delete; + ~BlockRunner(); + + C10_DISABLE_COPY_AND_ASSIGN(BlockRunner); + + using KeywordArgs = std::unordered_map; + c10::IValue operator()( + const std::vector& args, + const KeywordArgs& kwargs = KeywordArgs()); + c10::IValue operator()( + std::vector&& args, + const KeywordArgs& kwargs = KeywordArgs()); + + c10::intrusive_ptr runAsync( + const std::vector& args, + const KeywordArgs& kwargs); + + c10::intrusive_ptr runAsync( + std::vector&& args, + const KeywordArgs& kwargs); + + void benchmark( + const std::vector>& args_list, + const std::vector& kwargs_list, + const uint32_t warmup_runs, + const uint32_t main_runs, + bool print_per_node_time = false, + bool generate_ai_pep_output = false); + + struct IndividualMetrics { + float setup_time{0.0}; + float memory_alloc_time{0.0}; + float memory_dealloc_time{0.0}; + float output_dealloc_time{0.0}; + float first_iter_time{0.0}; + float total_time{0.0}; + size_t out_nodes_count{0}; + size_t total_nodes_count{0}; + std::vector time_per_node; + std::unordered_map time_per_node_type; + std::unordered_map percent_per_node_type; + std::unordered_map instances_per_node_type; + std::unordered_set out_nodes; + std::unordered_set native_nodes; + }; + + IndividualMetrics benchmark_individual_ops( + const std::vector>& args_list, + const std::vector& kwargs_list, + const uint32_t warmup_runs, + const uint32_t main_runs); + + // Input is readwrite + IValue& Input(uint32_t i) { + TORCH_DCHECK_LT(i, block_info_.num_inputs()); + return values_[i + block_info_.block_inputs_idx()]; + } + + // Output is readonly. 
The writing process happens inside ProcessedNodes + [[nodiscard]] const IValue& Output(uint32_t i) const { + DCHECK(i < outputs_.size()); + return *outputs_[i]; + } + + const std::vector outputs() const { + return outputs_; + } + + const std::vector& nodes() const { + return nodes_; + } + + std::vector& nodes() { + return nodes_; + } + + graph_node_list node_ptrs() const { + return block_info_.node_ptrs(); + } + + const Graph& graph() const { + return static_module_.graph(); + } + + const MemoryPlanner* get_memory_planner() const { + return planner_.get(); + } + + bool check_for_memory_leak( + bool output_returned = true, + bool recurse_on_sub_blocks = false); + + // WARNING: Deallocate managed output tensors. A client receiving Static + // Runtime-managed Tensors needs to be very careful to call + // `StaticRuntime::deallocateOutputTensors` after all references of output + // Tensors are gone. + void deallocateOutputTensors(); + + bool checkOutputTensorMemoryLeaks(); + + bool isManagedOutputTensor(const IValue& ivalue) const; + bool isManagedOutputTensorValue(const Value* value) const; + + void disableManageOutputTensors(); + + // This is the fallback path taken if we can't construct the memory planner + // on the first iteration. + // IMPORTANT: Nothing here should be able to throw!!! + // This function can be called from the (implicitly) `noexcept` destructor + // of Deallocator, meaning that std::terminate will be called + // if any exception escapes. Even if resetMemory and ~Deallocator were + // `noexcept(false)`, it's possible that when ~Deallocator is called, the + // stack is already unwinding, so there's still danger of calling + // std::terminate. + void resetMemory() noexcept; + + private: + // A helper object that invokes memory planner deallocation code + // when destructed. 
+ class Deallocator { + public: + explicit Deallocator(BlockRunner& block_runner) + : block_runner_(block_runner) {} + + Deallocator(Deallocator&&) = default; + Deallocator(const Deallocator&) = default; + Deallocator& operator=(const Deallocator&) = delete; + Deallocator& operator=(Deallocator&&) = delete; + ~Deallocator(); + + void setFinished() { + finished_ = true; + } + + private: + void cleanupImpl(); + + bool finished_ = false; + BlockRunner& block_runner_; + }; + + template + c10::IValue run_impl(IValueList&& args, const KeywordArgs& kwargs); + + template + c10::IValue run_impl_record_functions( + IValueList&& args, + const KeywordArgs& kwargs); + + template + c10::intrusive_ptr run_impl_async( + IValueList&& args, + const KeywordArgs& kwargs); + + template + c10::intrusive_ptr run_impl_record_functions_async( + IValueList&& args, + const KeywordArgs& kwargs); + + // helper method for copying input args/kwargs into inputs_ + template + void set_inputs(IValueList&& args, const KeywordArgs& kwargs); + + // Set Input(idx) to args[idx]. Invoked by set_inputs. Copies or moves + // depending on overload. + void set_arg(const size_t idx, std::vector&& args); + void set_arg(const size_t idx, const std::vector& args); + + // Set Input(idx) to arg. Always copies. Used for kwargs. 
+ void set_arg(const size_t idx, const IValue& arg); + + bool fast_check_and_correct_overlap_with( + ProcessedNode& n, + c10::IValue& tensor_ival); + void verify_and_correct_memory_overlap(ProcessedNode& n); + + // clean up owning refs of input IValues + void clean_up_input_ivalues() noexcept { + for (const auto idx : c10::irange(block_info_.num_inputs())) { + values_[idx + inputs_begin_] = IValue(); + } + } + + void clean_up_intermediate_ivalues() noexcept; + + IValue move_outputs_to_tuple(uint32_t num_outputs); + + void create_memory_planner(); + + float benchmark_model( + const std::vector>& args_list, + const std::vector& kwargs_list, + const uint32_t warmup_runs, + const uint32_t main_runs); + + void display_nodes( + const std::vector& args, + const KeywordArgs& kwargs); + + const StaticModule& static_module_; + const BlockInfo& block_info_; + + const bool is_root_block_; + // Cache this so we don't have to call static_module_.first_input_is_self() + const bool first_input_is_self_; + // Index of the start of this blocks inputs in the shared values_ array. + const uint16_t inputs_begin_; + + bool manage_output_tensors_enabled_ = false; + std::unique_ptr planner_; + // [Shared values array] + // ProcessedNodes reference their inputs and outputs with + // offsets into this array, which saves memory. + // All BlockRunners share the same array. The layout is as + // follows: + // [constants][block_0][block_1]...[block_N] + // Note that constants from all blocks are pooled together at the start. + // The block ordering is depth-first. + // Each block is further divided into inputs and intermediates: + // [block_i] = [inputs_i][intermediates_i] + // Each BlockRunner knows where its inputs start. Each ProcessedNode + // knows how to find the indices of its outputs/inputs in this array. 
+ IValue* values_; + + std::vector outputs_; + std::vector nodes_; +}; + +class TORCH_API StaticNodeInfo { + public: + StaticNodeInfo( + Node* n, + ProcessedFunction* fn, + ProcessedNodeInputs inputs, + uint16_t outputs_offset); + + Node* node() const { + return node_; + } + + size_t num_outputs() const { + DCHECK(fn_ != nullptr); + return fn_->num_outputs(); + } + + bool has_out_variant() const { + return fn_->kind() == ProcessedFunction::Kind::kOutVariant; + } + + private: + friend class ProcessedNode; + + Node* node_; + const ProcessedFunction* fn_; + ProcessedNodeInputs inputs_; + uint16_t outputs_offset_; +}; + +inline size_t BlockInfo::num_nodes() const { + return nodes_.size(); +} + +/* + ProcessedNodeMetadata class wraps the possible metadata + for ProcessedNode. Depending upon the nature of op, processedNode + can have one of the below possibilities of metadata: + - prim::If/prim::Loop ops contains block_runners_ as their metadata + - prim::fork op contains TaskLauncher (std::function) responsible for + execution of forked subgraph +*/ +class TORCH_API ProcessedNodeMetadata { + public: + ProcessedNodeMetadata( + std::vector runners, + torch::jit::TaskLauncher* launcher) + : block_runners_(std::move(runners)), launcher_(launcher) {} + + ProcessedNodeMetadata() : launcher_(nullptr) {} + + // deleted copy ctor/assignment as standard containers (vector) always + // have copy constructors, but their instantiation is not well-formed + // if the contained type (BlockRunner) is not copyable + ProcessedNodeMetadata(const ProcessedNodeMetadata&) = delete; + ProcessedNodeMetadata& operator=(const ProcessedNodeMetadata&) = delete; + ProcessedNodeMetadata(ProcessedNodeMetadata&&) = delete; + ProcessedNodeMetadata&& operator=(ProcessedNodeMetadata&&) = delete; + ~ProcessedNodeMetadata() = default; + + std::vector& block_runners() { + return block_runners_; + } + + void set_block_runners(std::vector runners) { + block_runners_ = std::move(runners); + } + + void 
set_launcher(torch::jit::TaskLauncher* launcher) { + launcher_ = launcher; + } + + torch::jit::TaskLauncher* launcher() { + return launcher_; + } + + private: + std::vector block_runners_; + torch::jit::TaskLauncher* launcher_; +}; + +class TORCH_API ProcessedNode { + public: + ProcessedNode() = default; + + ProcessedNode(const StaticNodeInfo& other, IValue* values) + : node_(other.node_), + fn_(other.fn_), + inputs_(other.inputs_), + outputs_offset_(other.outputs_offset_), + values_(values), + metadata_(nullptr) {} + + // These should be noexcept, but some Android build is failing + // saying the noexcept specification doesn't match the calculated + // one. Maybe std::variant is throwing it off? + ProcessedNode(ProcessedNode&&) = default; + + ProcessedNode(const ProcessedNode&) = delete; + ProcessedNode& operator=(const ProcessedNode& other) = delete; + ProcessedNode& operator=(ProcessedNode&&) = default; + ~ProcessedNode() = default; + + void run(); + + Node* node() const { + return node_; + } + + // Input is readonly + [[nodiscard]] const IValue& Input(uint32_t i) const { + return values_[inputs_[i]]; + } + + // Output is readwrite + IValue& Output(uint32_t i) { + DCHECK(i < num_outputs()); + return values_[outputs_offset_ + i]; + } + + [[nodiscard]] const IValue& Output(uint32_t i) const { + DCHECK(i < num_outputs()); + return values_[outputs_offset_ + i]; + } + + uint32_t num_outputs() const { + DCHECK(fn_ != nullptr); + return static_cast(fn_->num_outputs()); + } + + [[nodiscard]] c10::ArrayRef outputs() const { + return c10::ArrayRef( + values_ + outputs_offset_, num_outputs()); + } + + [[nodiscard]] uint16_t num_inputs() const { + return inputs_.size(); + } + + std::vector inputs_ivalue_vec() const; + + bool has_out_variant() const { + return fn_->kind() == ProcessedFunction::Kind::kOutVariant; + } + + bool has_native() const { + return fn_->kind() == ProcessedFunction::Kind::kNativeFunction; + } + +#ifndef PYTORCH_DISABLE_PER_OP_PROFILING + const char* 
get_op_name() const { + return node_->kind().toQualString(); + } +#endif + + bool check_outputs_for_memory_overlap() const { + return fn_->checkMemoryOverlap(); + } + + void set_outputs_memory_overlap_detected() { + overlap_detected_ = true; + } + + bool outputs_memory_overlap_detected() { + return overlap_detected_; + } + + bool check_and_correct_overlap_with( + const at::Tensor& input, + c10::IValue& output); + void verify_and_correct_memory_overlap(); + + void set_values(IValue* values) { + DCHECK(values_ == nullptr); + values_ = values; + } + + [[nodiscard]] uint16_t output_ivalue_index(uint16_t i) const { + DCHECK(i < num_outputs()); + return outputs_offset_ + i; + } + // used in debug mode + bool verify_no_memory_overlap(bool force_check = false) const; + + // returns pointer to ProcessedNodeMetadata or nullptr if no object is owned + ProcessedNodeMetadata* metadata() { + return metadata_.get(); + } + + // attach block_runner to metadata of ProcessedNode + void set_metadata(std::vector block_runners) { + if (metadata_ == nullptr) { + metadata_ = std::make_unique(); + } + metadata_->set_block_runners(std::move(block_runners)); + } + + // attach TaskLauncher to metadata of ProcessedNode + void set_metadata(torch::jit::TaskLauncher* launcher) { + if (metadata_ == nullptr) { + metadata_ = std::make_unique(); + } + metadata_->set_launcher(launcher); + } + + private: + [[nodiscard]] bool verify_outputs_dont_overlap_each_other() const; + + [[nodiscard]] bool verify_inputs_dont_overlap_outputs(bool force_check) const; + + Node* node_{nullptr}; + const ProcessedFunction* fn_{nullptr}; + ProcessedNodeInputs inputs_; + uint16_t outputs_offset_{0}; + bool overlap_detected_{false}; + IValue* values_ = nullptr; // unowned + // Metadata for ProcessedNode. + // 1. prim::If/Loop nodes contains sub-blocks as metadata + // 2. 
prim::fork nodes contains custom executor for async execution + std::unique_ptr metadata_; +}; + +// `StaticRuntime` is the owner of the array of IValues (used for constants, +// inputs, and intermediate tensors) that all `BlockRunner`s share. +// Upon construction, it initializes all block runners. `operator()` simply +// forwards the inputs to the top-level block runner. Each `StaticRuntime` +// instance corresponds to one `StaticModule`. Multiple `StaticRuntime` +// instances can be created; this is useful for multi-threaded execution, since +// `operator()` is not thread-safe. +class TORCH_API StaticRuntime { + public: + explicit StaticRuntime(const StaticModule& sm); + + using KeywordArgs = std::unordered_map; + c10::IValue operator()( + const std::vector& args, + const KeywordArgs& kwargs = KeywordArgs()); + c10::IValue operator()( + std::vector&& args, + const KeywordArgs& kwargs = KeywordArgs()); + + // runAsync performs inline execution of graph on + // caller thread and async execution on taskLauncher + // If no custom taskLauncher is specified, execution is done + // on inter-op thread pool. + c10::intrusive_ptr runAsync( + const std::vector& args, + const KeywordArgs& kwargs = KeywordArgs(), + torch::jit::TaskLauncher taskLauncher = at::launch); + + c10::intrusive_ptr runAsync( + std::vector&& args, + const KeywordArgs& kwargs = KeywordArgs(), + torch::jit::TaskLauncher taskLauncher = at::launch); + + bool check_for_memory_leak(bool output_returned = true); + bool checkOutputTensorMemoryLeaks(); + + void deallocateOutputTensors(); + bool isManagedOutputTensor(const IValue& ivalue) const; + void disableManageOutputTensors(); + + // Gets the top-level memory planner. Used for testing. 
+ const MemoryPlanner* get_memory_planner() const; + + void benchmark( + const std::vector>& args_list, + const std::vector& kwargs_list, + const uint32_t warmup_runs, + const uint32_t main_runs, + bool print_per_node_time = false, + bool generate_ai_pep_output = false) { + block_->benchmark( + args_list, + kwargs_list, + warmup_runs, + main_runs, + print_per_node_time, + generate_ai_pep_output); + } + + using IndividualMetrics = BlockRunner::IndividualMetrics; + + IndividualMetrics benchmark_individual_ops( + const std::vector>& args_list, + const std::vector& kwargs_list, + const int warmup_runs, + const int main_runs) { + return block_->benchmark_individual_ops( + args_list, kwargs_list, warmup_runs, main_runs); + } + + private: + // An array of IValues with unchanging size/data ptr. + class IValueArray { + public: + IValueArray() = default; + explicit IValueArray(size_t size) : array_(allocate(size)), size_(size) {} + + IValue* data() const { + return array_.get(); + } + + size_t size() const { + return size_; + } + + private: + // NOLINTNEXTLINE(modernize-avoid-c-arrays) + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + static std::unique_ptr allocate(size_t size) { + if (size) { + return std::make_unique(size); + } + return nullptr; + } + + // NOLINTNEXTLINE(modernize-avoid-c-arrays) + // NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays) + std::unique_ptr array_ = nullptr; + size_t size_ = 0; + }; + + std::unique_ptr block_; + // for execution of async operations present in graph + torch::jit::TaskLauncher async_task_launcher_; + IValueArray values_; +}; + +} // namespace torch::jit +C10_DECLARE_bool(static_runtime_disable_debug_memory_overlap_check); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/init.h new file mode 100644 index 0000000000000000000000000000000000000000..d39c7141cb007a050e10d9a7d7588fa8a1ca0604 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/init.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace torch::jit { + +void initStaticModuleBindings(PyObject* module); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/memory_planner.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/memory_planner.h new file mode 100644 index 0000000000000000000000000000000000000000..f2d5d34a57eaff8b256be9a6e11d0c7bd2bcb0bb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/memory_planner.h @@ -0,0 +1,303 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +// A StorageGroup represents a collection of tensors that share backing storage. +class StorageGroup { + public: + // Every storage group must contain at least one tensor. 
+ explicit StorageGroup(at::Tensor* tensor) : group_{tensor} {} + + void addTensor(at::Tensor* tensor) { + group_.push_back(tensor); + } + + const std::vector& group() const { + return group_; + } + + size_t maxTensorSize() const { + return max_tensor_size_; + } + + void setMaxTensorSize(size_t new_size) { + max_tensor_size_ = new_size; + } + + size_t numManagedTensors() const { + return group_.size(); + } + + private: + // The size attribute represents the amount of memory that will be + // allocated for all tensors in this storage group. Initially it + // is zero, eventually it gets updated by the MemoryPlanner. + size_t max_tensor_size_ = 0; + std::vector group_; +}; + +// A contiguous buffer of `StorageImpl`s +class ManagedStorages { + public: + ManagedStorages(); + + ~ManagedStorages(); + + void allocate(size_t capacity); + + void deallocate(); + + bool is_allocated() const { + return storages_ != nullptr; + } + + // Append a new StorageImpl to the buffer. The new StorageImpl is given the + // same size and allocator as `storageImpl` argument + void append(at::StorageImpl& storageImpl); + + at::StorageImpl& operator[](size_t idx) { + TORCH_INTERNAL_ASSERT(storages_ != nullptr); + return storages_[idx]; + } + + const at::StorageImpl& operator[](size_t idx) const { + TORCH_INTERNAL_ASSERT(storages_ != nullptr); + return storages_[idx]; + } + + size_t size() const { + return size_; + } + + bool empty() const { + return size_ == 0; + } + + size_t capacity() const { + return capacity_; + } + + private: + // We will use placement-new to add new storages to this buffer + at::StorageImpl* storages_; + + // Current number of storages that have been placed into the storage buffer + size_t size_; + + // Total allocated capacity of the storage buffer + size_t capacity_; +}; + +TORCH_API std::vector assignStorageToManagedTensors( + graph_node_list nodes, + const ManagedTensorRanges& ranges, + const c10::FastMap& tensor_value_to_tensor); + +// There are three types of ops 
in a processed graph in Static Runtime: +// 1. op with _out variant +// 2. view-producing op +// 3. tensor-producing op (could be replaced with type 1 by adding the _out +// variant to Static Runtime) +// In Static Runtime, type 2 ops are replaced with their corresponding copy +// versions when enable_out_variant is enabled and become type 1 ops.The memory +// planner only manages tensors that are outputs of type 1 ops. For type 3, the +// output tensors are allocated inside the operator and can't be directly +// managed by memory planner. +// +// Memory planner tries to minimize the number of memory allocations by +// tracking the output tensors of ops with _out variants with unique DataPtr +// (part of StorageImpl). It tries to do this in several steps: +// 1. record the max memory usage for each Tensor with unique DataPtr at the +// end of each iteration +// 2. in the next iteration, allocate the buffer for the max total usage and +// compute the offset of each allocation with regard to the single memory +// buffer, optionally reusing memory. In the first iteration, we rely on +// the default allocator for memory allocation. +// 3. free the buffer at the end of each iteration +// Steps 1 and 3 are handled by `deallocate()`, and step 2 by `allocate()`. +// Only models with simple output types are supported, i.e. None, Tensor or +// List/Tuple/Dict of Tensors. Complex output types such as List of Lists are +// not supported. +// +// Additional Optimizations: +// +// [Borrowed IValue Outputs] +// A few native ops (notably, `static_runtime::dict_unpack` and +// `static_runtime::VarTupleUnpack`) simply unpack IValues to a bunch of +// outputs without modification. For example, `dict_unpack` does the following: +// for each key in inputs: +// output[i] = dict_input[key] +// To avoid refcount bumps, the outputs of these ops are non-owning references. 
+// This requires special logic in the memory planner - when adding an op that +// borrows outputs, be sure that the memory planner is updated accordingly! +// +// [Managed Output Tensors] +// The memory planner is able to manage output tensors if the appropriate +// `StaticModuleOptions` are set. However, the memory planner handles output +// tensors separately from regular intermediate tensors: +// 1. They don't participate in memory reuse. +// 2. The memory planner cannot reclaim their backing storage until they have +// been explicitly freed by the client. + +class MemoryPlanner { + public: + MemoryPlanner( + BlockRunner* block_runner, + const BlockInfo& block_info, + bool enable_out_variant, + bool manage_output_tensors); + + // disable copying and moving + MemoryPlanner(const MemoryPlanner&) = delete; + MemoryPlanner& operator=(const MemoryPlanner&) = delete; + MemoryPlanner(MemoryPlanner&&) = delete; + MemoryPlanner& operator=(MemoryPlanner&&) = delete; + virtual ~MemoryPlanner() = default; + + void allocate(); + void deallocate(); + void deallocateOutputTensors(); + + size_t total_num_managed_tensors() const { + return num_managed_tensors_; + } + + size_t total_reused_tensors() const { + return reused_tensors_; + } + + size_t total_num_managed_output_tensors() const { + return managed_output_tensors_.size(); + } + + [[nodiscard]] size_t total_num_unmanaged() const { + return num_unmanaged_non_scalars() + num_unmanaged_scalars(); + } + + [[nodiscard]] size_t num_unmanaged_non_scalars() const { + return unmanaged_ivalues_.size() + unmanaged_borrowed_ivalues_.size(); + } + + [[nodiscard]] size_t num_unmanaged_scalars() const { + return num_unmanaged_scalar_ivalues_; + } + + size_t total_managed() const { + return managed_bytes_; + } + + size_t numOutputBufferBytes() const { + return output_buffer_bytes_; + } + + // Check if `ivalue` is contained as a managed tensor. Only used in DCHECK(). 
+ bool isManagedOutputTensor(const IValue& ivalue) const { + if (!output_buffer_ || // output buffer got already deallocated. + output_buffer_bytes_ == 0 || // memory planning is not yet initialized. + !ivalue.isTensor() // a non-tensor is never managed + ) { + return false; + } + const auto& tensor = ivalue.toTensor(); + if (!tensor.has_storage() || !tensor.storage().data_ptr()) { + return false; + } + // TODO: Improve this once D31357486 is landed. + uint8_t* tensor_ptr = + static_cast(tensor.storage().data_ptr().get()); + uint8_t* buffer_start = static_cast(output_buffer_.get()); + uint8_t* buffer_end = buffer_start + output_buffer_bytes_; + return buffer_start <= tensor_ptr && tensor_ptr < buffer_end; + } + + bool isManagedStorageImpl(const at::StorageImpl* impl) const { + if (storages_.empty()) { + return false; + } + // Comparing pointers that aren't within the same array is + // UB. We're doing fancy memory allocation stuff, so we cast to an + // integer type and carry on. + const auto impl_p = reinterpret_cast(impl); + const auto start = reinterpret_cast(&storages_[0]); + const auto end = + reinterpret_cast(&storages_[0] + storages_.size()); + return impl_p >= start && impl_p < end; + } + + bool overlapWithInternalBuffer(void* data_ptr) { + return buffer_start_ <= data_ptr && data_ptr < buffer_end_; + } + + protected: + uint8_t* allocateBuffer(size_t num_bytes); + + size_t managed_bytes_{0}; + size_t reused_tensors_{0}; + + // We allocate StorageImpls ourselves so that 1) we don't have to do + // an extra two loads per Tensor (which will likely miss in the CPU + // data cache) first reading the Storage (i.e., StorageImpl pointer) + // from the TensorImpl object and then second dereferencing it and + // 2) our memory access pattern during allocate() has high locality. 
+ // We don't have any guarantee that the model doesn't change the + // Storage for managed tensors out from under us during execution, + // so we have to check the StorageImpls each time we deallocate. + ManagedStorages storages_; + + // Contains the size (in bytes) of the data to be allocated for each storage + std::vector storages_nbytes_; + + private: + // ivalues created in one run but not managed by MemoryPlanner + std::vector unmanaged_ivalues_; + + // Special class of unmanaged values: some native ops create IValues + // in a "borrowed" state that can and must be cleaned up without a + // reference count decrement. + std::vector unmanaged_borrowed_ivalues_; + + // Even more special class of unmanaged values: if select_tensor + // outputs are outputs of the graph, then they need to be restored + // to an ordinary "strong reference" state. + std::vector borrowed_ivalues_needing_incref_; + + std::vector> managed_output_tensors_; + at::DataPtr buffer_; // allocated each time we call Run() + uint8_t* buffer_start_{nullptr}; + uint8_t* buffer_end_{nullptr}; + size_t num_managed_tensors_{0}; + size_t num_unmanaged_scalar_ivalues_{0}; + + at::DataPtr output_buffer_; + size_t output_buffer_bytes_{0}; + + virtual void allocateManagedTensors() = 0; + virtual void deallocateManagedTensors() = 0; + + void allocateOutputTensors(); +}; + +class StandardMemoryPlanner : public MemoryPlanner { + public: + StandardMemoryPlanner( + BlockRunner* block_runner, + const BlockInfo& block_info, + bool enable_out_variant, + bool manage_output_tensors, + bool optimize_memory); + + protected: + void allocateManagedTensors() override; + void deallocateManagedTensors() override; + + std::vector managed_tensors_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ops.h new file mode 100644 index 0000000000000000000000000000000000000000..eee8b1d798cbd2d60a324d482216ec7584d08ec3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/ops.h @@ -0,0 +1,192 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace at::native { +at::Tensor& reshape_copy_out( + at::Tensor& out, + const at::Tensor& self, + const at::DimVector& proposed_shape, + bool infer_size = true); +at::Tensor& to_copy_out( + Tensor& out, + const Tensor& self, + bool non_blocking, + bool copy_strides, + std::optional memory_format); +} // namespace at::native + +namespace torch::jit { + +using SROpFunctor = SROperator (*)(Node* n); +struct SROperatorFunctor { + virtual SROperator Generate(Node* /*unused*/) { + SROperator out; + return out; + } + virtual ~SROperatorFunctor() = default; +}; + +TORCH_DECLARE_REGISTRY(SROperatorRegistry, SROperatorFunctor); + +#define REGISTER_OPERATOR_FUNCTOR(name, id, ...) \ + struct SROperatorFunctor_##id : public SROperatorFunctor { \ + SROpFunctor fn = __VA_ARGS__; \ + SROperator Generate(Node* n) override { \ + return fn(n); \ + } \ + }; \ + C10_REGISTER_CLASS(SROperatorRegistry, name, SROperatorFunctor_##id) + +TORCH_DECLARE_REGISTRY(SRNativeOperatorRegistry, SROperatorFunctor); +#define REGISTER_NATIVE_OPERATOR_FUNCTOR(name, id, ...) 
\ + struct SRNativeOperatorFunctor_##id : public SROperatorFunctor { \ + SROpFunctor fn = __VA_ARGS__; \ + SROperator Generate(Node* n) override { \ + return fn(n); \ + } \ + }; \ + C10_REGISTER_CLASS( \ + SRNativeOperatorRegistry, name, SRNativeOperatorFunctor_##id) + +inline at::Tensor create_empty_from(const at::Tensor& t) { + return at::detail::empty_cpu( + {0}, + c10::typeMetaToScalarType(t.dtype()), + t.layout(), + t.device(), + std::nullopt, + std::nullopt); +} + +inline at::Tensor create_empty_from( + at::IntArrayRef sizes, + const at::Tensor& t) { + return at::detail::empty_cpu( + sizes, + c10::typeMetaToScalarType(t.dtype()), + t.layout(), + t.device(), + std::nullopt, + std::nullopt); +} + +inline at::Tensor create_empty(c10::ScalarType dtype) { + return at::detail::empty_cpu( + {0}, dtype, std::nullopt, std::nullopt, std::nullopt, std::nullopt); +} + +inline at::Tensor create_empty_from( + const at::Tensor& t, + c10::ScalarType dtype) { + return at::detail::empty_cpu( + {0}, dtype, t.layout(), t.device(), std::nullopt, std::nullopt); +} + +inline at::Tensor create_empty_from(const at::Tensor& t, c10::Layout layout) { + return at::detail::empty_cpu( + {0}, + c10::typeMetaToScalarType(t.dtype()), + layout, + t.device(), + std::nullopt, + std::nullopt); +} + +inline at::Tensor create_empty_from(const at::Tensor& t, c10::Device device) { + return at::detail::empty_cpu( + {0}, + c10::typeMetaToScalarType(t.dtype()), + t.layout(), + device, + std::nullopt, + std::nullopt); +} + +inline at::Tensor create_empty_from( + const at::Tensor& t, + c10::MemoryFormat memory_format) { + return at::detail::empty_cpu( + {0}, + c10::typeMetaToScalarType(t.dtype()), + t.layout(), + t.device(), + std::nullopt, + memory_format); +} + +inline at::Tensor create_empty_from( + const at::Tensor& t, + c10::ScalarType dtype, + c10::MemoryFormat memory_format) { + return at::detail::empty_cpu( + {0}, dtype, t.layout(), t.device(), std::nullopt, memory_format); +} + +inline bool 
checkResizedDataPtr(at::Tensor& t) { + auto const prev_data_ptr = t.data_ptr(); + t.resize_({0}); + return prev_data_ptr == t.data_ptr(); +} + +inline void fastResizeToZero(at::Tensor& t) { + t.unsafeGetTensorImpl()->set_sizes_contiguous({0}); + TORCH_INTERNAL_ASSERT_DEBUG_ONLY(checkResizedDataPtr(t)); +} + +// check if an op has an out variant registered in Static Runtime +bool opIsRegistered(const c10::Symbol& op_name); +// check if Static Runtime can run an op natively. +// prim ops that are implemented directly in the jit interpreter are implemented +// as native ops in Static Runtime +bool nativeOpIsRegistered(const c10::Symbol& op_name); + +bool canReuseInputsOutputs( + Node* n, + const c10::FastMap& node_has_out_variant); +bool isOptimizableContainerType( + Node* n, + const c10::FastMap& node_has_out_variant); + +SROperator getOutOfPlaceOperation(Node* n); +SROperator getNativeOperation(Node* n); + +bool hasVarArgs(Node* n); + +inline std::string PrintNode(const Node* node) { + std::ostringstream ss; + node->print(ss, 0, nullptr, false); + return ss.str(); +} + +inline void LogAndDumpSchema(const Node* node) { + VLOG(1) << "Found schema mismatch for: " << node->schema(); +} + +inline bool sr_schema_check(torch::jit::Node* /*unused*/) { + return true; +} + +template +bool sr_schema_check( + torch::jit::Node* node, + Schema&& first, + Schemas&&... rest) { + auto is_match = node->matches(first) || sr_schema_check(node, rest...); + if (!is_match) { + torch::jit::LogAndDumpSchema(node); + } + return is_match; +} + +bool sr_schema_check_kind(torch::jit::Node* node, c10::Symbol node_kind); +} // namespace torch::jit + +C10_DECLARE_bool(static_runtime_enable_fast_math); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/passes.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/passes.h new file mode 100644 index 0000000000000000000000000000000000000000..2169e4775263a39000b236828336d98ebb18f271 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/passes.h @@ -0,0 +1,96 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +namespace torch::jit { + +TORCH_API void FuseInferenceOpsForSparseNN( + std::shared_ptr& graph); + +TORCH_API void EliminateTrivialEquallySplit( + std::shared_ptr& graph); + +TORCH_API void FuseListUnpack(std::shared_ptr& graph); + +// If outputs_are_immutable is set to false, don't replace the view ops that +// produce aliases of graph outputs with the copy version. +TORCH_API void ReplaceWithCopy( + std::shared_ptr& graph, + bool outputs_are_immutable = true); + +TORCH_API void ReplacePermuteWithCopy( + std::shared_ptr& graph, + bool outputs_are_immutable = true); + +TORCH_API void ReplaceWithMaybeCopy( + std::shared_ptr& graph, + bool outputs_are_immutable = true); + +TORCH_API void RemoveImmutableInputDictLookups( + std::shared_ptr& graph); + +TORCH_API bool graphHasOp(std::shared_ptr& graph, const char* op_name); + +TORCH_API bool forwardHasOp(const Module& module, const char* op_name); + +TORCH_API void FuseSignLog1P(std::shared_ptr& graph); + +TORCH_API void UseVariadicTupleUnpack(const std::shared_ptr& graph); + +// c10::Symbol::fromQualString is a bit long to type everywhere, and +// we can't use a `using` statement since it's a static class function. 
+inline c10::Symbol fromQualString(const std::string& qual_string) { + return c10::Symbol::fromQualString(qual_string); +} + +// [Create owned refs for special values] +// StaticRuntimeBlockRunner moves its outputs to the return value at the end of +// run_impl. However, there's a corner case where this can cause problems. If +// we return a constant, then the only reference in the constants_ array can +// be destroyed by this move. +// We could add special logic to handle this in run_impl. But since this is a +// relatively rare corner case, it's simpler to just add an op that does nothing +// but create an owned reference to its input. This owned reference can be +// safely moved out of StaticRuntimeBlockRunner. Note that for scalars, +// this actually does a copy. +// Note that we have to do the same thing if we are returning a value from an +// outer scope in a sub-block. +TORCH_API void CreateOwnedRefsForSpecialValues(Graph& graph); + +// [Force non-empty outputs] +// It is technically possible for sub-blocks to not return anything. This is +// problematic for StaticRuntimeBlockRunner because it assumes that at least one +// output is being returned. Rather than slowing down SR with special logic for +// this corner case, we simply force blocks that return nothing to return None. +TORCH_API void ForceNonEmptyOutputs(Graph& graph); + +TORCH_API void UseVariadicGroupedAccessor(const std::shared_ptr& graph); + +TORCH_API void EliminateExtraPermuteOps(std::shared_ptr& graph); + +TORCH_API void EliminateNoOpSlice(std::shared_ptr& graph); + +TORCH_API void UseSplitAndSqueeze(std::shared_ptr& graph); + +// [Remove unnecessary outputs]] +// Removes outputs to reduce compute when it is not used later in the graph. +// Currently used to remove the max_indices output of embedding_bag, which +// isn't necessary to compute the main output. 
+TORCH_API void RemoveUnnecessaryOutputs(std::shared_ptr& graph); + +TORCH_API void RemoveUnnecessaryEmbeddingBagOutputs( + std::shared_ptr& graph); + +TORCH_API void FuseClampNaNToNum(std::shared_ptr& graph); + +TORCH_API void UseInPlaceGetRealInputsFromOptionalInputsV2( + std::shared_ptr& graph); + +TORCH_API void PrepackWeights(std::shared_ptr& graph); + +} // namespace torch::jit + +C10_DECLARE_bool(enable_clip_ranges_gather_fusions); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/processed_node_wrapper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/processed_node_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..634dc574e72f47fa1ea683a7eb37145f0f60f5ff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/processed_node_wrapper.h @@ -0,0 +1,216 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +// The following class facilitates code reuse between ProcessedNodeInputWrapper +// and ProcessedNodeOutputWrapper via CRTP +template +class ProcessedNodeWrapperBase { + public: + class ProcessedNodeWrapperBaseIter { + public: + using iterator_category = std::forward_iterator_tag; + using value_type = at::Tensor; + using difference_type = size_t; + using pointer = const at::Tensor*; + using reference = const at::Tensor&; + + ProcessedNodeWrapperBaseIter() = default; + + ProcessedNodeWrapperBaseIter( + const DerivedWrapper* container, + size_t start_idx) + : container_(container), idx_(start_idx) {} + + ProcessedNodeWrapperBaseIter& operator++() { + TORCH_DCHECK_NE(idx_, 
container_->size()); + ++idx_; + return *this; + } + + ProcessedNodeWrapperBaseIter operator++(int) { + ProcessedNodeWrapperBaseIter old = *this; + ++(*this); + return old; + } + + reference operator*() const { + TORCH_CHECK(container_ != nullptr); + return (*container_)[idx_]; + } + + pointer operator->() const { + TORCH_CHECK(container_ != nullptr); + return &(*container_)[idx_]; + } + + friend bool operator==( + ProcessedNodeWrapperBaseIter lhs, + ProcessedNodeWrapperBaseIter rhs) { + TORCH_DCHECK_EQ(lhs.container_, rhs.container_); + return lhs.idx_ == rhs.idx_; + } + + friend bool operator!=( + ProcessedNodeWrapperBaseIter lhs, + ProcessedNodeWrapperBaseIter rhs) { + return !(lhs == rhs); + } + + private: + const DerivedWrapper* container_ = nullptr; + size_t idx_ = 0; + }; + + // NB: to mimic the behavior of at::ArrayRef, both iterators are + // the const version. + using iterator = ProcessedNodeWrapperBaseIter; + using const_iterator = ProcessedNodeWrapperBaseIter; + using size_type = size_t; + using value_type = at::Tensor; + + explicit ProcessedNodeWrapperBase(ProcessedNode& pnode) : pnode_(pnode) {} + + iterator begin() { + return ProcessedNodeWrapperBaseIter(static_cast(this), 0); + } + iterator end() { + return ProcessedNodeWrapperBaseIter( + static_cast(this), + static_cast(this)->size()); + } + + const_iterator begin() const { + return ProcessedNodeWrapperBaseIter( + static_cast(this), 0); + } + const_iterator end() const { + return ProcessedNodeWrapperBaseIter( + static_cast(this), + static_cast(this)->size()); + } + + const_iterator cbegin() const { + return ProcessedNodeWrapperBaseIter( + static_cast(this), 0); + } + const_iterator cend() const { + return ProcessedNodeWrapperBaseIter( + static_cast(this), + static_cast(this)->size()); + } + + bool empty() const { + return static_cast(this)->size() == 0; + } + + protected: + ProcessedNode& pnode_; +}; + +// A ProcessedNodeWrapperBase lets us use ProcessedNode directly in a context +// where a 
container of IValues is expected. This trick is handy for avoiding +// refcount bumps in perf-sensitive native ops. For example, suppose we have an +// op that takes a list of tensors as an argument and we've turned the op into a +// variadic variant in static runtime. To use the PyTorch library implementation +// of the op, we would have to pack the variadic arguments into a list: +// std::vector tensor_list; +// tensor_list.reserve(pnode->num_outputs()); +// for (const auto i : c10::irange(pnode->num_inputs()) +// tensor_list.push_back(pnode->Input(i).toTensor()); +// op_impl(tensor_list); +// Using ProcessedNodeWrapperBase, we can avoid this round of refcount bumps. +// All we need to do is turn `op_impl` into a template and pass it +// ProcessedNodeInputWrapper(*pnode)! +class ProcessedNodeInputWrapper + : public ProcessedNodeWrapperBase { + public: + // The last `back_elements_ignored` elements are not considered. + // Same for the first `front_elements_ignored` elements. + // This is useful for ops where + // only the first N elements are tensors (N < inputs.size()). + // For instance, the last argument to VarStack is an integer dimension. 
+ explicit ProcessedNodeInputWrapper( + ProcessedNode& pnode, + size_t front_elements_ignored = 0, + size_t back_elements_ignored = 1) + : ProcessedNodeWrapperBase(pnode), + front_elements_ignored_(front_elements_ignored), + back_elements_ignored_(back_elements_ignored) { + TORCH_CHECK(front_elements_ignored_ <= pnode_.num_inputs()); + TORCH_CHECK( + back_elements_ignored_ <= + pnode_.num_inputs() - front_elements_ignored_); + } + + size_t size() const { + return pnode_.num_inputs() - back_elements_ignored_ - + front_elements_ignored_; + } + + const at::Tensor& operator[](size_t idx) const { + TORCH_CHECK(idx < size()); + return pnode_.Input(front_elements_ignored_ + idx).toTensor(); + } + + const at::Tensor& front() const { + TORCH_CHECK( + !empty(), + "Attempted to access front() of empty ProcessedNodeInputWrapper"); + return pnode_.Input(front_elements_ignored_).toTensor(); + } + + const at::Tensor& back() const { + TORCH_CHECK( + !empty(), + "Attempted to access back() of empty ProcessedNodeInputWrapper"); + return pnode_.Input(pnode_.num_inputs() - back_elements_ignored_ - 1) + .toTensor(); + } + + private: + size_t front_elements_ignored_; + size_t back_elements_ignored_; +}; + +// Similar to ProcessedNodeInputWrapper, but wraps outputs and allows for +// writing. 
+class ProcessedNodeOutputWrapper + : public ProcessedNodeWrapperBase { + public: + using ProcessedNodeWrapperBase< + ProcessedNodeOutputWrapper>::ProcessedNodeWrapperBase; + + size_t size() const { + return pnode_.num_outputs(); + } + + at::Tensor& operator[](size_t idx) const { + TORCH_CHECK(idx < size()); + return pnode_.Output(idx).toTensor(); + } + + at::Tensor& front() const { + TORCH_CHECK( + !empty(), + "Attempted to access front() of empty ProcessedNodeOutputWrapper"); + return pnode_.Output(0).toTensor(); + } + + at::Tensor& back() const { + TORCH_CHECK( + !empty(), + "Attempted to access back() of empty ProcessedNodeOutputWrapper"); + return pnode_.Output(size() - 1).toTensor(); + } +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/static_method.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/static_method.h new file mode 100644 index 0000000000000000000000000000000000000000..26d9043555d6575b127dac18735da727036d8a6f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/static_method.h @@ -0,0 +1,53 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +class StaticMethod : public torch::IMethod { + public: + StaticMethod( + std::shared_ptr static_module, + std::string method_name) + : static_module_(std::move(static_module)), + method_name_(std::move(method_name)) { + TORCH_CHECK(static_module_); + } + + c10::IValue operator()( + std::vector args, + const IValueMap& kwargs = IValueMap()) const override { + return (*static_module_)(std::move(args), kwargs); + } + + 
const std::string& name() const override { + return method_name_; + } + + protected: + void setArgumentNames( + std::vector& argument_names_out) const override { + const auto& schema = static_module_->schema(); + CAFFE_ENFORCE(schema.has_value()); + const auto& arguments = schema->arguments(); + argument_names_out.clear(); + argument_names_out.reserve(arguments.size()); + std::transform( + arguments.begin(), + arguments.end(), + std::back_inserter(argument_names_out), + [](const c10::Argument& arg) -> std::string { return arg.name(); }); + } + + private: + std::shared_ptr static_module_; + std::string method_name_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/te_wrapper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/te_wrapper.h new file mode 100644 index 0000000000000000000000000000000000000000..b359ca7db890fbae81cad1a1bd34ca7cb1977d8a --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/static/te_wrapper.h @@ -0,0 +1,49 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +namespace torch::jit { + +class TEWrapper { + public: + TEWrapper() = default; + void call(const std::vector& args); + + template + bool checkInput(const at::Tensor& t) { +#ifdef TORCH_ENABLE_LLVM + return t.is_contiguous() && t.dtype().Match(); +#else + return false; +#endif + } + +#ifdef TORCH_ENABLE_LLVM + void update(std::unique_ptr&& cg_); +#endif + + private: +#ifdef TORCH_ENABLE_LLVM + std::unique_ptr cg; +#endif +}; + +std::shared_ptr createDiv(); +std::shared_ptr createLogit(); 
+std::shared_ptr createRelu(); +std::shared_ptr createTanh(); +std::shared_ptr createSigmoid(); +std::shared_ptr createSignedLog1p(); +std::shared_ptr createClamp(); +std::shared_ptr createClampNanToNum(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_script.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_script.h new file mode 100644 index 0000000000000000000000000000000000000000..48732494fef63473ce22ca94d4015a3c18dac5fb --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_script.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// This file is temporary until native_functions.yaml and derivatives.yaml are +// merged. Ideally this should all go into native_functions.yaml + +#include +#include +#include + +namespace torch::jit { +struct GradientPair { + std::shared_ptr forward; + std::shared_ptr backward; +}; + +TORCH_API std::optional gradientInfoForSchema( + const FunctionSchema& schema); +TORCH_API bool hasGradientInfoForSchema(const FunctionSchema& schema); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..9f2ea5b6ae517a455878b178ceea1d3aaf2f86f2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry.h @@ -0,0 +1,74 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// This file is temporary until native_functions.yaml and derivatives.yaml are +// merged. Ideally this should all go into native_functions.yaml + +#include +#include + +namespace torch::jit { + +/* +ADDING A NEW SHAPE GRAPH: +- For one node schema, there is one corresponding registered shape compute +graph. The schema of the graph should be the same except for Tensor arguments. +For every Tensor input in operator schema, there should be a List[int] +corresponding to that Tensor's shape. For example: "aten::linear(Tensor input, +Tensor weight, Tensor? bias=None) -> Tensor" ==> def linear(input: List[int], +weight: List[int], bias: Optional[List[int]]) + +Additionally, arguments which are unused at the end of the schema may be left +off. This allows sharing a single graph for multiple function schemas, such as +unary operators with different trailing arguments that do not affect the output +shape. + +The shape graph should return a new, unaliased List[int] (or tuple of lists for +multiple returns) and should not modify any input lists. This allows the shape +graphs to be composed and executed. + +The shape analysis (particularly for non-complete, or symbolic shapes) works by +partially evaluating the JIT IR. 
It may be possible for a Graph to be registered +that we cannot currently partially evaluate. If this happens, please file an +issue. There are lints registered to avoid particular known patterns (continue +or break or early return in a loop). Those may be improved in the future, please +file an issue if necessary. + +To debug (and write initially) the recommended flow is to define these functions +in python and iterate there. Functions should be added to +torch/jit/_shape_functions. + +To test operators, the preferred flow is through OpInfos, with +`assert_jit_shape_analysis=True`. If this is not feasible, you can look at tests +in `test_symbolic_shape_analysis.py` such as `test_adaptive_avg_pool2d`. + +Operators which take in a list of tensors, such as concat, are not yet +supported. Concat has been special cased and could be generalized as needed. +Please file an issue. +*/ + +struct BoundedShapeGraphs { + std::shared_ptr lower_bound; + std::shared_ptr upper_bound; +}; + +TORCH_API void RegisterShapeComputeGraphForSchema( + const FunctionSchema& schema, + const std::shared_ptr& g); + +TORCH_API std::optional> shapeComputeGraphForSchema( + const FunctionSchema& schema); + +TORCH_API std::optional boundedGraphsForSchema( + const FunctionSchema& schema); + +TORCH_API std::vector RegisteredShapeComputeSchemas(); + +TORCH_API void LintShapeComputeGraph( + const FunctionSchema* schema, + const std::shared_ptr& graph); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry_util.h new file mode 100644 index 0000000000000000000000000000000000000000..324cfc5bf4f25f98004fe54a08dced86cfc318c9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/symbolic_shape_registry_util.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +// This file is temporary until native_functions.yaml and derivatives.yaml are +// merged. Ideally this should all go into native_functions.yaml + +#include +#include + +namespace torch::jit { + +TORCH_API const OperatorMap& get_tensorexpr_elementwise_set(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/vararg_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/vararg_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..45c5c94e9b37b9b84e5a79822258d163f2262fcf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/vararg_functions.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include +#include + +namespace torch::jit { + +void tupleUnpack(Stack& stack); + +void format(Stack& stack, size_t num_inputs); + +void einsum(Stack& stack, size_t num_inputs); + +void percentFormat(Stack& stack, size_t num_inputs); + +void listUnpack(Stack& stack, size_t num_outputs); + +void tupleConstruct(Stack& stack, size_t num_inputs); + +void namedTupleConstruct(Stack& stack, c10::TypePtr type, size_t num_inputs); + +void listConstruct(Stack& stack, const c10::Type& list_type, size_t num_inputs); + +void dictConstruct(Stack& stack, const c10::Type& type, size_t num_inputs); + +// as weak_ref will create a Object with a non-owning CompilationUnit reference, +// for use as a constant in the Graph to avoid a reference cycle +void createObject( + Stack& stack, + const at::ClassTypePtr& type, + bool as_weak_ref = false); + +void isinstance(Stack& stack, at::ArrayRef types); + +void tupleSlice(Stack& stack, size_t begin, size_t end); + +void dequantize(Stack& stack); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/variable_tensor_list.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/variable_tensor_list.h new file mode 100644 index 0000000000000000000000000000000000000000..335992c91015ae68f444be5f4f98a3173fe9a317 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/runtime/variable_tensor_list.h @@ -0,0 +1,22 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace torch::jit { + +// a wrapper to mark places where we expect all the at::Tensors to be +// variables +struct variable_tensor_list : public std::vector { + variable_tensor_list() = default; + template + variable_tensor_list(InputIt first, InputIt last) + : std::vector(first, last) {} + explicit variable_tensor_list(std::vector&& tensor) + : std::vector(std::move(tensor)) {} +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/callstack_debug_info_serialization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/callstack_debug_info_serialization.h new file mode 100644 index 0000000000000000000000000000000000000000..afccfce9cdfb397f7f57c33780a8c97d8a31890c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/callstack_debug_info_serialization.h @@ -0,0 +1,94 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +#include + +#include + +namespace c10 { +struct IValue; +} + +namespace torch::jit { + +class Pickler; +class InlinedCallStackSerializer { + public: + // Serialize InlinedCallStack as + // SerializedInlinedCallStack = + // [module_info, source range tag, SerializedInlinedCallStack] + // module_info = [ClassType.qualifiedName, instance_name] + // source_range_tag = unique source range id + c10::IValue serialize( + const InlinedCallStackPtr& cs_ptr, + const SourceRangeTagMap& source_range_tags); + + private: + // module_info = [ClassType.qualifiedName, instance_name] + c10::IValue serialize_module_instance_info( + const std::optional& m); + + // This caches serialized inlined callstack ptr, since many + // InlinedCallStackPtr can refer to the same one. + ska::flat_hash_map + serialized_inlined_callstack_; + // This caches serialized module instance info. + // There might be many nodes that are part of the same + // parent, grandparent etc. module. 
+ ska::flat_hash_map serialized_module_instance_info_; +}; + +class TORCH_API CallStackDebugInfoPickler { + public: + CallStackDebugInfoPickler() = default; + + std::vector pickle( + const std::unordered_map& callstack_ptrs, + const SourceRangeTagMap& source_range_tags); + + private: + InlinedCallStackSerializer css_; +}; + +class InlinedCallStackDeserializer { + public: + InlinedCallStackPtr deserialize( + const c10::IValue& iv, + const ska::flat_hash_map& source_range_map, + const std::shared_ptr& cu); + + private: + std::optional deserialize_module_instance_info( + const c10::IValue& iv, + const std::shared_ptr& cu); + + ska:: + flat_hash_map, InlinedCallStackPtr> + cached_inlined_callstacks_; + ska::flat_hash_map, ModuleInstanceInfo> + cached_module_instance_info_; +}; + +class TORCH_API CallStackDebugInfoUnpickler { + public: + ska::flat_hash_map unpickle( + const at::DataPtr& data, + size_t size, + const ska::flat_hash_map& source_range_map, + const std::shared_ptr& cu); + + private: + InlinedCallStackDeserializer csds_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export.h new file mode 100644 index 0000000000000000000000000000000000000000..d8778cb81b160e3f53680843dbe2b89fea97e0a6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export.h @@ -0,0 +1,283 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace ONNX_NAMESPACE { +class ModelProto; +} + +namespace torch::jit { + +// This map is used to keep track of parameters that should be exported +// externally. When `defer_weight_export` is true, the returned map contains +// kv pairs that map {external reference name} -> {at::Tensor to be exported}. +// It is the responsibility of the caller to export these appropriately. +// +// For example, when exporting to a zip archive, the caller may write out files +// for each entry in the export map, with the filename being the key and the +// file contents being the raw tensor data. +using RawDataExportMap = std::unordered_map; + +using SymbolDimMap = std::map; +using DimSymbolMap = std::map; + +using NodeNameMap = std::unordered_map; + +// Used for modularized export settling function and node attributes. 
+using NodeAttrNameMap = std:: + unordered_map>; + +TORCH_API std::tuple< + std::shared_ptr<::ONNX_NAMESPACE::ModelProto>, + RawDataExportMap, + SymbolDimMap, + bool, + NodeNameMap> +export_onnx( + const std::shared_ptr& graph, + const std::map& initializers, + int64_t onnx_opset_version, + const std::unordered_map< + std::string, + std::unordered_map>& dynamic_axes, + bool defer_weight_export = false, + ::torch::onnx::OperatorExportTypes operator_export_type = + ::torch::onnx::OperatorExportTypes::ONNX, + bool strip_doc_string = true, + bool keep_initializers_as_inputs = true, + const std::map& custom_opsets = {}, + bool add_node_names = true, + bool use_external_data_format = false, + const std::string& onnx_file_path = std::string(), + const NodeAttrNameMap& node_attr_to_name = {}); + +TORCH_API std::string serialize_model_proto_to_string( + const std::shared_ptr<::ONNX_NAMESPACE::ModelProto>& model_proto); + +TORCH_API void check_onnx_proto(const std::string& proto_string); + +// Serializer for both oldsyle and unified format TorchScript serialization +class TORCH_API ScriptModuleSerializer { + public: + explicit ScriptModuleSerializer( + caffe2::serialize::PyTorchStreamWriter& export_writer) + : writer_(export_writer) {} + + void writeFiles(const std::string& code_dir); + void serialize( + const Module& module, + const ExtraFilesMap& extra_files, + bool bytecode_format, + bool save_mobile_debug_info); + void serialize_unified_format(Module& module, uint64_t script_module_id); + SerializationStorageContext& storage_context(); + + ~ScriptModuleSerializer() = default; + + private: + void convertNamedType(const c10::NamedTypePtr& class_type); + void convertTypes(const at::NamedTypePtr& root_type); + void writeExtraFiles(const Module& module, const ExtraFilesMap& extra_files); + void writeByteCode(const Module& module, bool save_mobile_debug_info); + void writeArchive( + const IValue& value, + const std::string& archive_name, + const std::string& archive_dir, + 
const std::string& tensor_dir, + bool use_storage_context = false, + bool skip_tensor_data = false); + void updateSourceRangeTags(const SourceRangeRecords& ranges); + + caffe2::serialize::PyTorchStreamWriter& writer_; + std::vector constant_table_; + + std::unordered_set converted_types_; + PrintDepsTable class_deps_; + TypeNameUniquer type_name_uniquer_; + // qualifier, e.g. '__torch__.Bar' -> PythonPrint for the file that will be + // created + OrderedDict file_streams_; + // Used to keep references of storages around during serialization to solve + // for ABA memory reuse problem hit when storages are created/destroyed + // during serialization process. Also used to coordinate sharing of storages + // between Script and eager modules in torch.package. + SerializationStorageContext storage_context_; + + // Uniquely identifies a SourceRange in a model. + // SourceRanges are associated with Nodes of Graphs. + // However for mobile deployment we dont intend to ship + // full JIT with capabilities of reading code and constructing + // graphs. + // Instead we serialize the Code generated from graph of the methods. + // Code is serialized in bytecode format that contains instructions + // corresponding to the nodes of the graph. Since original graph is gone, the + // question is how do we identify where the ops, in serialized bytecode, come + // from in original model code. We do this in two parts. + // 1. Associate a unique tag to SourceRange. + // 2. Serialize this unique_tag. + // 2.1 Meaning save instead of + // + // 3. During serializing model for mobile, i.e. bytecode generation, + // save unique tag of SourceRange corresponding to the Node. + // 4. During deserialization, read all the debug_pkl, to construct a map + // of and use tag saved with OPs in bytecode + // to lookup the source range. + // Strictly speaking we will serialize InlinedCallStack directly, which + // contains SourceRange. 
This way we have access to entire callstack and not + // just source information about where the node is, since bytecode inlines the + // graph before saving it. + SourceRangeTagMap source_range_tags_; + int64_t current_source_range_tag_{0}; +}; + +// For testing purposes +TORCH_API std::string pretty_print_onnx( + const std::shared_ptr& graph, + const std::map& initializers, + int64_t onnx_opset_version, + bool defer_weight_export, + ::torch::onnx::OperatorExportTypes operator_export_type = + ::torch::onnx::OperatorExportTypes::ONNX, + bool google_printer = false, + bool keep_initializers_as_inputs = true, + const std::map& custom_opsets = {}, + bool add_node_names = true); + +TORCH_API void ExportModule( + const Module& module, + std::ostream& out, + const ExtraFilesMap& metadata = ExtraFilesMap(), + bool bytecode_format = false, + bool save_mobile_debug_info = false, + bool use_flatbuffer = false); + +TORCH_API void ExportModule( + const Module& module, + const std::string& filename, + const ExtraFilesMap& metadata = ExtraFilesMap(), + bool bytecode_format = false, + bool save_mobile_debug_info = false, + bool use_flatbuffer = false); + +TORCH_API void ExportModule( + const Module& module, + const std::function& writer_func, + const ExtraFilesMap& metadata = ExtraFilesMap(), + bool bytecode_format = false, + bool save_mobile_debug_info = false, + bool use_flatbuffer = false); + +// Write the bytes of a pickle archive and the tensors referenced inside that +// archive +TORCH_API void writeArchiveAndTensors( + const std::string& archive_name, + const char* pickle_bytes, + size_t size, + const std::vector& tensors, + caffe2::serialize::PyTorchStreamWriter& out); + +// Surrounding system can install an additional hook to produce extra files +// with metadata based on environment every time a module is serialized. 
+using ExportModuleExtraFilesHook = std::function; +TORCH_API void SetExportModuleExtraFilesHook(ExportModuleExtraFilesHook hook); + +/** + * Generates new bytecode for a Script module and returns what the op list + * would be for a LiteScriptModule based off the current code base. If you + * have a LiteScriptModule and want to get the currently present + * list of ops call _export_operator_list instead. + */ +TORCH_API std::vector export_opnames(const Module& m); + +struct TORCH_API BytecodeEmitMode { + static bool is_default_value_for_unspecified_arg_enabled(); + static void set_default_value_for_unspecified_arg_enabled(bool enabled); + + static bool is_default_args_before_out_args_enabled(); + static void set_default_args_before_out_args_enabled(bool enabled); + + static bool is_emit_promoted_ops_enabled(); + static void set_default_emit_promoted_ops_enabled(bool enabled); +}; + +// RAII guard to switch the way JIT emits the bytecode for inputs. +// default_value_for_unspecified_arg: +// true: instruction of default argument values (like LOADC) is emitted. +// false: instruction of default argument values are not emitted. Instead +// they are fetched from operator schema. +// default_args_before_out_args (to forward compatible support +// operators allowing out arguments and default arguments): +// true: the number of specified arguments will deserialized to (#all_args - +// #default_args). false: the number of specified arguments will deserialized to +// (#all_args). 
+struct TORCH_API BytecodeEmitModeGuard { + BytecodeEmitModeGuard( + bool enable_default_value_for_unspecified_arg, + bool enable_default_args_before_out_args, + bool enable_emit_promoted_ops) + : prev_default_value_for_unspecified_arg_mode( + BytecodeEmitMode::is_default_value_for_unspecified_arg_enabled()), + prev_default_args_before_out_args( + BytecodeEmitMode::is_default_args_before_out_args_enabled()), + prev_default_emit_promoted_ops( + BytecodeEmitMode::is_emit_promoted_ops_enabled()) { + BytecodeEmitMode::set_default_value_for_unspecified_arg_enabled( + enable_default_value_for_unspecified_arg); + BytecodeEmitMode::set_default_args_before_out_args_enabled( + enable_default_args_before_out_args); + BytecodeEmitMode::set_default_emit_promoted_ops_enabled( + enable_emit_promoted_ops); + } + ~BytecodeEmitModeGuard() { + BytecodeEmitMode::set_default_value_for_unspecified_arg_enabled( + prev_default_value_for_unspecified_arg_mode); + BytecodeEmitMode::set_default_args_before_out_args_enabled( + prev_default_args_before_out_args); + BytecodeEmitMode::set_default_emit_promoted_ops_enabled( + prev_default_emit_promoted_ops); + } + bool prev_default_value_for_unspecified_arg_mode; + bool prev_default_args_before_out_args; + bool prev_default_emit_promoted_ops; +}; + +TORCH_API IValue to_tuple(std::vector ivalues); +TORCH_API IValue +Table(const std::vector>& entries); + +// TODO remove these switches once interface call is rolled out. 
+TORCH_API void enableMobileInterfaceCallExport(); +bool getMobileInterfaceCallExport(); + +TORCH_API CompilationOptions getOptionsFromGlobal(); + +TORCH_API void save_jit_module( + const Module& module, + const std::string& filename, + const ExtraFilesMap& extra_files = ExtraFilesMap()); + +TORCH_API DetachedBuffer::UniqueDetachedBuffer save_jit_module_to_bytes( + const Module& module, + const ExtraFilesMap& extra_files = ExtraFilesMap()); + +TORCH_API void save_jit_module_to_write_func( + const Module& module, + const ExtraFilesMap& extra_files, + bool save_mobile_debug_info, + const std::function& writer_func); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export_bytecode.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export_bytecode.h new file mode 100644 index 0000000000000000000000000000000000000000..608e70c7b5f337bf0798888f1773018562cba00b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/export_bytecode.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +struct TORCH_API CompilationOptions { + bool incl_interface_call = false; + bool enable_default_value_for_unspecified_arg = false; + bool enable_default_args_before_out_args = true; + bool enable_emit_promoted_ops = true; + int model_version = caffe2::serialize::kProducedBytecodeVersion; +}; + +TORCH_API mobile::Module jitModuleToMobile( + const Module& module, + const CompilationOptions& options); + +mobile::Code 
compileGraphToMobileCode( + const std::string& name, + const std::shared_ptr& graph, + const CompilationOptions& compilation_options, + BackendDebugInfoRecorder& debug_info_recorder); + +TORCH_API std::unique_ptr convertJitFunctionToMobileFunction( + const GraphFunction& function, + const CompilationOptions& options); + +TORCH_API IValue convertMobileFunctionToCodeTable( + const mobile::Function& func, + const CompilationOptions& compilation_options); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer.h new file mode 100644 index 0000000000000000000000000000000000000000..cfd7d513a0c578bc7dd102ec2c98db8c9a7e582c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer.h @@ -0,0 +1,97 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +#include +#include +#include + +/** + * Defines the public API for serializing mobile modules to flatbuffer. + * Note that this header must not include or depend on flatbuffer-defined + * types, to avoid leaking those details to PyTorch clients. + */ + +namespace torch::jit { + +/// Maps file names to file contents. +using ExtraFilesMap = std::unordered_map; + +/** + * Represents a span of data. Typically owned by a UniqueDetachedBuffer. + */ +class TORCH_API DetachedBuffer final { + public: + /// Creates a new DetachedBuffer with an optional data owner. This interface + /// is provided to let users create objects of this type for testing. 
+ DetachedBuffer(void* data, size_t size, void* internal_data_owner = nullptr) + : data_(data), size_(size), data_owner_(internal_data_owner) {} + + /// Returns a pointer to the data. + [[nodiscard]] void* data() { + return data_; + } + /// Returns a pointer to the data. + [[nodiscard]] const void* data() const { + return data_; + } + /// Returns the size of the data, in bytes. + [[nodiscard]] size_t size() const { + return size_; + } + + /// Wrapper type that typically owns data_owner_. + using UniqueDetachedBuffer = + std::unique_ptr>; + + private: + /// Deletes the owner, if present, and the buf itself. + /// Note: we could have provided a movable type with a destructor that did + /// this work, but the unique wrapper was easier in practice. + static void destroy(DetachedBuffer* buf); + + /// Provides access to destroy() for implementation and testing. + friend struct DetachedBufferFriend; + friend struct DetachedBufferTestingFriend; + + /// Pointer to the data. Not owned by this class. + void* data_; + /// The size of `data_`, in bytes. + size_t size_; + /// Opaque pointer to the underlying owner of `data_`. This class + /// (DetachedBuffer) does not own the owner or the data. It will typically be + /// owned by a UniqueDetachedBuffer that knows how to delete the owner along + /// with this class. 
+ void* data_owner_; +}; + +TORCH_API void save_mobile_module( + const mobile::Module& module, + const std::string& filename, + const ExtraFilesMap& extra_files = ExtraFilesMap(), + const ExtraFilesMap& jit_sources = ExtraFilesMap(), + const std::vector& jit_constants = {}); + +TORCH_API DetachedBuffer::UniqueDetachedBuffer save_mobile_module_to_bytes( + const mobile::Module& module, + const ExtraFilesMap& extra_files = ExtraFilesMap(), + const ExtraFilesMap& jit_sources = ExtraFilesMap(), + const std::vector& jit_constants = {}); + +TORCH_API void save_mobile_module_to_func( + const mobile::Module& module, + const std::function& writer_func); + +// TODO(qihan): delete +TORCH_API bool register_flatbuffer_serializer(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer_jit.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer_jit.h new file mode 100644 index 0000000000000000000000000000000000000000..fd11071344bc19fca9b1317d399d0c071928ac25 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/flatbuffer_serializer_jit.h @@ -0,0 +1,14 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +TORCH_API bool register_flatbuffer_all(); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import.h new file mode 100644 index 0000000000000000000000000000000000000000..48c18b7905f4c70fb2f765e151dd2ce6cd449fc2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import.h @@ -0,0 +1,152 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace caffe2::serialize { +class ReadAdapterInterface; +} // namespace caffe2::serialize + +namespace torch::jit { + +class DeserializationStorageContext; + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + const std::string& filename, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::istream& in, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::unique_ptr rai, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + const std::string& filename, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true, + bool restore_shapes = false); + +// For reading unified serialization format from torch.Package +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::shared_ptr reader, + std::shared_ptr storage_context, + std::optional device, + const std::string& ts_id /* torchscript identifier inside package */); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::istream& in, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true, + bool 
restore_shapes = false); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::unique_ptr rai, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true); + +TORCH_API Module import_ir_module( + std::shared_ptr cu, + std::shared_ptr rai, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true); + +/// Loads a serialized `Module` from the given `istream`. +/// +/// The istream must contain a serialized `Module`, exported via +/// `torch::jit::ExportModule` in C++. +TORCH_API Module load( + std::istream& in, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module load( + std::istream& in, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true); + +/// Loads a serialized `Module` from the given `filename`. +/// +/// The file stored at the location given in `filename` must contain a +/// serialized `Module`, exported either via `ScriptModule.save()` in +/// Python or `torch::jit::ExportModule` in C++. +TORCH_API Module load( + const std::string& filename, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module load( + const std::string& filename, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true); + +/// Loads a serialized `Module` from the given shared_ptr `rai`. +/// +/// The reader adapter, which is for customized input stream, must contain a +/// serialized `Module`, exported either via `ScriptModule.save()` in +/// Python or `torch::jit::ExportModule` in C++. 
+TORCH_API Module load( + std::shared_ptr rai, + std::optional device = std::nullopt, + bool load_debug_files = true); + +TORCH_API Module load( + std::shared_ptr rai, + std::optional device, + ExtraFilesMap& extra_files, + bool load_debug_files = true); + +TORCH_API Module jitModuleFromSourceAndConstants( + const IValue& ivalue, + const ExtraFilesMap& source, + const std::vector& constants, + int32_t version); + +TORCH_API Module parse_and_initialize_jit_module( + const std::shared_ptr& data, + size_t size, + ExtraFilesMap& extra_files, + std::optional device = std::nullopt); + +TORCH_API Module load_jit_module_from_file( + const std::string& filename, + ExtraFilesMap& extra_files, + std::optional device = std::nullopt); + +TORCH_API Module load_jit_module_from_stream( + std::istream& in, + ExtraFilesMap& extra_files, + std::optional device = std::nullopt); + +TORCH_API c10::intrusive_ptr ObjLoaderFunc( + const at::StrongTypePtr& type, + IValue input); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_constants.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_constants.h new file mode 100644 index 0000000000000000000000000000000000000000..2722a6533036cd74343c12898c9e268c17329856 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_constants.h @@ -0,0 +1,24 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace torch::jit { +constexpr size_t BYTECODE_INDEX_INSTRUCTION = 0; +constexpr size_t BYTECODE_INDEX_OPERATOR = 1; +constexpr size_t BYTECODE_INDEX_CONSTANT = 2; +constexpr size_t BYTECODE_INDEX_TYPE = 3; +constexpr size_t BYTECODE_INDEX_REGISTER_SIZE = 4; + +constexpr size_t BYTECODE_INDEX_SCHEMA_ARGUMENTS = 0; +constexpr size_t BYTECODE_INDEX_SCHEMA_RETURNS = 1; + +constexpr size_t BYTECODE_INDEX_ARGUMENT_NAME = 0; +constexpr size_t BYTECODE_INDEX_ARGUMENT_TYPE = 1; +constexpr size_t BYTECODE_INDEX_ARGUMENT_DEFAULT_VALUE = 2; + +constexpr size_t BYTECODE_INDEX_MODULE_DEBUG_HANDLES = 0; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..ea0af9d7782890380984ac33d376fffbb8540d78 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_functions.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// Functions that are used in both import and export processes + +namespace torch::jit { +using c10::IValue; +IValue expect_field( + c10::ivalue::TupleElements& elements, + const std::string& expected_name, + size_t entry); +std::string operator_str( + const std::string& name, + const std::string& overloadname); +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_helpers.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..40a4f968c1ab3994a113b5e850f3c3bdb97c0e19 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_export_helpers.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace caffe2::serialize { +class PyTorchStreamReader; +} + +namespace torch::jit { + +struct Source; + +// Convert a class type's qualifier name to the corresponding path the source +// file it should be written to. +// +// Qualifier is like: foo.bar.baz +// Returns: libs/foo/bar/baz.py +std::string qualifierToArchivePath( + const std::string& qualifier, + const std::string& export_prefix); + +std::shared_ptr findSourceInArchiveFromQualifier( + caffe2::serialize::PyTorchStreamReader& reader, + const std::string& export_prefix, + const std::string& qualifier); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_read.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_read.h new file mode 100644 index 0000000000000000000000000000000000000000..431430271c69db92255ab8786686b0f51c390dff --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_read.h @@ -0,0 +1,32 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace caffe2::serialize { +class PyTorchStreamReader; +} // namespace caffe2::serialize + +namespace torch::jit { + +TORCH_API IValue readArchiveAndTensors( + const std::string& archive_name, + const std::string& pickle_prefix, + const std::string& tensor_prefix, + std::optional type_resolver, + std::optional obj_loader, + std::optional device, + caffe2::serialize::PyTorchStreamReader& stream_reader, + c10::TypePtr (*type_parser)(const std::string&) = + Unpickler::defaultTypeParser, + std::shared_ptr storage_context = nullptr); + +bool check_zip_file( + const std::shared_ptr& rai); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_source.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_source.h new file mode 100644 index 0000000000000000000000000000000000000000..f279d6cb18e0f143076ce19c328c1ebbd19082be --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/import_source.h @@ -0,0 +1,105 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +using SourceLoader = std::function(const std::string&)>; + +struct SourceImporterImpl : public Resolver, + std::enable_shared_from_this { + SourceImporterImpl( + std::shared_ptr cu, + const std::vector* constant_table, + SourceLoader source_loader, + size_t version); + TypePtr findNamedType(const QualifiedName& name); + Function* findFunction(const QualifiedName& name); + void parseSourceIfNeeded(const std::string& qualifier); + void LEGACY_import_methods( + const Module& mod, + const std::shared_ptr& src); + + std::shared_ptr resolveValue( + const std::string& name, + GraphFunction& m, + const SourceRange& loc) override; + TypePtr resolveType(const std::string& name, const SourceRange& loc) override; + + private: + void importFunction(const std::string& qualifier, const Def& def); + void importNamedType(const std::string& qualifier, const ClassDef& class_def); + std::optional attributeAssignmentSpecialHandlingHack( + const QualifiedName& qualified_classname, + const Assign& assign); + void importClass( + const QualifiedName& qualified_classname, + const ClassDef& class_def, + bool is_module); + void importEnum( + const 
QualifiedName& qualified_name, + const ClassDef& enum_def); + void importNamedTuple( + const QualifiedName& qualified_name, + const ClassDef& named_tuple_def); + + void parsePossibleVersionNumber(Lexer& L); + + void parseImports(Lexer& L); + + std::shared_ptr cu_; + std::unordered_map> env_; + SourceLoader source_loader_; + std::optional version_ = std::nullopt; + std::unordered_set loaded_sources_; + // named types and functions loaded from a file but not yet defined because + // their type has not been requested yet. + std::unordered_map to_be_defined_; +}; + +// Given a directory of serialized TorchScript sources, +// This class allows the loading of individual named types in source. +// Resolves the dependencies between source files and parses +// the source files as necessary. + +struct TORCH_API SourceImporter { + SourceImporter( + // The compilation unit that will own the imported source + std::shared_ptr cu, + const std::vector* constant_table, + SourceLoader loader, + size_t version); + + TypePtr loadType(const QualifiedName& name) const; + + // Add the methods defined in `src` to the module `mod`, using SourceImporter + // to resolve any classes via loadType + void LEGACY_import_methods( + const Module& mod, + const std::shared_ptr& src); + ~SourceImporter(); + + private: + std::shared_ptr pImpl; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/mobile_bytecode_generated.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/mobile_bytecode_generated.h new file mode 100644 index 0000000000000000000000000000000000000000..5e5ad04400f6db8f5b880363162539c75253e01d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/mobile_bytecode_generated.h @@ -0,0 +1,2605 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +// automatically generated by the FlatBuffers compiler, do not modify + + +#ifndef FLATBUFFERS_GENERATED_MOBILEBYTECODE_TORCH_JIT_MOBILE_SERIALIZATION_H_ +#define FLATBUFFERS_GENERATED_MOBILEBYTECODE_TORCH_JIT_MOBILE_SERIALIZATION_H_ + +#include "flatbuffers/flatbuffers.h" + +// Ensure the included flatbuffers.h is the same version as when this file was +// generated, otherwise it may not be compatible. 
+static_assert(FLATBUFFERS_VERSION_MAJOR == 24 && + FLATBUFFERS_VERSION_MINOR == 12 && + FLATBUFFERS_VERSION_REVISION == 23, + "Non-compatible flatbuffers version included"); + +namespace torch { +namespace jit { +namespace mobile { +namespace serialization { + +struct Int; + +struct Bool; + +struct Double; + +struct PerTensorAffineSchema; + +struct QuantizedSchema; +struct QuantizedSchemaBuilder; + +struct TensorMetadata; +struct TensorMetadataBuilder; + +struct String; +struct StringBuilder; + +struct Device; +struct DeviceBuilder; + +struct List; +struct ListBuilder; + +struct IntList; +struct IntListBuilder; + +struct DoubleList; +struct DoubleListBuilder; + +struct BoolList; +struct BoolListBuilder; + +struct Tuple; +struct TupleBuilder; + +struct Dict; +struct DictBuilder; + +struct ObjectType; +struct ObjectTypeBuilder; + +struct Object; +struct ObjectBuilder; + +struct ComplexDouble; + +struct EnumValue; +struct EnumValueBuilder; + +struct Instruction; + +struct Operator; +struct OperatorBuilder; + +struct Arg; +struct ArgBuilder; + +struct Schema; +struct SchemaBuilder; + +struct DebugInfo; +struct DebugInfoBuilder; + +struct Function; +struct FunctionBuilder; + +struct StorageData; +struct StorageDataBuilder; + +struct IValue; +struct IValueBuilder; + +struct ExtraFile; +struct ExtraFileBuilder; + +struct Module; +struct ModuleBuilder; + +enum class TypeType : uint8_t { + UNSET = 0, + CLASS_WITH_FIELD = 1, + CUSTOM_CLASS = 2, + CLASS_WITH_SETSTATE = 3, + NON_OBJ = 4, + MIN = UNSET, + MAX = NON_OBJ +}; + +inline const TypeType (&EnumValuesTypeType())[5] { + static const TypeType values[] = { + TypeType::UNSET, + TypeType::CLASS_WITH_FIELD, + TypeType::CUSTOM_CLASS, + TypeType::CLASS_WITH_SETSTATE, + TypeType::NON_OBJ + }; + return values; +} + +inline const char * const *EnumNamesTypeType() { + static const char * const names[6] = { + "UNSET", + "CLASS_WITH_FIELD", + "CUSTOM_CLASS", + "CLASS_WITH_SETSTATE", + "NON_OBJ", + nullptr + }; + return names; +} + 
+inline const char *EnumNameTypeType(TypeType e) { + if (::flatbuffers::IsOutRange(e, TypeType::UNSET, TypeType::NON_OBJ)) return ""; + const size_t index = static_cast(e); + return EnumNamesTypeType()[index]; +} + +enum class IValueUnion : uint8_t { + NONE = 0, + Int = 1, + Bool = 2, + Double = 3, + ComplexDouble = 4, + TensorMetadata = 5, + String = 6, + List = 7, + Tuple = 8, + Dict = 9, + Object = 10, + IntList = 11, + DoubleList = 12, + BoolList = 13, + Device = 14, + EnumValue = 15, + Function = 16, + MIN = NONE, + MAX = Function +}; + +inline const IValueUnion (&EnumValuesIValueUnion())[17] { + static const IValueUnion values[] = { + IValueUnion::NONE, + IValueUnion::Int, + IValueUnion::Bool, + IValueUnion::Double, + IValueUnion::ComplexDouble, + IValueUnion::TensorMetadata, + IValueUnion::String, + IValueUnion::List, + IValueUnion::Tuple, + IValueUnion::Dict, + IValueUnion::Object, + IValueUnion::IntList, + IValueUnion::DoubleList, + IValueUnion::BoolList, + IValueUnion::Device, + IValueUnion::EnumValue, + IValueUnion::Function + }; + return values; +} + +inline const char * const *EnumNamesIValueUnion() { + static const char * const names[18] = { + "NONE", + "Int", + "Bool", + "Double", + "ComplexDouble", + "TensorMetadata", + "String", + "List", + "Tuple", + "Dict", + "Object", + "IntList", + "DoubleList", + "BoolList", + "Device", + "EnumValue", + "Function", + nullptr + }; + return names; +} + +inline const char *EnumNameIValueUnion(IValueUnion e) { + if (::flatbuffers::IsOutRange(e, IValueUnion::NONE, IValueUnion::Function)) return ""; + const size_t index = static_cast(e); + return EnumNamesIValueUnion()[index]; +} + +template struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::NONE; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Int; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Bool; +}; + +template<> struct 
IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Double; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::ComplexDouble; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::TensorMetadata; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::String; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::List; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Tuple; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Dict; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Object; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::IntList; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::DoubleList; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::BoolList; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Device; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::EnumValue; +}; + +template<> struct IValueUnionTraits { + static const IValueUnion enum_value = IValueUnion::Function; +}; + +bool VerifyIValueUnion(::flatbuffers::Verifier &verifier, const void *obj, IValueUnion type); +bool VerifyIValueUnionVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Int FLATBUFFERS_FINAL_CLASS { + private: + int64_t int_val_; + + public: + Int() + : int_val_(0) { + } + Int(int64_t _int_val) + : int_val_(::flatbuffers::EndianScalar(_int_val)) { + } + int64_t int_val() const { 
+ return ::flatbuffers::EndianScalar(int_val_); + } + void mutate_int_val(int64_t _int_val) { + ::flatbuffers::WriteScalar(&int_val_, _int_val); + } +}; +FLATBUFFERS_STRUCT_END(Int, 8); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(1) Bool FLATBUFFERS_FINAL_CLASS { + private: + uint8_t bool_val_; + + public: + Bool() + : bool_val_(0) { + } + Bool(bool _bool_val) + : bool_val_(::flatbuffers::EndianScalar(static_cast(_bool_val))) { + } + bool bool_val() const { + return ::flatbuffers::EndianScalar(bool_val_) != 0; + } + void mutate_bool_val(bool _bool_val) { + ::flatbuffers::WriteScalar(&bool_val_, static_cast(_bool_val)); + } +}; +FLATBUFFERS_STRUCT_END(Bool, 1); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) Double FLATBUFFERS_FINAL_CLASS { + private: + double double_val_; + + public: + Double() + : double_val_(0) { + } + Double(double _double_val) + : double_val_(::flatbuffers::EndianScalar(_double_val)) { + } + double double_val() const { + return ::flatbuffers::EndianScalar(double_val_); + } + void mutate_double_val(double _double_val) { + ::flatbuffers::WriteScalar(&double_val_, _double_val); + } +}; +FLATBUFFERS_STRUCT_END(Double, 8); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) PerTensorAffineSchema FLATBUFFERS_FINAL_CLASS { + private: + double q_scale_; + int32_t q_zero_point_; + int32_t padding0__; + + public: + PerTensorAffineSchema() + : q_scale_(0), + q_zero_point_(0), + padding0__(0) { + (void)padding0__; + } + PerTensorAffineSchema(double _q_scale, int32_t _q_zero_point) + : q_scale_(::flatbuffers::EndianScalar(_q_scale)), + q_zero_point_(::flatbuffers::EndianScalar(_q_zero_point)), + padding0__(0) { + (void)padding0__; + } + double q_scale() const { + return ::flatbuffers::EndianScalar(q_scale_); + } + void mutate_q_scale(double _q_scale) { + ::flatbuffers::WriteScalar(&q_scale_, _q_scale); + } + int32_t q_zero_point() const { + return ::flatbuffers::EndianScalar(q_zero_point_); + } + void mutate_q_zero_point(int32_t _q_zero_point) { + 
::flatbuffers::WriteScalar(&q_zero_point_, _q_zero_point); + } +}; +FLATBUFFERS_STRUCT_END(PerTensorAffineSchema, 16); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(8) ComplexDouble FLATBUFFERS_FINAL_CLASS { + private: + double real_; + double imag_; + + public: + ComplexDouble() + : real_(0), + imag_(0) { + } + ComplexDouble(double _real, double _imag) + : real_(::flatbuffers::EndianScalar(_real)), + imag_(::flatbuffers::EndianScalar(_imag)) { + } + double real() const { + return ::flatbuffers::EndianScalar(real_); + } + void mutate_real(double _real) { + ::flatbuffers::WriteScalar(&real_, _real); + } + double imag() const { + return ::flatbuffers::EndianScalar(imag_); + } + void mutate_imag(double _imag) { + ::flatbuffers::WriteScalar(&imag_, _imag); + } +}; +FLATBUFFERS_STRUCT_END(ComplexDouble, 16); + +FLATBUFFERS_MANUALLY_ALIGNED_STRUCT(4) Instruction FLATBUFFERS_FINAL_CLASS { + private: + int8_t op_; + int8_t padding0__; + uint16_t n_; + int32_t x_; + + public: + Instruction() + : op_(0), + padding0__(0), + n_(0), + x_(0) { + (void)padding0__; + } + Instruction(int8_t _op, uint16_t _n, int32_t _x) + : op_(::flatbuffers::EndianScalar(_op)), + padding0__(0), + n_(::flatbuffers::EndianScalar(_n)), + x_(::flatbuffers::EndianScalar(_x)) { + (void)padding0__; + } + int8_t op() const { + return ::flatbuffers::EndianScalar(op_); + } + void mutate_op(int8_t _op) { + ::flatbuffers::WriteScalar(&op_, _op); + } + uint16_t n() const { + return ::flatbuffers::EndianScalar(n_); + } + void mutate_n(uint16_t _n) { + ::flatbuffers::WriteScalar(&n_, _n); + } + int32_t x() const { + return ::flatbuffers::EndianScalar(x_); + } + void mutate_x(int32_t _x) { + ::flatbuffers::WriteScalar(&x_, _x); + } +}; +FLATBUFFERS_STRUCT_END(Instruction, 8); + +struct QuantizedSchema FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef QuantizedSchemaBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_QSCHEME = 4, + VT_SCALE = 6, + VT_ZERO_POINT = 8, 
+ VT_SCALES = 10, + VT_ZERO_POINTS = 12, + VT_AXIS = 14 + }; + int8_t qscheme() const { + return GetField(VT_QSCHEME, 0); + } + bool mutate_qscheme(int8_t _qscheme = 0) { + return SetField(VT_QSCHEME, _qscheme, 0); + } + double scale() const { + return GetField(VT_SCALE, 0.0); + } + bool mutate_scale(double _scale = 0.0) { + return SetField(VT_SCALE, _scale, 0.0); + } + int32_t zero_point() const { + return GetField(VT_ZERO_POINT, 0); + } + bool mutate_zero_point(int32_t _zero_point = 0) { + return SetField(VT_ZERO_POINT, _zero_point, 0); + } + const torch::jit::mobile::serialization::TensorMetadata *scales() const { + return GetPointer(VT_SCALES); + } + torch::jit::mobile::serialization::TensorMetadata *mutable_scales() { + return GetPointer(VT_SCALES); + } + const torch::jit::mobile::serialization::TensorMetadata *zero_points() const { + return GetPointer(VT_ZERO_POINTS); + } + torch::jit::mobile::serialization::TensorMetadata *mutable_zero_points() { + return GetPointer(VT_ZERO_POINTS); + } + int32_t axis() const { + return GetField(VT_AXIS, 0); + } + bool mutate_axis(int32_t _axis = 0) { + return SetField(VT_AXIS, _axis, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_QSCHEME, 1) && + VerifyField(verifier, VT_SCALE, 8) && + VerifyField(verifier, VT_ZERO_POINT, 4) && + VerifyOffset(verifier, VT_SCALES) && + verifier.VerifyTable(scales()) && + VerifyOffset(verifier, VT_ZERO_POINTS) && + verifier.VerifyTable(zero_points()) && + VerifyField(verifier, VT_AXIS, 4) && + verifier.EndTable(); + } +}; + +struct QuantizedSchemaBuilder { + typedef QuantizedSchema Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_qscheme(int8_t qscheme) { + fbb_.AddElement(QuantizedSchema::VT_QSCHEME, qscheme, 0); + } + void add_scale(double scale) { + fbb_.AddElement(QuantizedSchema::VT_SCALE, scale, 0.0); + } + void add_zero_point(int32_t zero_point) { + 
fbb_.AddElement(QuantizedSchema::VT_ZERO_POINT, zero_point, 0); + } + void add_scales(::flatbuffers::Offset scales) { + fbb_.AddOffset(QuantizedSchema::VT_SCALES, scales); + } + void add_zero_points(::flatbuffers::Offset zero_points) { + fbb_.AddOffset(QuantizedSchema::VT_ZERO_POINTS, zero_points); + } + void add_axis(int32_t axis) { + fbb_.AddElement(QuantizedSchema::VT_AXIS, axis, 0); + } + explicit QuantizedSchemaBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateQuantizedSchema( + ::flatbuffers::FlatBufferBuilder &_fbb, + int8_t qscheme = 0, + double scale = 0.0, + int32_t zero_point = 0, + ::flatbuffers::Offset scales = 0, + ::flatbuffers::Offset zero_points = 0, + int32_t axis = 0) { + QuantizedSchemaBuilder builder_(_fbb); + builder_.add_scale(scale); + builder_.add_axis(axis); + builder_.add_zero_points(zero_points); + builder_.add_scales(scales); + builder_.add_zero_point(zero_point); + builder_.add_qscheme(qscheme); + return builder_.Finish(); +} + +struct TensorMetadata FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TensorMetadataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_STORAGE_LOCATION_INDEX = 4, + VT_SCALAR_TYPE = 6, + VT_STORAGE_OFFSET = 8, + VT_SIZES = 10, + VT_STRIDES = 12, + VT_REQUIRES_GRAD = 14, + VT_QUANTIZED_SCHEMA = 16 + }; + uint32_t storage_location_index() const { + return GetField(VT_STORAGE_LOCATION_INDEX, 0); + } + bool mutate_storage_location_index(uint32_t _storage_location_index = 0) { + return SetField(VT_STORAGE_LOCATION_INDEX, _storage_location_index, 0); + } + int8_t scalar_type() const { + return GetField(VT_SCALAR_TYPE, 0); + } + bool mutate_scalar_type(int8_t _scalar_type = 0) { + return SetField(VT_SCALAR_TYPE, _scalar_type, 0); + } + 
int32_t storage_offset() const { + return GetField(VT_STORAGE_OFFSET, 0); + } + bool mutate_storage_offset(int32_t _storage_offset = 0) { + return SetField(VT_STORAGE_OFFSET, _storage_offset, 0); + } + const ::flatbuffers::Vector *sizes() const { + return GetPointer *>(VT_SIZES); + } + ::flatbuffers::Vector *mutable_sizes() { + return GetPointer<::flatbuffers::Vector *>(VT_SIZES); + } + const ::flatbuffers::Vector *strides() const { + return GetPointer *>(VT_STRIDES); + } + ::flatbuffers::Vector *mutable_strides() { + return GetPointer<::flatbuffers::Vector *>(VT_STRIDES); + } + bool requires_grad() const { + return GetField(VT_REQUIRES_GRAD, 0) != 0; + } + bool mutate_requires_grad(bool _requires_grad = 0) { + return SetField(VT_REQUIRES_GRAD, static_cast(_requires_grad), 0); + } + const torch::jit::mobile::serialization::QuantizedSchema *quantized_schema() const { + return GetPointer(VT_QUANTIZED_SCHEMA); + } + torch::jit::mobile::serialization::QuantizedSchema *mutable_quantized_schema() { + return GetPointer(VT_QUANTIZED_SCHEMA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_STORAGE_LOCATION_INDEX, 4) && + VerifyField(verifier, VT_SCALAR_TYPE, 1) && + VerifyField(verifier, VT_STORAGE_OFFSET, 4) && + VerifyOffset(verifier, VT_SIZES) && + verifier.VerifyVector(sizes()) && + VerifyOffset(verifier, VT_STRIDES) && + verifier.VerifyVector(strides()) && + VerifyField(verifier, VT_REQUIRES_GRAD, 1) && + VerifyOffset(verifier, VT_QUANTIZED_SCHEMA) && + verifier.VerifyTable(quantized_schema()) && + verifier.EndTable(); + } +}; + +struct TensorMetadataBuilder { + typedef TensorMetadata Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_storage_location_index(uint32_t storage_location_index) { + fbb_.AddElement(TensorMetadata::VT_STORAGE_LOCATION_INDEX, storage_location_index, 0); + } + void add_scalar_type(int8_t scalar_type) { + 
fbb_.AddElement(TensorMetadata::VT_SCALAR_TYPE, scalar_type, 0); + } + void add_storage_offset(int32_t storage_offset) { + fbb_.AddElement(TensorMetadata::VT_STORAGE_OFFSET, storage_offset, 0); + } + void add_sizes(::flatbuffers::Offset<::flatbuffers::Vector> sizes) { + fbb_.AddOffset(TensorMetadata::VT_SIZES, sizes); + } + void add_strides(::flatbuffers::Offset<::flatbuffers::Vector> strides) { + fbb_.AddOffset(TensorMetadata::VT_STRIDES, strides); + } + void add_requires_grad(bool requires_grad) { + fbb_.AddElement(TensorMetadata::VT_REQUIRES_GRAD, static_cast(requires_grad), 0); + } + void add_quantized_schema(::flatbuffers::Offset quantized_schema) { + fbb_.AddOffset(TensorMetadata::VT_QUANTIZED_SCHEMA, quantized_schema); + } + explicit TensorMetadataBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTensorMetadata( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t storage_location_index = 0, + int8_t scalar_type = 0, + int32_t storage_offset = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> sizes = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> strides = 0, + bool requires_grad = false, + ::flatbuffers::Offset quantized_schema = 0) { + TensorMetadataBuilder builder_(_fbb); + builder_.add_quantized_schema(quantized_schema); + builder_.add_strides(strides); + builder_.add_sizes(sizes); + builder_.add_storage_offset(storage_offset); + builder_.add_storage_location_index(storage_location_index); + builder_.add_requires_grad(requires_grad); + builder_.add_scalar_type(scalar_type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateTensorMetadataDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t storage_location_index = 0, + int8_t scalar_type = 0, + int32_t storage_offset = 0, + const std::vector *sizes = 
nullptr, + const std::vector *strides = nullptr, + bool requires_grad = false, + ::flatbuffers::Offset quantized_schema = 0) { + auto sizes__ = sizes ? _fbb.CreateVector(*sizes) : 0; + auto strides__ = strides ? _fbb.CreateVector(*strides) : 0; + return torch::jit::mobile::serialization::CreateTensorMetadata( + _fbb, + storage_location_index, + scalar_type, + storage_offset, + sizes__, + strides__, + requires_grad, + quantized_schema); +} + +struct String FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef StringBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DATA = 4 + }; + const ::flatbuffers::String *data() const { + return GetPointer(VT_DATA); + } + ::flatbuffers::String *mutable_data() { + return GetPointer<::flatbuffers::String *>(VT_DATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyString(data()) && + verifier.EndTable(); + } +}; + +struct StringBuilder { + typedef String Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_data(::flatbuffers::Offset<::flatbuffers::String> data) { + fbb_.AddOffset(String::VT_DATA, data); + } + explicit StringBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateString( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> data = 0) { + StringBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateStringDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *data = nullptr) { + auto data__ = data ? 
_fbb.CreateString(data) : 0; + return torch::jit::mobile::serialization::CreateString( + _fbb, + data__); +} + +struct Device FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DeviceBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_STR = 4 + }; + const ::flatbuffers::String *str() const { + return GetPointer(VT_STR); + } + ::flatbuffers::String *mutable_str() { + return GetPointer<::flatbuffers::String *>(VT_STR); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_STR) && + verifier.VerifyString(str()) && + verifier.EndTable(); + } +}; + +struct DeviceBuilder { + typedef Device Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_str(::flatbuffers::Offset<::flatbuffers::String> str) { + fbb_.AddOffset(Device::VT_STR, str); + } + explicit DeviceBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDevice( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> str = 0) { + DeviceBuilder builder_(_fbb); + builder_.add_str(str); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDeviceDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *str = nullptr) { + auto str__ = str ? 
_fbb.CreateString(str) : 0; + return torch::jit::mobile::serialization::CreateDevice( + _fbb, + str__); +} + +struct List FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ListBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ITEMS = 4, + VT_ANNOTATION_STR = 6 + }; + const ::flatbuffers::Vector *items() const { + return GetPointer *>(VT_ITEMS); + } + ::flatbuffers::Vector *mutable_items() { + return GetPointer<::flatbuffers::Vector *>(VT_ITEMS); + } + const ::flatbuffers::String *annotation_str() const { + return GetPointer(VT_ANNOTATION_STR); + } + ::flatbuffers::String *mutable_annotation_str() { + return GetPointer<::flatbuffers::String *>(VT_ANNOTATION_STR); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ITEMS) && + verifier.VerifyVector(items()) && + VerifyOffset(verifier, VT_ANNOTATION_STR) && + verifier.VerifyString(annotation_str()) && + verifier.EndTable(); + } +}; + +struct ListBuilder { + typedef List Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_items(::flatbuffers::Offset<::flatbuffers::Vector> items) { + fbb_.AddOffset(List::VT_ITEMS, items); + } + void add_annotation_str(::flatbuffers::Offset<::flatbuffers::String> annotation_str) { + fbb_.AddOffset(List::VT_ANNOTATION_STR, annotation_str); + } + explicit ListBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateList( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> items = 0, + ::flatbuffers::Offset<::flatbuffers::String> annotation_str = 0) { + ListBuilder builder_(_fbb); + builder_.add_annotation_str(annotation_str); + builder_.add_items(items); + return 
builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateListDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *items = nullptr, + const char *annotation_str = nullptr) { + auto items__ = items ? _fbb.CreateVector(*items) : 0; + auto annotation_str__ = annotation_str ? _fbb.CreateString(annotation_str) : 0; + return torch::jit::mobile::serialization::CreateList( + _fbb, + items__, + annotation_str__); +} + +struct IntList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef IntListBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ITEMS = 4 + }; + const ::flatbuffers::Vector *items() const { + return GetPointer *>(VT_ITEMS); + } + ::flatbuffers::Vector *mutable_items() { + return GetPointer<::flatbuffers::Vector *>(VT_ITEMS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ITEMS) && + verifier.VerifyVector(items()) && + verifier.EndTable(); + } +}; + +struct IntListBuilder { + typedef IntList Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_items(::flatbuffers::Offset<::flatbuffers::Vector> items) { + fbb_.AddOffset(IntList::VT_ITEMS, items); + } + explicit IntListBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateIntList( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> items = 0) { + IntListBuilder builder_(_fbb); + builder_.add_items(items); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateIntListDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *items = nullptr) { + auto items__ = items ? 
_fbb.CreateVector(*items) : 0; + return torch::jit::mobile::serialization::CreateIntList( + _fbb, + items__); +} + +struct DoubleList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DoubleListBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ITEMS = 4 + }; + const ::flatbuffers::Vector *items() const { + return GetPointer *>(VT_ITEMS); + } + ::flatbuffers::Vector *mutable_items() { + return GetPointer<::flatbuffers::Vector *>(VT_ITEMS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ITEMS) && + verifier.VerifyVector(items()) && + verifier.EndTable(); + } +}; + +struct DoubleListBuilder { + typedef DoubleList Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_items(::flatbuffers::Offset<::flatbuffers::Vector> items) { + fbb_.AddOffset(DoubleList::VT_ITEMS, items); + } + explicit DoubleListBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDoubleList( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> items = 0) { + DoubleListBuilder builder_(_fbb); + builder_.add_items(items); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDoubleListDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *items = nullptr) { + auto items__ = items ? 
_fbb.CreateVector(*items) : 0; + return torch::jit::mobile::serialization::CreateDoubleList( + _fbb, + items__); +} + +struct BoolList FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef BoolListBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ITEMS = 4 + }; + const ::flatbuffers::Vector *items() const { + return GetPointer *>(VT_ITEMS); + } + ::flatbuffers::Vector *mutable_items() { + return GetPointer<::flatbuffers::Vector *>(VT_ITEMS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ITEMS) && + verifier.VerifyVector(items()) && + verifier.EndTable(); + } +}; + +struct BoolListBuilder { + typedef BoolList Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_items(::flatbuffers::Offset<::flatbuffers::Vector> items) { + fbb_.AddOffset(BoolList::VT_ITEMS, items); + } + explicit BoolListBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateBoolList( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> items = 0) { + BoolListBuilder builder_(_fbb); + builder_.add_items(items); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateBoolListDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *items = nullptr) { + auto items__ = items ? 
_fbb.CreateVector(*items) : 0; + return torch::jit::mobile::serialization::CreateBoolList( + _fbb, + items__); +} + +struct Tuple FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef TupleBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ITEMS = 4 + }; + const ::flatbuffers::Vector *items() const { + return GetPointer *>(VT_ITEMS); + } + ::flatbuffers::Vector *mutable_items() { + return GetPointer<::flatbuffers::Vector *>(VT_ITEMS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ITEMS) && + verifier.VerifyVector(items()) && + verifier.EndTable(); + } +}; + +struct TupleBuilder { + typedef Tuple Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_items(::flatbuffers::Offset<::flatbuffers::Vector> items) { + fbb_.AddOffset(Tuple::VT_ITEMS, items); + } + explicit TupleBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateTuple( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> items = 0) { + TupleBuilder builder_(_fbb); + builder_.add_items(items); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateTupleDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *items = nullptr) { + auto items__ = items ? 
_fbb.CreateVector(*items) : 0; + return torch::jit::mobile::serialization::CreateTuple( + _fbb, + items__); +} + +struct Dict FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DictBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_KEYS = 4, + VT_VALUES = 6, + VT_ANNOTATION_STR = 8 + }; + const ::flatbuffers::Vector *keys() const { + return GetPointer *>(VT_KEYS); + } + ::flatbuffers::Vector *mutable_keys() { + return GetPointer<::flatbuffers::Vector *>(VT_KEYS); + } + const ::flatbuffers::Vector *values() const { + return GetPointer *>(VT_VALUES); + } + ::flatbuffers::Vector *mutable_values() { + return GetPointer<::flatbuffers::Vector *>(VT_VALUES); + } + const ::flatbuffers::String *annotation_str() const { + return GetPointer(VT_ANNOTATION_STR); + } + ::flatbuffers::String *mutable_annotation_str() { + return GetPointer<::flatbuffers::String *>(VT_ANNOTATION_STR); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_KEYS) && + verifier.VerifyVector(keys()) && + VerifyOffset(verifier, VT_VALUES) && + verifier.VerifyVector(values()) && + VerifyOffset(verifier, VT_ANNOTATION_STR) && + verifier.VerifyString(annotation_str()) && + verifier.EndTable(); + } +}; + +struct DictBuilder { + typedef Dict Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_keys(::flatbuffers::Offset<::flatbuffers::Vector> keys) { + fbb_.AddOffset(Dict::VT_KEYS, keys); + } + void add_values(::flatbuffers::Offset<::flatbuffers::Vector> values) { + fbb_.AddOffset(Dict::VT_VALUES, values); + } + void add_annotation_str(::flatbuffers::Offset<::flatbuffers::String> annotation_str) { + fbb_.AddOffset(Dict::VT_ANNOTATION_STR, annotation_str); + } + explicit DictBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = 
fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDict( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> keys = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> values = 0, + ::flatbuffers::Offset<::flatbuffers::String> annotation_str = 0) { + DictBuilder builder_(_fbb); + builder_.add_annotation_str(annotation_str); + builder_.add_values(values); + builder_.add_keys(keys); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDictDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *keys = nullptr, + const std::vector *values = nullptr, + const char *annotation_str = nullptr) { + auto keys__ = keys ? _fbb.CreateVector(*keys) : 0; + auto values__ = values ? _fbb.CreateVector(*values) : 0; + auto annotation_str__ = annotation_str ? _fbb.CreateString(annotation_str) : 0; + return torch::jit::mobile::serialization::CreateDict( + _fbb, + keys__, + values__, + annotation_str__); +} + +struct ObjectType FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ObjectTypeBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TYPE_NAME = 4, + VT_TYPE = 6, + VT_ATTR_NAMES = 8 + }; + const ::flatbuffers::String *type_name() const { + return GetPointer(VT_TYPE_NAME); + } + ::flatbuffers::String *mutable_type_name() { + return GetPointer<::flatbuffers::String *>(VT_TYPE_NAME); + } + torch::jit::mobile::serialization::TypeType type() const { + return static_cast(GetField(VT_TYPE, 0)); + } + bool mutate_type(torch::jit::mobile::serialization::TypeType _type = static_cast(0)) { + return SetField(VT_TYPE, static_cast(_type), 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *attr_names() const { + return GetPointer> *>(VT_ATTR_NAMES); + } + ::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *mutable_attr_names() { + return 
GetPointer<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *>(VT_ATTR_NAMES); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TYPE_NAME) && + verifier.VerifyString(type_name()) && + VerifyField(verifier, VT_TYPE, 1) && + VerifyOffset(verifier, VT_ATTR_NAMES) && + verifier.VerifyVector(attr_names()) && + verifier.VerifyVectorOfStrings(attr_names()) && + verifier.EndTable(); + } +}; + +struct ObjectTypeBuilder { + typedef ObjectType Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_type_name(::flatbuffers::Offset<::flatbuffers::String> type_name) { + fbb_.AddOffset(ObjectType::VT_TYPE_NAME, type_name); + } + void add_type(torch::jit::mobile::serialization::TypeType type) { + fbb_.AddElement(ObjectType::VT_TYPE, static_cast(type), 0); + } + void add_attr_names(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>>> attr_names) { + fbb_.AddOffset(ObjectType::VT_ATTR_NAMES, attr_names); + } + explicit ObjectTypeBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateObjectType( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> type_name = 0, + torch::jit::mobile::serialization::TypeType type = torch::jit::mobile::serialization::TypeType::UNSET, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>>> attr_names = 0) { + ObjectTypeBuilder builder_(_fbb); + builder_.add_attr_names(attr_names); + builder_.add_type_name(type_name); + builder_.add_type(type); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateObjectTypeDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *type_name = 
nullptr, + torch::jit::mobile::serialization::TypeType type = torch::jit::mobile::serialization::TypeType::UNSET, + const std::vector<::flatbuffers::Offset<::flatbuffers::String>> *attr_names = nullptr) { + auto type_name__ = type_name ? _fbb.CreateString(type_name) : 0; + auto attr_names__ = attr_names ? _fbb.CreateVector<::flatbuffers::Offset<::flatbuffers::String>>(*attr_names) : 0; + return torch::jit::mobile::serialization::CreateObjectType( + _fbb, + type_name__, + type, + attr_names__); +} + +struct Object FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ObjectBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TYPE_INDEX = 4, + VT_STATE = 6, + VT_ATTRS = 8, + VT_SETSTATE_FUNC = 10 + }; + uint32_t type_index() const { + return GetField(VT_TYPE_INDEX, 0); + } + bool mutate_type_index(uint32_t _type_index = 0) { + return SetField(VT_TYPE_INDEX, _type_index, 0); + } + uint32_t state() const { + return GetField(VT_STATE, 0); + } + bool mutate_state(uint32_t _state = 0) { + return SetField(VT_STATE, _state, 0); + } + const ::flatbuffers::Vector *attrs() const { + return GetPointer *>(VT_ATTRS); + } + ::flatbuffers::Vector *mutable_attrs() { + return GetPointer<::flatbuffers::Vector *>(VT_ATTRS); + } + uint32_t setstate_func() const { + return GetField(VT_SETSTATE_FUNC, 0); + } + bool mutate_setstate_func(uint32_t _setstate_func = 0) { + return SetField(VT_SETSTATE_FUNC, _setstate_func, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_TYPE_INDEX, 4) && + VerifyField(verifier, VT_STATE, 4) && + VerifyOffset(verifier, VT_ATTRS) && + verifier.VerifyVector(attrs()) && + VerifyField(verifier, VT_SETSTATE_FUNC, 4) && + verifier.EndTable(); + } +}; + +struct ObjectBuilder { + typedef Object Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_type_index(uint32_t type_index) { + 
fbb_.AddElement(Object::VT_TYPE_INDEX, type_index, 0); + } + void add_state(uint32_t state) { + fbb_.AddElement(Object::VT_STATE, state, 0); + } + void add_attrs(::flatbuffers::Offset<::flatbuffers::Vector> attrs) { + fbb_.AddOffset(Object::VT_ATTRS, attrs); + } + void add_setstate_func(uint32_t setstate_func) { + fbb_.AddElement(Object::VT_SETSTATE_FUNC, setstate_func, 0); + } + explicit ObjectBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateObject( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t type_index = 0, + uint32_t state = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> attrs = 0, + uint32_t setstate_func = 0) { + ObjectBuilder builder_(_fbb); + builder_.add_setstate_func(setstate_func); + builder_.add_attrs(attrs); + builder_.add_state(state); + builder_.add_type_index(type_index); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateObjectDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t type_index = 0, + uint32_t state = 0, + const std::vector *attrs = nullptr, + uint32_t setstate_func = 0) { + auto attrs__ = attrs ? 
_fbb.CreateVector(*attrs) : 0; + return torch::jit::mobile::serialization::CreateObject( + _fbb, + type_index, + state, + attrs__, + setstate_func); +} + +struct EnumValue FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef EnumValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_TYPE_NAME = 4, + VT_VALUE = 6 + }; + const ::flatbuffers::String *type_name() const { + return GetPointer(VT_TYPE_NAME); + } + ::flatbuffers::String *mutable_type_name() { + return GetPointer<::flatbuffers::String *>(VT_TYPE_NAME); + } + uint32_t value() const { + return GetField(VT_VALUE, 0); + } + bool mutate_value(uint32_t _value = 0) { + return SetField(VT_VALUE, _value, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_TYPE_NAME) && + verifier.VerifyString(type_name()) && + VerifyField(verifier, VT_VALUE, 4) && + verifier.EndTable(); + } +}; + +struct EnumValueBuilder { + typedef EnumValue Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_type_name(::flatbuffers::Offset<::flatbuffers::String> type_name) { + fbb_.AddOffset(EnumValue::VT_TYPE_NAME, type_name); + } + void add_value(uint32_t value) { + fbb_.AddElement(EnumValue::VT_VALUE, value, 0); + } + explicit EnumValueBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateEnumValue( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> type_name = 0, + uint32_t value = 0) { + EnumValueBuilder builder_(_fbb); + builder_.add_value(value); + builder_.add_type_name(type_name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateEnumValueDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + 
const char *type_name = nullptr, + uint32_t value = 0) { + auto type_name__ = type_name ? _fbb.CreateString(type_name) : 0; + return torch::jit::mobile::serialization::CreateEnumValue( + _fbb, + type_name__, + value); +} + +struct Operator FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef OperatorBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_OVERLOAD_NAME = 6, + VT_NUM_ARGS_SERIALIZED = 8 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + ::flatbuffers::String *mutable_name() { + return GetPointer<::flatbuffers::String *>(VT_NAME); + } + const ::flatbuffers::String *overload_name() const { + return GetPointer(VT_OVERLOAD_NAME); + } + ::flatbuffers::String *mutable_overload_name() { + return GetPointer<::flatbuffers::String *>(VT_OVERLOAD_NAME); + } + int32_t num_args_serialized() const { + return GetField(VT_NUM_ARGS_SERIALIZED, -1); + } + bool mutate_num_args_serialized(int32_t _num_args_serialized = -1) { + return SetField(VT_NUM_ARGS_SERIALIZED, _num_args_serialized, -1); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_OVERLOAD_NAME) && + verifier.VerifyString(overload_name()) && + VerifyField(verifier, VT_NUM_ARGS_SERIALIZED, 4) && + verifier.EndTable(); + } +}; + +struct OperatorBuilder { + typedef Operator Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Operator::VT_NAME, name); + } + void add_overload_name(::flatbuffers::Offset<::flatbuffers::String> overload_name) { + fbb_.AddOffset(Operator::VT_OVERLOAD_NAME, overload_name); + } + void add_num_args_serialized(int32_t num_args_serialized) { + fbb_.AddElement(Operator::VT_NUM_ARGS_SERIALIZED, num_args_serialized, -1); + } + 
explicit OperatorBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateOperator( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset<::flatbuffers::String> overload_name = 0, + int32_t num_args_serialized = -1) { + OperatorBuilder builder_(_fbb); + builder_.add_num_args_serialized(num_args_serialized); + builder_.add_overload_name(overload_name); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateOperatorDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + const char *overload_name = nullptr, + int32_t num_args_serialized = -1) { + auto name__ = name ? _fbb.CreateString(name) : 0; + auto overload_name__ = overload_name ? _fbb.CreateString(overload_name) : 0; + return torch::jit::mobile::serialization::CreateOperator( + _fbb, + name__, + overload_name__, + num_args_serialized); +} + +struct Arg FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ArgBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_TYPE = 6, + VT_DEFAULT_VALUE = 8 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + ::flatbuffers::String *mutable_name() { + return GetPointer<::flatbuffers::String *>(VT_NAME); + } + const ::flatbuffers::String *type() const { + return GetPointer(VT_TYPE); + } + ::flatbuffers::String *mutable_type() { + return GetPointer<::flatbuffers::String *>(VT_TYPE); + } + uint32_t default_value() const { + return GetField(VT_DEFAULT_VALUE, 0); + } + bool mutate_default_value(uint32_t _default_value = 0) { + return SetField(VT_DEFAULT_VALUE, _default_value, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { 
+ return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_TYPE) && + verifier.VerifyString(type()) && + VerifyField(verifier, VT_DEFAULT_VALUE, 4) && + verifier.EndTable(); + } +}; + +struct ArgBuilder { + typedef Arg Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(Arg::VT_NAME, name); + } + void add_type(::flatbuffers::Offset<::flatbuffers::String> type) { + fbb_.AddOffset(Arg::VT_TYPE, type); + } + void add_default_value(uint32_t default_value) { + fbb_.AddElement(Arg::VT_DEFAULT_VALUE, default_value, 0); + } + explicit ArgBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateArg( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset<::flatbuffers::String> type = 0, + uint32_t default_value = 0) { + ArgBuilder builder_(_fbb); + builder_.add_default_value(default_value); + builder_.add_type(type); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateArgDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + const char *type = nullptr, + uint32_t default_value = 0) { + auto name__ = name ? _fbb.CreateString(name) : 0; + auto type__ = type ? 
_fbb.CreateString(type) : 0; + return torch::jit::mobile::serialization::CreateArg( + _fbb, + name__, + type__, + default_value); +} + +struct Schema FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef SchemaBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_ARGUMENTS = 4, + VT_RETURNS = 6 + }; + const ::flatbuffers::Vector<::flatbuffers::Offset> *arguments() const { + return GetPointer> *>(VT_ARGUMENTS); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_arguments() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_ARGUMENTS); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *returns() const { + return GetPointer> *>(VT_RETURNS); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_returns() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_RETURNS); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_ARGUMENTS) && + verifier.VerifyVector(arguments()) && + verifier.VerifyVectorOfTables(arguments()) && + VerifyOffset(verifier, VT_RETURNS) && + verifier.VerifyVector(returns()) && + verifier.VerifyVectorOfTables(returns()) && + verifier.EndTable(); + } +}; + +struct SchemaBuilder { + typedef Schema Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_arguments(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> arguments) { + fbb_.AddOffset(Schema::VT_ARGUMENTS, arguments); + } + void add_returns(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> returns) { + fbb_.AddOffset(Schema::VT_RETURNS, returns); + } + explicit SchemaBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset 
CreateSchema( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> arguments = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> returns = 0) { + SchemaBuilder builder_(_fbb); + builder_.add_returns(returns); + builder_.add_arguments(arguments); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateSchemaDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector<::flatbuffers::Offset> *arguments = nullptr, + const std::vector<::flatbuffers::Offset> *returns = nullptr) { + auto arguments__ = arguments ? _fbb.CreateVector<::flatbuffers::Offset>(*arguments) : 0; + auto returns__ = returns ? _fbb.CreateVector<::flatbuffers::Offset>(*returns) : 0; + return torch::jit::mobile::serialization::CreateSchema( + _fbb, + arguments__, + returns__); +} + +struct DebugInfo FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef DebugInfoBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DEBUG_HANDLE = 4 + }; + const ::flatbuffers::Vector *debug_handle() const { + return GetPointer *>(VT_DEBUG_HANDLE); + } + ::flatbuffers::Vector *mutable_debug_handle() { + return GetPointer<::flatbuffers::Vector *>(VT_DEBUG_HANDLE); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DEBUG_HANDLE) && + verifier.VerifyVector(debug_handle()) && + verifier.EndTable(); + } +}; + +struct DebugInfoBuilder { + typedef DebugInfo Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_debug_handle(::flatbuffers::Offset<::flatbuffers::Vector> debug_handle) { + fbb_.AddOffset(DebugInfo::VT_DEBUG_HANDLE, debug_handle); + } + explicit DebugInfoBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = 
::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateDebugInfo( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> debug_handle = 0) { + DebugInfoBuilder builder_(_fbb); + builder_.add_debug_handle(debug_handle); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateDebugInfoDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *debug_handle = nullptr) { + auto debug_handle__ = debug_handle ? _fbb.CreateVector(*debug_handle) : 0; + return torch::jit::mobile::serialization::CreateDebugInfo( + _fbb, + debug_handle__); +} + +struct Function FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef FunctionBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_QN = 4, + VT_INSTRUCTIONS = 6, + VT_OPERATORS = 8, + VT_CONSTANTS = 10, + VT_TYPE_ANNOTATIONS = 12, + VT_REGISTER_SIZE = 14, + VT_SCHEMA = 16, + VT_DEBUG_INFO = 18, + VT_CLASS_TYPE = 20 + }; + const ::flatbuffers::String *qn() const { + return GetPointer(VT_QN); + } + ::flatbuffers::String *mutable_qn() { + return GetPointer<::flatbuffers::String *>(VT_QN); + } + const ::flatbuffers::Vector *instructions() const { + return GetPointer *>(VT_INSTRUCTIONS); + } + ::flatbuffers::Vector *mutable_instructions() { + return GetPointer<::flatbuffers::Vector *>(VT_INSTRUCTIONS); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *operators() const { + return GetPointer> *>(VT_OPERATORS); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_operators() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_OPERATORS); + } + const ::flatbuffers::Vector *constants() const { + return GetPointer *>(VT_CONSTANTS); + } + ::flatbuffers::Vector *mutable_constants() { + return GetPointer<::flatbuffers::Vector *>(VT_CONSTANTS); + } + const ::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *type_annotations() const { + return GetPointer> 
*>(VT_TYPE_ANNOTATIONS); + } + ::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *mutable_type_annotations() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>> *>(VT_TYPE_ANNOTATIONS); + } + int32_t register_size() const { + return GetField(VT_REGISTER_SIZE, 0); + } + bool mutate_register_size(int32_t _register_size = 0) { + return SetField(VT_REGISTER_SIZE, _register_size, 0); + } + const torch::jit::mobile::serialization::Schema *schema() const { + return GetPointer(VT_SCHEMA); + } + torch::jit::mobile::serialization::Schema *mutable_schema() { + return GetPointer(VT_SCHEMA); + } + const torch::jit::mobile::serialization::DebugInfo *debug_info() const { + return GetPointer(VT_DEBUG_INFO); + } + torch::jit::mobile::serialization::DebugInfo *mutable_debug_info() { + return GetPointer(VT_DEBUG_INFO); + } + uint32_t class_type() const { + return GetField(VT_CLASS_TYPE, 0); + } + bool mutate_class_type(uint32_t _class_type = 0) { + return SetField(VT_CLASS_TYPE, _class_type, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_QN) && + verifier.VerifyString(qn()) && + VerifyOffset(verifier, VT_INSTRUCTIONS) && + verifier.VerifyVector(instructions()) && + VerifyOffset(verifier, VT_OPERATORS) && + verifier.VerifyVector(operators()) && + verifier.VerifyVectorOfTables(operators()) && + VerifyOffset(verifier, VT_CONSTANTS) && + verifier.VerifyVector(constants()) && + VerifyOffset(verifier, VT_TYPE_ANNOTATIONS) && + verifier.VerifyVector(type_annotations()) && + verifier.VerifyVectorOfStrings(type_annotations()) && + VerifyField(verifier, VT_REGISTER_SIZE, 4) && + VerifyOffset(verifier, VT_SCHEMA) && + verifier.VerifyTable(schema()) && + VerifyOffset(verifier, VT_DEBUG_INFO) && + verifier.VerifyTable(debug_info()) && + VerifyField(verifier, VT_CLASS_TYPE, 4) && + verifier.EndTable(); + } +}; + +struct FunctionBuilder { + typedef Function 
Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_qn(::flatbuffers::Offset<::flatbuffers::String> qn) { + fbb_.AddOffset(Function::VT_QN, qn); + } + void add_instructions(::flatbuffers::Offset<::flatbuffers::Vector> instructions) { + fbb_.AddOffset(Function::VT_INSTRUCTIONS, instructions); + } + void add_operators(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operators) { + fbb_.AddOffset(Function::VT_OPERATORS, operators); + } + void add_constants(::flatbuffers::Offset<::flatbuffers::Vector> constants) { + fbb_.AddOffset(Function::VT_CONSTANTS, constants); + } + void add_type_annotations(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>>> type_annotations) { + fbb_.AddOffset(Function::VT_TYPE_ANNOTATIONS, type_annotations); + } + void add_register_size(int32_t register_size) { + fbb_.AddElement(Function::VT_REGISTER_SIZE, register_size, 0); + } + void add_schema(::flatbuffers::Offset schema) { + fbb_.AddOffset(Function::VT_SCHEMA, schema); + } + void add_debug_info(::flatbuffers::Offset debug_info) { + fbb_.AddOffset(Function::VT_DEBUG_INFO, debug_info); + } + void add_class_type(uint32_t class_type) { + fbb_.AddElement(Function::VT_CLASS_TYPE, class_type, 0); + } + explicit FunctionBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateFunction( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> qn = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> instructions = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> operators = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> constants = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset<::flatbuffers::String>>> 
type_annotations = 0, + int32_t register_size = 0, + ::flatbuffers::Offset schema = 0, + ::flatbuffers::Offset debug_info = 0, + uint32_t class_type = 0) { + FunctionBuilder builder_(_fbb); + builder_.add_class_type(class_type); + builder_.add_debug_info(debug_info); + builder_.add_schema(schema); + builder_.add_register_size(register_size); + builder_.add_type_annotations(type_annotations); + builder_.add_constants(constants); + builder_.add_operators(operators); + builder_.add_instructions(instructions); + builder_.add_qn(qn); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateFunctionDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *qn = nullptr, + const std::vector *instructions = nullptr, + const std::vector<::flatbuffers::Offset> *operators = nullptr, + const std::vector *constants = nullptr, + const std::vector<::flatbuffers::Offset<::flatbuffers::String>> *type_annotations = nullptr, + int32_t register_size = 0, + ::flatbuffers::Offset schema = 0, + ::flatbuffers::Offset debug_info = 0, + uint32_t class_type = 0) { + auto qn__ = qn ? _fbb.CreateString(qn) : 0; + auto instructions__ = instructions ? _fbb.CreateVectorOfStructs(*instructions) : 0; + auto operators__ = operators ? _fbb.CreateVector<::flatbuffers::Offset>(*operators) : 0; + auto constants__ = constants ? _fbb.CreateVector(*constants) : 0; + auto type_annotations__ = type_annotations ? 
_fbb.CreateVector<::flatbuffers::Offset<::flatbuffers::String>>(*type_annotations) : 0; + return torch::jit::mobile::serialization::CreateFunction( + _fbb, + qn__, + instructions__, + operators__, + constants__, + type_annotations__, + register_size, + schema, + debug_info, + class_type); +} + +struct StorageData FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef StorageDataBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_DATA = 4 + }; + const ::flatbuffers::Vector *data() const { + return GetPointer *>(VT_DATA); + } + ::flatbuffers::Vector *mutable_data() { + return GetPointer<::flatbuffers::Vector *>(VT_DATA); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_DATA) && + verifier.VerifyVector(data()) && + verifier.EndTable(); + } +}; + +struct StorageDataBuilder { + typedef StorageData Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_data(::flatbuffers::Offset<::flatbuffers::Vector> data) { + fbb_.AddOffset(StorageData::VT_DATA, data); + } + explicit StorageDataBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateStorageData( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::Vector> data = 0) { + StorageDataBuilder builder_(_fbb); + builder_.add_data(data); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateStorageDataDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const std::vector *data = nullptr) { + if (data) { _fbb.ForceVectorAlignment(data->size(), sizeof(uint8_t), 16); } + auto data__ = data ? 
_fbb.CreateVector(*data) : 0; + return torch::jit::mobile::serialization::CreateStorageData( + _fbb, + data__); +} + +struct IValue FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef IValueBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_VAL_TYPE = 4, + VT_VAL = 6 + }; + torch::jit::mobile::serialization::IValueUnion val_type() const { + return static_cast(GetField(VT_VAL_TYPE, 0)); + } + const void *val() const { + return GetPointer(VT_VAL); + } + template const T *val_as() const; + const torch::jit::mobile::serialization::Int *val_as_Int() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Int ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Bool *val_as_Bool() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Bool ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Double *val_as_Double() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Double ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::ComplexDouble *val_as_ComplexDouble() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::ComplexDouble ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::TensorMetadata *val_as_TensorMetadata() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::TensorMetadata ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::String *val_as_String() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::String ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::List *val_as_List() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::List ? 
static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Tuple *val_as_Tuple() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Tuple ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Dict *val_as_Dict() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Dict ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Object *val_as_Object() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Object ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::IntList *val_as_IntList() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::IntList ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::DoubleList *val_as_DoubleList() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::DoubleList ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::BoolList *val_as_BoolList() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::BoolList ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Device *val_as_Device() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Device ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::EnumValue *val_as_EnumValue() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::EnumValue ? static_cast(val()) : nullptr; + } + const torch::jit::mobile::serialization::Function *val_as_Function() const { + return val_type() == torch::jit::mobile::serialization::IValueUnion::Function ? 
static_cast(val()) : nullptr; + } + void *mutable_val() { + return GetPointer(VT_VAL); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_VAL_TYPE, 1) && + VerifyOffset(verifier, VT_VAL) && + VerifyIValueUnion(verifier, val(), val_type()) && + verifier.EndTable(); + } +}; + +template<> inline const torch::jit::mobile::serialization::Int *IValue::val_as() const { + return val_as_Int(); +} + +template<> inline const torch::jit::mobile::serialization::Bool *IValue::val_as() const { + return val_as_Bool(); +} + +template<> inline const torch::jit::mobile::serialization::Double *IValue::val_as() const { + return val_as_Double(); +} + +template<> inline const torch::jit::mobile::serialization::ComplexDouble *IValue::val_as() const { + return val_as_ComplexDouble(); +} + +template<> inline const torch::jit::mobile::serialization::TensorMetadata *IValue::val_as() const { + return val_as_TensorMetadata(); +} + +template<> inline const torch::jit::mobile::serialization::String *IValue::val_as() const { + return val_as_String(); +} + +template<> inline const torch::jit::mobile::serialization::List *IValue::val_as() const { + return val_as_List(); +} + +template<> inline const torch::jit::mobile::serialization::Tuple *IValue::val_as() const { + return val_as_Tuple(); +} + +template<> inline const torch::jit::mobile::serialization::Dict *IValue::val_as() const { + return val_as_Dict(); +} + +template<> inline const torch::jit::mobile::serialization::Object *IValue::val_as() const { + return val_as_Object(); +} + +template<> inline const torch::jit::mobile::serialization::IntList *IValue::val_as() const { + return val_as_IntList(); +} + +template<> inline const torch::jit::mobile::serialization::DoubleList *IValue::val_as() const { + return val_as_DoubleList(); +} + +template<> inline const torch::jit::mobile::serialization::BoolList *IValue::val_as() const { + return val_as_BoolList(); +} + +template<> 
inline const torch::jit::mobile::serialization::Device *IValue::val_as() const { + return val_as_Device(); +} + +template<> inline const torch::jit::mobile::serialization::EnumValue *IValue::val_as() const { + return val_as_EnumValue(); +} + +template<> inline const torch::jit::mobile::serialization::Function *IValue::val_as() const { + return val_as_Function(); +} + +struct IValueBuilder { + typedef IValue Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_val_type(torch::jit::mobile::serialization::IValueUnion val_type) { + fbb_.AddElement(IValue::VT_VAL_TYPE, static_cast(val_type), 0); + } + void add_val(::flatbuffers::Offset val) { + fbb_.AddOffset(IValue::VT_VAL, val); + } + explicit IValueBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateIValue( + ::flatbuffers::FlatBufferBuilder &_fbb, + torch::jit::mobile::serialization::IValueUnion val_type = torch::jit::mobile::serialization::IValueUnion::NONE, + ::flatbuffers::Offset val = 0) { + IValueBuilder builder_(_fbb); + builder_.add_val(val); + builder_.add_val_type(val_type); + return builder_.Finish(); +} + +struct ExtraFile FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ExtraFileBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_NAME = 4, + VT_CONTENT = 6 + }; + const ::flatbuffers::String *name() const { + return GetPointer(VT_NAME); + } + ::flatbuffers::String *mutable_name() { + return GetPointer<::flatbuffers::String *>(VT_NAME); + } + const ::flatbuffers::String *content() const { + return GetPointer(VT_CONTENT); + } + ::flatbuffers::String *mutable_content() { + return GetPointer<::flatbuffers::String *>(VT_CONTENT); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + 
return VerifyTableStart(verifier) && + VerifyOffset(verifier, VT_NAME) && + verifier.VerifyString(name()) && + VerifyOffset(verifier, VT_CONTENT) && + verifier.VerifyString(content()) && + verifier.EndTable(); + } +}; + +struct ExtraFileBuilder { + typedef ExtraFile Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_name(::flatbuffers::Offset<::flatbuffers::String> name) { + fbb_.AddOffset(ExtraFile::VT_NAME, name); + } + void add_content(::flatbuffers::Offset<::flatbuffers::String> content) { + fbb_.AddOffset(ExtraFile::VT_CONTENT, content); + } + explicit ExtraFileBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateExtraFile( + ::flatbuffers::FlatBufferBuilder &_fbb, + ::flatbuffers::Offset<::flatbuffers::String> name = 0, + ::flatbuffers::Offset<::flatbuffers::String> content = 0) { + ExtraFileBuilder builder_(_fbb); + builder_.add_content(content); + builder_.add_name(name); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateExtraFileDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + const char *name = nullptr, + const char *content = nullptr) { + auto name__ = name ? _fbb.CreateString(name) : 0; + auto content__ = content ? 
_fbb.CreateString(content) : 0; + return torch::jit::mobile::serialization::CreateExtraFile( + _fbb, + name__, + content__); +} + +struct Module FLATBUFFERS_FINAL_CLASS : private ::flatbuffers::Table { + typedef ModuleBuilder Builder; + enum FlatBuffersVTableOffset FLATBUFFERS_VTABLE_UNDERLYING_TYPE { + VT_BYTECODE_VERSION = 4, + VT_EXTRA_FILES = 6, + VT_METHODS = 8, + VT_STATE_OBJ = 10, + VT_IVALUES = 12, + VT_STORAGE_DATA_SIZE = 14, + VT_STORAGE_DATA = 16, + VT_OBJECT_TYPES = 18, + VT_JIT_SOURCES = 20, + VT_JIT_CONSTANTS = 22, + VT_OPERATOR_VERSION = 24, + VT_MOBILE_IVALUE_SIZE = 26 + }; + uint32_t bytecode_version() const { + return GetField(VT_BYTECODE_VERSION, 0); + } + bool mutate_bytecode_version(uint32_t _bytecode_version = 0) { + return SetField(VT_BYTECODE_VERSION, _bytecode_version, 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *extra_files() const { + return GetPointer> *>(VT_EXTRA_FILES); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_extra_files() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_EXTRA_FILES); + } + const ::flatbuffers::Vector *methods() const { + return GetPointer *>(VT_METHODS); + } + ::flatbuffers::Vector *mutable_methods() { + return GetPointer<::flatbuffers::Vector *>(VT_METHODS); + } + uint32_t state_obj() const { + return GetField(VT_STATE_OBJ, 0); + } + bool mutate_state_obj(uint32_t _state_obj = 0) { + return SetField(VT_STATE_OBJ, _state_obj, 0); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *ivalues() const { + return GetPointer> *>(VT_IVALUES); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_ivalues() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_IVALUES); + } + int32_t storage_data_size() const { + return GetField(VT_STORAGE_DATA_SIZE, 0); + } + bool mutate_storage_data_size(int32_t _storage_data_size = 0) { + return SetField(VT_STORAGE_DATA_SIZE, _storage_data_size, 0); + } + const 
::flatbuffers::Vector<::flatbuffers::Offset> *storage_data() const { + return GetPointer> *>(VT_STORAGE_DATA); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_storage_data() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_STORAGE_DATA); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *object_types() const { + return GetPointer> *>(VT_OBJECT_TYPES); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_object_types() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_OBJECT_TYPES); + } + const ::flatbuffers::Vector<::flatbuffers::Offset> *jit_sources() const { + return GetPointer> *>(VT_JIT_SOURCES); + } + ::flatbuffers::Vector<::flatbuffers::Offset> *mutable_jit_sources() { + return GetPointer<::flatbuffers::Vector<::flatbuffers::Offset> *>(VT_JIT_SOURCES); + } + const ::flatbuffers::Vector *jit_constants() const { + return GetPointer *>(VT_JIT_CONSTANTS); + } + ::flatbuffers::Vector *mutable_jit_constants() { + return GetPointer<::flatbuffers::Vector *>(VT_JIT_CONSTANTS); + } + uint32_t operator_version() const { + return GetField(VT_OPERATOR_VERSION, 0); + } + bool mutate_operator_version(uint32_t _operator_version = 0) { + return SetField(VT_OPERATOR_VERSION, _operator_version, 0); + } + uint32_t mobile_ivalue_size() const { + return GetField(VT_MOBILE_IVALUE_SIZE, 0); + } + bool mutate_mobile_ivalue_size(uint32_t _mobile_ivalue_size = 0) { + return SetField(VT_MOBILE_IVALUE_SIZE, _mobile_ivalue_size, 0); + } + bool Verify(::flatbuffers::Verifier &verifier) const { + return VerifyTableStart(verifier) && + VerifyField(verifier, VT_BYTECODE_VERSION, 4) && + VerifyOffset(verifier, VT_EXTRA_FILES) && + verifier.VerifyVector(extra_files()) && + verifier.VerifyVectorOfTables(extra_files()) && + VerifyOffset(verifier, VT_METHODS) && + verifier.VerifyVector(methods()) && + VerifyField(verifier, VT_STATE_OBJ, 4) && + VerifyOffset(verifier, VT_IVALUES) && + verifier.VerifyVector(ivalues()) && + 
verifier.VerifyVectorOfTables(ivalues()) && + VerifyField(verifier, VT_STORAGE_DATA_SIZE, 4) && + VerifyOffset(verifier, VT_STORAGE_DATA) && + verifier.VerifyVector(storage_data()) && + verifier.VerifyVectorOfTables(storage_data()) && + VerifyOffset(verifier, VT_OBJECT_TYPES) && + verifier.VerifyVector(object_types()) && + verifier.VerifyVectorOfTables(object_types()) && + VerifyOffset(verifier, VT_JIT_SOURCES) && + verifier.VerifyVector(jit_sources()) && + verifier.VerifyVectorOfTables(jit_sources()) && + VerifyOffset(verifier, VT_JIT_CONSTANTS) && + verifier.VerifyVector(jit_constants()) && + VerifyField(verifier, VT_OPERATOR_VERSION, 4) && + VerifyField(verifier, VT_MOBILE_IVALUE_SIZE, 4) && + verifier.EndTable(); + } +}; + +struct ModuleBuilder { + typedef Module Table; + ::flatbuffers::FlatBufferBuilder &fbb_; + ::flatbuffers::uoffset_t start_; + void add_bytecode_version(uint32_t bytecode_version) { + fbb_.AddElement(Module::VT_BYTECODE_VERSION, bytecode_version, 0); + } + void add_extra_files(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> extra_files) { + fbb_.AddOffset(Module::VT_EXTRA_FILES, extra_files); + } + void add_methods(::flatbuffers::Offset<::flatbuffers::Vector> methods) { + fbb_.AddOffset(Module::VT_METHODS, methods); + } + void add_state_obj(uint32_t state_obj) { + fbb_.AddElement(Module::VT_STATE_OBJ, state_obj, 0); + } + void add_ivalues(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> ivalues) { + fbb_.AddOffset(Module::VT_IVALUES, ivalues); + } + void add_storage_data_size(int32_t storage_data_size) { + fbb_.AddElement(Module::VT_STORAGE_DATA_SIZE, storage_data_size, 0); + } + void add_storage_data(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> storage_data) { + fbb_.AddOffset(Module::VT_STORAGE_DATA, storage_data); + } + void add_object_types(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> object_types) { + fbb_.AddOffset(Module::VT_OBJECT_TYPES, 
object_types); + } + void add_jit_sources(::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> jit_sources) { + fbb_.AddOffset(Module::VT_JIT_SOURCES, jit_sources); + } + void add_jit_constants(::flatbuffers::Offset<::flatbuffers::Vector> jit_constants) { + fbb_.AddOffset(Module::VT_JIT_CONSTANTS, jit_constants); + } + void add_operator_version(uint32_t operator_version) { + fbb_.AddElement(Module::VT_OPERATOR_VERSION, operator_version, 0); + } + void add_mobile_ivalue_size(uint32_t mobile_ivalue_size) { + fbb_.AddElement(Module::VT_MOBILE_IVALUE_SIZE, mobile_ivalue_size, 0); + } + explicit ModuleBuilder(::flatbuffers::FlatBufferBuilder &_fbb) + : fbb_(_fbb) { + start_ = fbb_.StartTable(); + } + ::flatbuffers::Offset Finish() { + const auto end = fbb_.EndTable(start_); + auto o = ::flatbuffers::Offset(end); + return o; + } +}; + +inline ::flatbuffers::Offset CreateModule( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t bytecode_version = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> extra_files = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> methods = 0, + uint32_t state_obj = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> ivalues = 0, + int32_t storage_data_size = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> storage_data = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> object_types = 0, + ::flatbuffers::Offset<::flatbuffers::Vector<::flatbuffers::Offset>> jit_sources = 0, + ::flatbuffers::Offset<::flatbuffers::Vector> jit_constants = 0, + uint32_t operator_version = 0, + uint32_t mobile_ivalue_size = 0) { + ModuleBuilder builder_(_fbb); + builder_.add_mobile_ivalue_size(mobile_ivalue_size); + builder_.add_operator_version(operator_version); + builder_.add_jit_constants(jit_constants); + builder_.add_jit_sources(jit_sources); + builder_.add_object_types(object_types); + builder_.add_storage_data(storage_data); + 
builder_.add_storage_data_size(storage_data_size); + builder_.add_ivalues(ivalues); + builder_.add_state_obj(state_obj); + builder_.add_methods(methods); + builder_.add_extra_files(extra_files); + builder_.add_bytecode_version(bytecode_version); + return builder_.Finish(); +} + +inline ::flatbuffers::Offset CreateModuleDirect( + ::flatbuffers::FlatBufferBuilder &_fbb, + uint32_t bytecode_version = 0, + const std::vector<::flatbuffers::Offset> *extra_files = nullptr, + const std::vector *methods = nullptr, + uint32_t state_obj = 0, + const std::vector<::flatbuffers::Offset> *ivalues = nullptr, + int32_t storage_data_size = 0, + const std::vector<::flatbuffers::Offset> *storage_data = nullptr, + const std::vector<::flatbuffers::Offset> *object_types = nullptr, + const std::vector<::flatbuffers::Offset> *jit_sources = nullptr, + const std::vector *jit_constants = nullptr, + uint32_t operator_version = 0, + uint32_t mobile_ivalue_size = 0) { + auto extra_files__ = extra_files ? _fbb.CreateVector<::flatbuffers::Offset>(*extra_files) : 0; + auto methods__ = methods ? _fbb.CreateVector(*methods) : 0; + auto ivalues__ = ivalues ? _fbb.CreateVector<::flatbuffers::Offset>(*ivalues) : 0; + auto storage_data__ = storage_data ? _fbb.CreateVector<::flatbuffers::Offset>(*storage_data) : 0; + auto object_types__ = object_types ? _fbb.CreateVector<::flatbuffers::Offset>(*object_types) : 0; + auto jit_sources__ = jit_sources ? _fbb.CreateVector<::flatbuffers::Offset>(*jit_sources) : 0; + auto jit_constants__ = jit_constants ? 
_fbb.CreateVector(*jit_constants) : 0; + return torch::jit::mobile::serialization::CreateModule( + _fbb, + bytecode_version, + extra_files__, + methods__, + state_obj, + ivalues__, + storage_data_size, + storage_data__, + object_types__, + jit_sources__, + jit_constants__, + operator_version, + mobile_ivalue_size); +} + +inline bool VerifyIValueUnion(::flatbuffers::Verifier &verifier, const void *obj, IValueUnion type) { + switch (type) { + case IValueUnion::NONE: { + return true; + } + case IValueUnion::Int: { + return verifier.VerifyField(static_cast(obj), 0, 8); + } + case IValueUnion::Bool: { + return verifier.VerifyField(static_cast(obj), 0, 1); + } + case IValueUnion::Double: { + return verifier.VerifyField(static_cast(obj), 0, 8); + } + case IValueUnion::ComplexDouble: { + return verifier.VerifyField(static_cast(obj), 0, 8); + } + case IValueUnion::TensorMetadata: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::String: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::List: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::Tuple: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::Dict: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::Object: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::IntList: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::DoubleList: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::BoolList: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::Device: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + case IValueUnion::EnumValue: { + auto ptr = reinterpret_cast(obj); + return 
verifier.VerifyTable(ptr); + } + case IValueUnion::Function: { + auto ptr = reinterpret_cast(obj); + return verifier.VerifyTable(ptr); + } + default: return true; + } +} + +inline bool VerifyIValueUnionVector(::flatbuffers::Verifier &verifier, const ::flatbuffers::Vector<::flatbuffers::Offset> *values, const ::flatbuffers::Vector *types) { + if (!values || !types) return !values && !types; + if (values->size() != types->size()) return false; + for (::flatbuffers::uoffset_t i = 0; i < values->size(); ++i) { + if (!VerifyIValueUnion( + verifier, values->Get(i), types->GetEnum(i))) { + return false; + } + } + return true; +} + +inline const torch::jit::mobile::serialization::Module *GetModule(const void *buf) { + return ::flatbuffers::GetRoot(buf); +} + +inline const torch::jit::mobile::serialization::Module *GetSizePrefixedModule(const void *buf) { + return ::flatbuffers::GetSizePrefixedRoot(buf); +} + +inline Module *GetMutableModule(void *buf) { + return ::flatbuffers::GetMutableRoot(buf); +} + +inline torch::jit::mobile::serialization::Module *GetMutableSizePrefixedModule(void *buf) { + return ::flatbuffers::GetMutableSizePrefixedRoot(buf); +} + +inline const char *ModuleIdentifier() { + return "PTMF"; +} + +inline bool ModuleBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, ModuleIdentifier()); +} + +inline bool SizePrefixedModuleBufferHasIdentifier(const void *buf) { + return ::flatbuffers::BufferHasIdentifier( + buf, ModuleIdentifier(), true); +} + +inline bool VerifyModuleBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifyBuffer(ModuleIdentifier()); +} + +inline bool VerifySizePrefixedModuleBuffer( + ::flatbuffers::Verifier &verifier) { + return verifier.VerifySizePrefixedBuffer(ModuleIdentifier()); +} + +inline void FinishModuleBuffer( + ::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.Finish(root, ModuleIdentifier()); +} + +inline void FinishSizePrefixedModuleBuffer( + 
::flatbuffers::FlatBufferBuilder &fbb, + ::flatbuffers::Offset root) { + fbb.FinishSizePrefixed(root, ModuleIdentifier()); +} + +} // namespace serialization +} // namespace mobile +} // namespace jit +} // namespace torch + +#endif // FLATBUFFERS_GENERATED_MOBILEBYTECODE_TORCH_JIT_MOBILE_SERIALIZATION_H_ +// @generated + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/onnx.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/onnx.h new file mode 100644 index 0000000000000000000000000000000000000000..45d67dc480dddf573c951d42be5fc187175d8eaf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/onnx.h @@ -0,0 +1,23 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED( + "-Winconsistent-missing-destructor-override") +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Wsuggest-override") +C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED( + "-Wdeprecated-dynamic-exception-spec") +#include +C10_DIAGNOSTIC_POP() +C10_DIAGNOSTIC_POP() +C10_DIAGNOSTIC_POP() +#include + +namespace torch::jit { + +TORCH_API std::string prettyPrint(const ::ONNX_NAMESPACE::ModelProto& model); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickle.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickle.h new file mode 100644 index 0000000000000000000000000000000000000000..0cdf6cded25e7574958df43ea4bf338dc5bc6ba2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickle.h @@ -0,0 +1,145 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +/// Pickle an IValue by calling a function to handle writing the data. +/// +/// `writer` is a function that takes in a pointer to a chunk of memory and its +/// size and consumes it. +/// +/// See `jit::pickle` for more details. +TORCH_API void pickle( + std::function writer, + const IValue& ivalue, + std::vector* tensor_table = nullptr); + +/// Save a `torch::IValue` in a format compatible with Python's `pickle` module +/// +/// If present, `tensor_table` is a pointer to a table in which tensors that +/// are contained within `ivalue` are stored, and the bytes returned by the +/// pickler will only include references to these tensors in the table. This can +/// be used to keep the binary blob size small. +/// If not provided, tensors are stored in the same byte stream as the pickle +/// data, similar to `torch.save()` in eager Python. +/// +/// Pickled values can be loaded in Python and C++: +/// \rst +/// .. code-block:: cpp +/// +/// torch::IValue float_value(2.3); +/// +/// // TODO: when tensors are stored in the pickle, delete this +/// std::vector tensor_table; +/// auto data = torch::jit::pickle(float_value, &tensor_table); +/// +/// std::vector ivalues = +/// torch::jit::unpickle(data.data(), data.size()); +/// +/// .. 
code-block:: python +/// +/// values = torch.load('data.pkl') +/// print(values) +/// +/// \endrst +TORCH_API std::vector pickle( + const IValue& ivalue, + std::vector* tensor_table = nullptr); + +/// Save a `torch::IValue` in a format that can be loaded by both +/// `torch::pickle_load` in C++ and `torch.load` in Python. +TORCH_API std::vector pickle_save(const IValue& ivalue); + +/// Deserialize a `torch::IValue` from bytes produced by either +/// `torch::pickle_save` in C++ or `torch.save` in Python +TORCH_API IValue pickle_load(const std::vector& data); + +/// Deserialize a `torch::IValue` from bytes produced by either +/// `torch::pickle_save` in C++ or `torch.save` in Python with custom object. +TORCH_API IValue pickle_load_obj(std::string_view data); + +/// `reader` is a function that takes in a size to read from some pickled +/// binary. `reader` should remember where it last read, and return +/// the number of bytes read. +/// See `torch::pickle` for details. +/// type_resolver is used to resolve any JIT type based on type str +TORCH_API IValue unpickle( + std::function reader, + TypeResolver type_resolver, + c10::ArrayRef tensor_table, + c10::TypePtr (*type_parser)(const std::string&) = + Unpickler::defaultTypeParser, + ObjLoader obj_loader = nullptr); + +/// Decode a chunk of memory containing pickled data into its `torch::IValue`s. +/// +/// If any `torch::IValue`s in the pickled data are `Object`s, then a +/// `class_resolver` function must be provided. +/// +/// See `torch::pickle` for details. +TORCH_API IValue unpickle( + const char* data, + size_t size, + TypeResolver type_resolver = nullptr, + c10::ArrayRef tensor_table = {}, + c10::TypePtr (*type_parser)(const std::string&) = + Unpickler::defaultTypeParser); + +/// Decode a chunk of memory containing pickled data into its `torch::IValue`s. +/// +/// If any `torch::IValue`s in the pickled data are `Object`s, then a +/// `class_resolver` function must be provided. 
+/// +/// See `torch::pickle` for details. +TORCH_API IValue unpickle( + const char* data, + size_t size, + ObjLoader obj_loader, + TypeResolver type_resolver = nullptr, + c10::ArrayRef tensor_table = {}, + c10::TypePtr (*type_parser)(const std::string&) = + Unpickler::defaultTypeParser); + +#ifndef C10_MOBILE +class VectorReader : public caffe2::serialize::ReadAdapterInterface { + public: + VectorReader(std::vector data) : data_(std::move(data)) {} + + size_t size() const override { + return data_.size(); + } + + size_t read(uint64_t pos, void* buf, size_t n, const char* what) + const override; + + private: + std::vector data_; +}; + +class StringViewReader : public caffe2::serialize::ReadAdapterInterface { + public: + StringViewReader(std::string_view data) : data_(data) {} + + size_t size() const override { + return data_.size(); + } + + size_t read(uint64_t pos, void* buf, size_t n, const char* what) + const override; + + private: + std::string_view data_; +}; +#endif +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler.h new file mode 100644 index 0000000000000000000000000000000000000000..6361fed7b397d707816fbde1dedfebf39b0f2422 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler.h @@ -0,0 +1,190 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit { + +using ::c10::IValue; + +class TORCH_API Pickler { + AT_DISALLOW_COPY_AND_ASSIGN(Pickler); + + public: + Pickler(std::function writer) + : Pickler(std::move(writer), nullptr, nullptr, nullptr) {} + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + Pickler( + std::function writer, + std::vector* tensor_table, + std::function type_renamer, + std::vector* memoized_class_types, + std::function get_tensor_id = nullptr, + bool tag_aggregates = true) + : writer_(std::move(writer)), + tensor_table_(tensor_table), + type_renamer_(std::move(type_renamer)), + memoized_class_types_(memoized_class_types), + get_tensor_id_(std::move(get_tensor_id)), + tag_aggregates_(tag_aggregates) {} + ~Pickler(); + + // Push protocol onto the stack + void protocol(); + + // Push STOP PickleOpCode onto the stack + void stop(); + + void pushIValue(const IValue& ivalue); + + void startTuple(); + void endTuple(); + + const std::vector& tensorData() { + return tensor_data_; + } + + void pushEmptyDict(); + void pushDict(const IValue& ivalue); + void pushInt(int64_t value); + void pushLong(const std::string& data); + + private: + void pushIValueImpl(const IValue& ivalue); + void 
startTypeTag(); + void endTypeTag(const IValue& value); + void pushBool(bool value); + void pushDouble(double value); + void pushComplexDouble(const IValue& value); + void pushGenericList(const IValue& ivalue); + void pushIntList(const IValue& ivalue); + void pushList(const IValue& ivalue); + void pushTensor(const IValue& ivalue); + void pushTensorReference(const IValue& ivalue); + void pushLiteralTensor(const IValue& ivalue); + void pushLiteralSparseTensor(const at::Tensor& tensor); + void pushTuple(const IValue& ivalue); + void pushString(const std::string& string); + void pushDevice(const IValue& ivalue); +#ifdef USE_DISTRIBUTED + void pushRRef(const IValue& ivalue); +#endif + // unmemoized version + void pushStringImpl(const std::string& string); + void pushStorageOfTensor(const at::Tensor& tensor); + + void pushBinGet(uint32_t memo_id); + void pushSpecializedList( + const IValue& ivalue, + const char* list_name, + const std::function& item_pusher); + void pushGlobal(std::string_view module_name, std::string_view class_name); + // raw string data is appended directly to the byte stream + void pushBytes(const std::string& string); + void pushTensorData(const at::Tensor& tensor); + + // Add a BINPUT op and return the memoization id used + size_t pushNextBinPut(); + + const void* getPointer(const IValue& ivalue); + + // Caller checks that bufferPos_ > 0 + void flushNonEmpty() { + writer_(buffer_.data(), bufferPos_); + bufferPos_ = 0; + } + + void flush() { + if (bufferPos_ != 0) { + flushNonEmpty(); + } + } + + // These convert values to bytes and add them to the stack (NB: since T is to + // the left of a '::', its type cannot be deduced by the compiler so one must + // explicitly instantiate the template, i.e. 
push(int) works, push(int) + // does not) + static constexpr size_t kBufferSize = 256; + template + void push(std::common_type_t value) { + const char* begin = reinterpret_cast(&value); + if (bufferPos_ + sizeof(T) > buffer_.size()) { + flushNonEmpty(); + } + static_assert(sizeof(T) <= kBufferSize, "Buffer size assumption"); + memcpy(buffer_.data() + bufferPos_, begin, sizeof(T)); + bufferPos_ += sizeof(T); + } + + // Stream to write binary data to + // Code shouldn't call writer_ directly without first flushing. + std::function writer_; + + // Buffer to avoid calling a writer_ on a per-byte basis. + std::array buffer_; + size_t bufferPos_{0}; + + // Stack of opcodes/data + std::vector stack_; + + // External table of tensors to serialize. If this is missing, then tensors + // are serialized directly into the pickle + std::vector* tensor_table_; + + // TODO: only use this if necessary (add a pass to find all shared ivalues, + // and only memoize those) + uint32_t memo_id_ = 0; + + // Memoization of IValues that have been written (index in table is used for + // BINPUT opcodes) to enable shared references + c10::FastMap memoized_ivalue_map_; + + // because we de-dup ivalues based on their raw pointer address in the above + // map we need to keep all the memoized values alive during the pickle. + // Otherwise, it is possible that a raw address gets reused for another + // object, and we will alias it to the old object at that address. + std::vector memoized_ivalues_; + + std::function type_renamer_; + + // List of all the types that it wrote, inspect from the IValues it wrote. 
+ std::vector* memoized_class_types_; + + // Function to grab next id_name for tensor storage, function is responsible + // for returning unique ids + std::function get_tensor_id_; + + // List of tensor storages to serialize in the same binary as the pickle data + // similar to ivalues, they are memoized using BINPUT + std::vector tensor_data_; + c10::FastMap memoized_storage_map_; + + c10::FastMap memoized_globals_map_; + c10::FastMap memoized_strings_map_; + c10::FastMap memoized_devices_map_; + // when true, List and Dict objects will be wrapped in a + // torch.jit._pickle.restore_type_tag call to correctly set the dynamic + // TorchScript type for the object. When true the thing unpickling must have + // torch installed. + bool tag_aggregates_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler_helper.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler_helper.h new file mode 100644 index 0000000000000000000000000000000000000000..523e912a392aab0104f6279d32b90c12c6d3bd74 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/pickler_helper.h @@ -0,0 +1,239 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::jit { + +// See Python's pickletools.py for a detailed description of each of these codes +enum class PickleOpCode : char { + MARK = '(', + STOP = '.', + POP = '0', + POP_MARK = '1', + DUP = '2', + FLOAT = 'F', + INT = 'I', + BININT = 'J', + BININT1 = 'K', + LONG = 'L', + BININT2 = 'M', + NONE = 'N', + PERSID = 'P', + BINPERSID = 'Q', + REDUCE = 'R', + STRING 
= 'S', + BINSTRING = 'T', + SHORT_BINSTRING = 'U', + // NB: Avoid using UNICODE as it is a macro in the Windows API + UNICODE_ = 'V', + BINUNICODE = 'X', + APPEND = 'a', + BUILD = 'b', + GLOBAL = 'c', + DICT = 'd', + EMPTY_DICT = '}', + APPENDS = 'e', + GET = 'g', + BINGET = 'h', + INST = 'i', + LONG_BINGET = 'j', + LIST = 'l', + EMPTY_LIST = ']', + OBJ = 'o', + PUT = 'p', + BINPUT = 'q', + LONG_BINPUT = 'r', + SETITEM = 's', + TUPLE = 't', + EMPTY_TUPLE = ')', + SETITEMS = 'u', + BINFLOAT = 'G', + + // Protocol 2 + // NOLINTNEXTLINE(readability-redundant-inline-specifier) + PROTO = char('\x80'), + NEWOBJ = '\x81', + EXT1 = '\x82', + EXT2 = '\x83', + EXT4 = '\x84', + TUPLE1 = '\x85', + TUPLE2 = '\x86', + TUPLE3 = '\x87', + NEWTRUE = '\x88', + NEWFALSE = '\x89', + LONG1 = '\x8a', + LONG4 = '\x8b', + + // Protocol 3 (Python 3.x) + BINBYTES = 'B', + SHORT_BINBYTES = 'C', + + // Protocol 4 + // NOLINTNEXTLINE(readability-redundant-inline-specifier) + SHORT_BINUNICODE = char('\x8c'), + BINUNICODE8 = '\x8d', + BINBYTES8 = '\x8e', + EMPTY_SET = '\x8f', + ADDITEMS = '\x90', + FROZENSET = '\x91', + NEWOBJ_EX = '\x92', + STACK_GLOBAL = '\x93', + MEMOIZE = '\x94', + FRAME = '\x95' +}; + +struct WriteableTensorData { + const char* data() const { + return static_cast(tensor_.storage().data()); + } + size_t sizeInBytes() const { + return size_; + } + size_t nbytes() const { + return tensor_.storage().nbytes(); + } + bool storageHasDeleter() const { + return tensor_.storage().data_ptr().get_context() != nullptr; + } + + private: + friend TORCH_API WriteableTensorData + getWriteableTensorData(const at::Tensor& tensor, bool to_cpu); + at::Tensor tensor_; + uint64_t size_; +}; + +// returns a (tensor, record_size) for a tensor, converting it to a CPU tensor +// if it was CUDA and to_cpu is True. 
+TORCH_API WriteableTensorData +getWriteableTensorData(const at::Tensor& tensor, bool to_cpu = true); + +// if the cls has __getstate__/__setstate__ +// assert they have the right schema and return true, +// otherwise return false +bool checkHasValidSetGetState(const c10::ClassType& cls); + +// Declare BackendMeta serialization and deserialization function pointer types. +using BackendMetaPtr = std::function< + void(const at::Tensor&, std::unordered_map&)>; + +// A allowlist of device type, currently available is PrivateUse1 +TORCH_API std::unordered_set& GetBackendMetaAllowlist(); + +// Dynamically obtain serialization function pairs +// that require the corresponding backend. +TORCH_API std::array< + std::optional>, + at::COMPILE_TIME_MAX_DEVICE_TYPES>& +GetBackendMetaSerialization(); + +// Return a map of Tensor Metadata which including BackendMetaData for +// serialization. For now, it only takes care of `conj` and `neg` bit. +inline std::unordered_map getTensorMetadata( + const at::Tensor& t) { + // We don't support serializing `ZeroTensor` as it is not public + // facing yet. + TORCH_CHECK( + !t._is_zerotensor(), + "ZeroTensor is not serializable,", + " please file an issue if required."); + std::unordered_map metadata{}; + + // Only add meta-data if the value is not default. + if (t.is_conj()) { + metadata["conj"] = true; + } + if (t.is_neg()) { + metadata["neg"] = true; + } + // Only add BackendMetaData for custom backend if the function pointer is + // registered. + int device_type = static_cast(t.device().type()); + const auto& BackendMetaSerialization = GetBackendMetaSerialization(); + if (BackendMetaSerialization[device_type].has_value()) { + // Pass the tensor and metadata map references as parameters to the custom + // serialization function. + BackendMetaPtr fptr = BackendMetaSerialization[device_type].value().first; + fptr(t, metadata); + } + return metadata; +} + +// set Tensor Metadata based on the map. 
+// Refer: getTensorMetadata +inline void setTensorMetadata( + const at::Tensor& t, + std::unordered_map metadata) { + auto iter_end = metadata.end(); + auto iter_temp = metadata.find("conj"); + if (iter_temp != iter_end) { + t._set_conj(true); + metadata.erase(iter_temp); + } + iter_temp = metadata.find("neg"); + if (iter_temp != iter_end) { + t._set_neg(true); + metadata.erase(iter_temp); + } + // Only set BackendMetaData for custom backend if the function pointer is + // registered. + int device_type = static_cast(t.device().type()); + const auto& BackendMetaSerialization = GetBackendMetaSerialization(); + if (BackendMetaSerialization[device_type].has_value()) { + // Pass the tensor and metadata map references as parameters to the custom + // deserialization function. + BackendMetaPtr fptr = BackendMetaSerialization[device_type].value().second; + fptr(t, metadata); + } +} + +// set Tensor metadata based on the map. +// NOTE: This overload is required by unpickler.cpp +inline void setTensorMetadata( + const at::Tensor& t, + const c10::Dict& metadata_idict) { + std::unordered_map metadata; + for (auto& pair : metadata_idict) { + auto key = *pair.key().toString(); + metadata[key] = pair.value().toBool(); + } + setTensorMetadata(t, std::move(metadata)); +} + +// Register function pointer of Tensor BackendMetadata for serialization. +inline void TensorBackendMetaRegistry( + c10::DeviceType t, + const BackendMetaPtr& get_fptr, + const BackendMetaPtr& set_fptr) { + // allowlist verification + // Only if the devicetype is in the allowlist, + // we allow the serialization extension to be registered for backendmeta data. + const auto& DeviceTypeAllowlist = GetBackendMetaAllowlist(); + TORCH_CHECK( + DeviceTypeAllowlist.find(t) != DeviceTypeAllowlist.end(), + "It is not allowed to register the serialization method ", + "of backendMeta data for PrivateUse1. 
", + "If you have related serialization requirements, ", + "please expand the allowlist"); + // Register function pointer + int device_type = static_cast(t); + auto& BackendMetaSerialization = GetBackendMetaSerialization(); + TORCH_CHECK( + !BackendMetaSerialization[device_type].has_value(), + "The tensor BackendMeta serialization function pointer for ", + t, + " has been registered."); + BackendMetaSerialization[device_type] = + std::optional>( + std::make_pair(get_fptr, set_fptr)); +} + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/python_print.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/python_print.h new file mode 100644 index 0000000000000000000000000000000000000000..7cefbe091fa581766880dcde6a7ae49a4a56705d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/python_print.h @@ -0,0 +1,61 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include + +namespace torch::jit { + +struct Method; +struct Module; +struct PythonPrintImpl; + +struct PrintDepsTable { + void add(const c10::NamedTypePtr& type); + + size_t size() const { + return table_.size(); + } + + const c10::NamedTypePtr& operator[](size_t index) const { + return table_[index]; + } + + private: + std::vector table_; + std::unordered_set non_unique_; +}; + +struct TORCH_API PythonPrint { + PythonPrint( + std::vector& constant_table, + PrintDepsTable& deps_table, + c10::TypePrinter type_printer = nullptr, + bool enforce_importable = false); + + void printNamedType(const c10::NamedTypePtr& classType); + void printFunction(const 
Function& callee); + void printMethod(const Function& callee); + + std::string str() const; + const SourceRangeRecords& ranges() const; + uint64_t minVersion() const; + + private: + std::shared_ptr pImpl; +}; + +TORCH_API bool printerHasSpecialCaseFor(c10::Symbol sym); + +TORCH_API void jitModuleToPythonCodeAndConstants( + const Module& module, + ExtraFilesMap* jit_sources, // output + std::vector* constants // output +); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization.h new file mode 100644 index 0000000000000000000000000000000000000000..3ff838cc62409bd0a5b9f14c9a60ea1e744c223f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization.h @@ -0,0 +1,71 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include + +namespace c10 { +struct IValue; +} + +namespace torch::jit { + +class Pickler; +class SourceRangeSerializer; +static constexpr size_t kByteOffsetIndex = 0; +static constexpr size_t kSourceRangeIndex = 1; +static constexpr size_t kSourceRangeTagIndex = 2; +constexpr std::string_view kFormatWithStringTable = "FORMAT_WITH_STRING_TABLE"; + +class SourceRangePickler { + public: + SourceRangePickler(); + + std::vector pickle( + const SourceRangeRecords& ranges, + const SourceRangeTagMap& source_range_tags); + + private: + std::shared_ptr srs; +}; + +class SourceRangeDeserializer { + public: + SourceRangeDeserializer() = default; + explicit 
SourceRangeDeserializer(const c10::IValue& text_table) { + for (const auto& x : text_table.toTuple()->elements()) { + text_table_.emplace_back(std::make_shared(x.toStringRef())); + } + } + SourceRange deserialize(const c10::IValue& iv); + + private: + std::shared_ptr deserialize_source(const c10::IValue& iv); + std::unordered_map< + c10::intrusive_ptr, + std::shared_ptr> + cached_sources; + std::vector> text_table_; +}; + +class SourceRangeUnpickler { + public: + virtual std::optional findSourceRangeThatGenerated( + const SourceRange& range) = 0; + + virtual ~SourceRangeUnpickler() = default; +}; + +TORCH_API void setShouldUseFormatWithStringTable( + bool should_use_format_with_string_table); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..35161796290f4cd756de6f789d8bd645828da80f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/source_range_serialization_impl.h @@ -0,0 +1,33 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +// Do this clownyness with virtual functions because of the split +// between ATen core and torch + +class ConcreteSourceRangeUnpickler : public SourceRangeUnpickler { + public: + ConcreteSourceRangeUnpickler(at::DataPtr&& data, size_t size); + + std::optional findSourceRangeThatGenerated( + const SourceRange& range) override; + + private: + at::DataPtr data; + size_t size; + + void 
unpickle(); + + std::mutex mutex; + std::shared_ptr deserializer; + std::shared_ptr unpickled_records; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/storage_context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/storage_context.h new file mode 100644 index 0000000000000000000000000000000000000000..fac2b53c200ebeba2daf2affbafc3a6b15fe6fd6 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/storage_context.h @@ -0,0 +1,88 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit { + +// Used in torch.package and TorchScript serialization to coordinate +// sharing of storages between models. Also used to create deterministic +// naming for storages. 
+class TORCH_API SerializationStorageContext { + public: + explicit SerializationStorageContext() = default; + SerializationStorageContext operator=(const SerializationStorageContext&) = + delete; + SerializationStorageContext(const SerializationStorageContext&) = delete; + + uint64_t getOrAddStorage(const c10::Storage& storage) { + if (!hasStorage(storage)) { + uint64_t size = storage_id_map_.size(); + storage_id_map_[storage] = size; + } + return storage_id_map_[storage]; + } + + bool hasStorage(const c10::Storage& storage) { + return storage_id_map_.find(storage) != storage_id_map_.end(); + } + + ~SerializationStorageContext() = default; + + private: + class StorageSerializationHash { + public: + size_t operator()(const c10::Storage& storage) const { + return std::hash()( + reinterpret_cast(storage.unsafeGetStorageImpl())); + } + }; + + class StorageSerializationEqual { + public: + bool operator()(const c10::Storage& lhs, const c10::Storage& rhs) const { + return lhs.unsafeGetStorageImpl() == rhs.unsafeGetStorageImpl(); + } + }; + + std::unordered_map< + c10::Storage, + uint64_t, + StorageSerializationHash, + StorageSerializationEqual> + storage_id_map_; +}; + +// Used in torch.package and TorchScript deserialization to coordinate +// sharing of storages between models. 
+class TORCH_API DeserializationStorageContext { + public: + explicit DeserializationStorageContext() = default; + DeserializationStorageContext operator=( + const DeserializationStorageContext&) = delete; + DeserializationStorageContext(const DeserializationStorageContext&) = delete; + + void addStorage(std::string name, c10::Storage storage) { + TORCH_INTERNAL_ASSERT(!hasStorage(name)); + name_storage_map_.emplace(std::move(name), std::move(storage)); + } + + bool hasStorage(const std::string& name) { + return name_storage_map_.find(name) != name_storage_map_.end(); + } + + c10::Storage getStorage(const std::string& name) { + TORCH_INTERNAL_ASSERT(hasStorage(name)); + return name_storage_map_.find(name)->second; + } + ~DeserializationStorageContext() = default; + + private: + std::unordered_map name_storage_map_; +}; + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/type_name_uniquer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/type_name_uniquer.h new file mode 100644 index 0000000000000000000000000000000000000000..62747c3514fa4fba667368b2830b78c3f8355dc3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/type_name_uniquer.h @@ -0,0 +1,36 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit { + +/** + * class TypeNameUniquer + * + * Generates a unique name for every type `t` passed in. Types that compare + * equal with EqualType will receive the same unique name. + * + * This is used during Module::save(), to resolve type name collisions during + * serialization. 
+ */ +class TORCH_API TypeNameUniquer { + public: + c10::QualifiedName getUniqueName(c10::ConstNamedTypePtr t); + + private: + NameMangler mangler_; + std::unordered_set used_names_; + std::unordered_map< + c10::ConstNamedTypePtr, + c10::QualifiedName, + HashType, + EqualType> + name_map_; +}; +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/unpickler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/unpickler.h new file mode 100644 index 0000000000000000000000000000000000000000..02fc9ba8d543c72132359bb1ccc846402c498574 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/serialization/unpickler.h @@ -0,0 +1,211 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include +#include +#include + +namespace torch::jit { + +using TypeResolver = + std::function; + +using ObjLoader = std::function< + c10::intrusive_ptr(const at::StrongTypePtr&, IValue)>; + +class DeserializationStorageContext; + +// [unpickler refactor] there is some cruft around PickleOpCode::BUILD, +// PickleOpCode::NEWOBJ, and the last_opcode_ member below that should be +// deleted at some point, the Pickler doesn't produce it and it's only around to +// support models saved before 1.1 +class TORCH_API Unpickler { + AT_DISALLOW_COPY_AND_ASSIGN(Unpickler); + + using TypeParserT = c10::TypePtr (*)(const std::string&); + + public: + // tensors inside the pickle are references to the tensor_table. + // class_resolver is to resolve strong class type, type_resolver_ is + // to resolve any JIT type. 
class_resolver and type_resolver are not merged + // here because some use cases need to get strong class type that + // type_resolver_ can not return. + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + Unpickler( + std::function reader, + TypeResolver type_resolver, + c10::ArrayRef tensor_table, + TypeParserT type_parser = defaultTypeParser) + : reader_(std::move(reader)), + tensor_table_(tensor_table), + type_resolver_(std::move(type_resolver)), + use_storage_device_(false), + type_parser_(type_parser), + version_(caffe2::serialize::kProducedFileFormatVersion) {} + + // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + Unpickler( + std::function reader, + TypeResolver type_resolver, + c10::ArrayRef tensor_table, + ObjLoader obj_loader, + TypeParserT type_parser = defaultTypeParser) + : reader_(std::move(reader)), + tensor_table_(tensor_table), + type_resolver_(std::move(type_resolver)), + obj_loader_(std::move(obj_loader)), + use_storage_device_(false), + type_parser_(type_parser), + version_(caffe2::serialize::kProducedFileFormatVersion) {} + + // tensors inside the pickle contain meta-data, the raw tensor + // dead is retrieved by calling `read_record`. 
+ // NOLINTNEXTLINE(cppcoreguidelines-pro-type-member-init) + Unpickler( + std::function reader, + TypeResolver type_resolver, + ObjLoader obj_loader, + std::function read_record, + std::optional device, + bool use_storage_device = false, + TypeParserT type_parser = defaultTypeParser, + std::shared_ptr storage_context = nullptr) + : reader_(std::move(reader)), + type_resolver_(std::move(type_resolver)), + obj_loader_(std::move(obj_loader)), + read_record_(std::move(read_record)), + device_(device), + use_storage_device_(use_storage_device), + type_parser_(type_parser), + storage_context_(std::move(storage_context)), + version_(caffe2::serialize::kProducedFileFormatVersion) {} + + Unpickler(Unpickler&&) = delete; + Unpickler& operator=(Unpickler&&) = delete; + ~Unpickler() = default; + + // consume the pickle stream, producing an IValue from the contents. + // Type Tags: the pickler will restore the type tags on + // List and Dict objects when possible IValue is an Object. + // Otherwise, Dict and List objects will end up with Any as their tag. + // If you know the type of the ivalue, tags can be restored with + // restoreAccurateTypeTags + IValue parse_ivalue(); + + // [type tag serialization] + // This is used to determine whether to restore type tags be recursively + // descending into the returned stack object (if version_number <= 2), or + // if version_number >= 3, to use the type strings included in the pickle + // archive for container types. By default this is set to + // `kProducedFileFormatVersion` so unless you're loading a pickle file + // from alongside a corresponding `version` file, you don't need to set + // the version manually. 
+ void set_version(uint64_t version_number) { + version_ = version_number; + } + + static c10::TypePtr defaultTypeParser(const std::string& str) { + ScriptTypeParser parser; + return parser.parseType(str); + } + + private: + // No arguments ensures that a template argument must be specified + // so that the number of bytes read / type read is explicit + template + T read() { + T item; + if (sizeof(T) <= buffer_remaining_) { + // Fast path: entirely from buffer. + memcpy(&item, buffer_.data() + buffer_pos_, sizeof(T)); + buffer_remaining_ -= sizeof(T); + buffer_pos_ += sizeof(T); + } else { + // Don't over-template the slow path, to avoid code size bloat. + readSlowWithBuffer(reinterpret_cast(&item), sizeof(T)); + } + return item; + } + void readSlowWithBuffer(char* dest, size_t sz); + std::string readBytes(size_t num_bytes); + + double readFloat(); + void readGlobal( + const std::string& module_name, + const std::string& class_name); + void rebuildTensor(bool quantized); + void rebuildParameter(); + void rebuildTensorFromTypeV2(); + void rebuildSparseTensor(); +#ifdef USE_DISTRIBUTED + void rebuildRRef(); +#endif + PickleOpCode readInstruction(); + PickleOpCode readOpCode() { + return static_cast(read()); + } + std::string readString(); + void readList(IValue list_ivalue); + void readListElements(IValue list_ivalue, size_t start); + void setInput(size_t memo_id); + void run(); + + // Returns the number of bytes read. This should statefully + // remember the position. Don't call reader_ directly. + std::function reader_; + // Small buffer to avoid calling reader_ on a per-byte basis. 
+ std::array buffer_; + size_t buffer_pos_{0}; + size_t buffer_remaining_{0}; + + std::vector stack_; + + // globals are represented on the stack as IValue integer indices + // into this list + std::vector> globals_; + std::vector memo_table_; + std::vector marks_; + c10::ArrayRef tensor_table_; + + // When deserializing types on lists and dicts, cache the type here + // so we don't have to parse the same type multiple times. Strings + // are already de-duplicated and replaced with BINGETs in the + // pickler, so we can just use the actual data pointer of each string. + std::unordered_map type_cache_; + + // optionally nullptr, needs to be present for creating classes + TypeResolver type_resolver_; + ObjLoader obj_loader_; + IValue empty_tuple_; + + std::function read_record_; + std::optional device_; + // When set to true, Unpickler will ignore the pickled device and use the + // device of the DataPtr returned by the read_record_ function. The default + // value of this flag is false. + const bool use_storage_device_; + + TypeParserT type_parser_{defaultTypeParser}; + + // Used for torch.package to enable sharing of storages across + // ScriptModules and eager modules + std::shared_ptr storage_context_; + + // See [type tag serialization] + uint64_t version_; + + // See [NOTE] skip_next_read_global + uint8_t skip_next_read_global = 0; +}; + +void restoreAccurateTypeTags(const IValue& root, const c10::TypePtr& type_tag); + +} // namespace torch::jit + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/analysis.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/analysis.h new file mode 100644 index 0000000000000000000000000000000000000000..d0b79418577b6a7bbabd2acbb418175ac5542924 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/analysis.h @@ -0,0 +1,403 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include + +namespace torch::jit::tensorexpr { +class HasRand : public IRVisitor { + public: + HasRand(StmtPtr stmt) : stmt_(std::move(stmt)) { + stmt_->accept(this); + } + + bool has_rand() const { + return has_rand_; + } + + private: + void visit(const IntrinsicsPtr& v) override { + if (v->op_type() == IntrinsicsOp::kRand) { + has_rand_ = true; + } else { + IRVisitor::visit(v); + } + } + StmtPtr stmt_; + bool has_rand_ = false; +}; + +template +class NodeFinder : public IRVisitor { + public: + void visit(const NodePtr& v) override { + nodes.push_back((NodePtr)v); + IRVisitor::visit(v); + } + + static std::vector> find(const StmtPtr& s) { + NodeFinder nf; + s->accept(&nf); + return nf.nodes; + } + + static std::vector> find(const ExprPtr& e) { + NodeFinder nf; + e->accept(&nf); + return nf.nodes; + } + + std::vector> nodes; +}; + +class VarFinder : public IRVisitor { + public: + void visit(const VarPtr& v) override { + vars_.insert(v); + IRVisitor::visit(v); + } + + static std::unordered_set find(const StmtPtr& s) { + VarFinder nf; + s->accept(&nf); + return nf.vars(); + } + + static std::unordered_set find(const ExprPtr& e) { + VarFinder nf; + e->accept(&nf); + return nf.vars(); + } + + const std::unordered_set& vars() { + return vars_; + } + + private: + std::unordered_set vars_; +}; 
+ +class BufFinder : public IRVisitor { + public: + void visit(const BufPtr& v) override { + bufs_.insert(v); + IRVisitor::visit(v); + } + + static std::unordered_set find(const StmtPtr& s) { + BufFinder nf; + s->accept(&nf); + return nf.bufs(); + } + + static std::unordered_set find(const ExprPtr& e) { + BufFinder nf; + e->accept(&nf); + return nf.bufs(); + } + + const std::unordered_set& bufs() { + return bufs_; + } + + private: + std::unordered_set bufs_; +}; + +// Finds all kinds of write operations to the provided Buf. +class WritesToBuf : public IRVisitor { + public: + WritesToBuf(BufPtr target) : target_(std::move(target)) {} + + std::vector writes() { + return writes_; + } + + static std::vector find(const StmtPtr& s, BufPtr b) { + WritesToBuf finder(std::move(b)); + s->accept(&finder); + return finder.writes(); + } + + private: + void visit(const StorePtr& v) override { + if (v->buf() == target_) { + writes_.push_back(v); + } + } + + void visit(const AtomicAddPtr& v) override { + if (v->buf() == target_) { + writes_.push_back(v); + } + } + + BufPtr target_; + std::vector writes_; +}; + +class StmtsReadingBuf : public IRVisitor { + public: + StmtsReadingBuf(BufPtr target) : target_(std::move(target)) {} + + std::vector reads() { + return reads_; + } + + static std::vector find(const StmtPtr& s, BufPtr b) { + StmtsReadingBuf finder(std::move(b)); + s->accept(&finder); + return finder.reads(); + } + + private: + bool readsBuffer(const StmtPtr& s) { + auto loads = NodeFinder::find(s); + for (const auto& l : loads) { + if (l->buf() == target_) { + return true; + } + } + return false; + } + + void visit(const StorePtr& v) override { + if (readsBuffer(v)) { + reads_.push_back(v); + } + } + + void visit(const LetPtr& v) override { + if (readsBuffer(v)) { + reads_.push_back(v); + } + } + + void visit(const CondPtr& v) override { + if (readsBuffer(v)) { + reads_.push_back(v); + } + } + + void visit(const AtomicAddPtr& v) override { + if (readsBuffer(v)) { + 
reads_.push_back(v); + } + } + + BufPtr target_; + std::vector reads_; +}; + +class ExternalAllocBufFinder : public IRVisitor { + public: + void visit(const ExternalCallWithAllocPtr& v) override { + const auto& bufs_out = v->buf_out_args(); + bufs_.insert(bufs_out.begin(), bufs_out.end()); + IRVisitor::visit(v); + } + + static std::unordered_set find(const StmtPtr& s) { + ExternalAllocBufFinder f; + s->accept(&f); + return f.bufs(); + } + + static std::unordered_set find(const ExprPtr& e) { + ExternalAllocBufFinder f; + e->accept(&f); + return f.bufs(); + } + + const std::unordered_set& bufs() { + return bufs_; + } + + private: + std::unordered_set bufs_; +}; + +// Traverses the IR to determine if a particular Var is modified within it. +class ModifiesVarChecker : public IRVisitor { + public: + ModifiesVarChecker(VarPtr v) : var_(std::move(v)) {} + + static bool check(const StmtPtr& s, VarPtr v) { + ModifiesVarChecker checker(std::move(v)); + s->accept(&checker); + return checker.found(); + } + + bool found() { + return found_; + } + + private: + void visit(const StorePtr& v) override { + if (v->buf()->base_handle() == var_) { + found_ = true; + return; + } + IRVisitor::visit(v); + } + + void visit(const AtomicAddPtr& v) override { + if (v->buf()->base_handle() == var_) { + found_ = true; + return; + } + IRVisitor::visit(v); + } + + void visit(const LetPtr& v) override { + if (v->var() == var_) { + found_ = true; + return; + } + IRVisitor::visit(v); + } + + void visit(const ForPtr& v) override { + if (v->var() == var_) { + found_ = true; + return; + } + IRVisitor::visit(v); + } + + VarPtr var_; + bool found_{false}; +}; + +// Traverse the Block stmt to identify the live range of the specified buf. The +// live range, indicated by a pair of integers, specifies the first and last +// stmt in block stmts that access to the buf. 
+class BufLiveRange : public IRVisitor { + public: + BufLiveRange(BufPtr b) : buf_(std::move(b)) {} + + static std::tuple liveRange(const StmtPtr& s, BufPtr b) { + BlockPtr block = to(s); + // We Only analyze buffer live ranges for block stmts. + if (!block) { + return std::make_tuple(0, 0); + } + + BufLiveRange analyzer(std::move(b)); + block->accept(&analyzer); + return analyzer.getLiveRange(); + } + + private: + std::tuple getLiveRange() { + return std::make_tuple(begin_, end_); + } + + bool hasBufReads(const StmtPtr& s) { + auto loads1 = NodeFinder::find(s); + for (const auto& l : loads1) { + if (l->buf() == buf_) { + return true; + } + } + auto loads2 = NodeFinder::find(s); + for (const auto& l : loads2) { + for (const auto& lb : l->buf_args()) { + if (lb == buf_) { + return true; + } + } + } + auto loads3 = NodeFinder::find(s); + for (const auto& l : loads3) { + for (const auto& lb : l->buf_args()) { + if (lb == buf_) { + return true; + } + } + } + return false; + } + + bool hasBufWrites(const StmtPtr& s) { + auto writes1 = NodeFinder::find(s); + for (const auto& w : writes1) { + if (w->buf() == buf_) { + return true; + } + } + auto writes2 = NodeFinder::find(s); + for (const auto& w : writes2) { + if (w->buf() == buf_) { + return true; + } + } + auto writes3 = NodeFinder::find(s); + for (const auto& w : writes3) { + for (const auto& wb : w->buf_out_args()) { + if (wb == buf_) { + return true; + } + } + } + return false; + } + + void findAccAndUpdateLiveRange(const StmtPtr& s) { + bool has_reads = hasBufReads(s), has_writes = hasBufWrites(s); + if (has_reads || has_writes) { + if (begin_ == -1) { + begin_ = curr_index_; + }; + end_ = curr_index_; + } + } + + void visit(const BlockPtr& v) override { + for (const StmtPtr& s : *v) { + curr_index_ += 1; + findAccAndUpdateLiveRange(s); + } + } + + BufPtr buf_; + int32_t begin_ = -1; + int32_t end_ = -1; + int32_t curr_index_ = -1; +}; + +// A class that analyzes the given program relevant for Block backend +// It 
creates a map of multi dim buffers and their flat versions +class CreateBufferMap : public IRVisitor { + public: + const std::unordered_map& getBufferMap() const { + return map_input_to_tensor_bufs_; + } + + private: + void visit(const StorePtr& v) override { + auto load_node = to(v->value()); + if (load_node) { + auto t_buf = load_node->buf(); + map_input_to_tensor_bufs_.emplace(t_buf->name_hint(), v->buf()); + } else { + auto add_node = to(v->value()); + auto mul_node = to(v->value()); + // This means for now, v->value() can be Add or Mul + TORCH_INTERNAL_ASSERT(add_node || mul_node, buildErrorMessage()); + map_input_to_tensor_bufs_.emplace(v->buf()->name_hint(), v->buf()); + } + v->value()->accept(this); + } + std::unordered_map map_input_to_tensor_bufs_; +}; + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cpp_intrinsics.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cpp_intrinsics.h new file mode 100644 index 0000000000000000000000000000000000000000..0d977fc7f1920907d1779793a6a2ef6884fe3c98 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cpp_intrinsics.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +namespace torch::jit::tensorexpr { + +constexpr auto cpp_intrinsics_definition = R"( +namespace std { + +template , int> = 0> +T rsqrt(T v) { + return 1.0f / std::sqrt(v); +} + +template , int> = 0> +T frac(T v) { + T intpart; + return std::modf(v, &intpart); +} + +template +To bitcast(const From& v) { + assert(sizeof(To) == sizeof(From)); + To res; + std::memcpy(&res, &v, sizeof(From)); + return 
res; +} + +} // namespace std +)"; + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cuda_random.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cuda_random.h new file mode 100644 index 0000000000000000000000000000000000000000..d0ba0493e2eaf79e75179370279d05d37c7ab7e9 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/cuda_random.h @@ -0,0 +1,105 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +namespace torch::jit::tensorexpr { + +constexpr auto philox_random_string = R"( + +class Philox { +public: + __device__ inline Philox(unsigned long long seed, + unsigned long long subsequence, + unsigned long long offset) { + key.x = (unsigned int)seed; + key.y = (unsigned int)(seed >> 32); + counter = make_uint4(0, 0, 0, 0); + counter.z = (unsigned int)(subsequence); + counter.w = (unsigned int)(subsequence >> 32); + STATE = 0; + incr_n(offset / 4); + } + + __device__ inline unsigned long operator()() { + if(STATE == 0) { + uint4 counter_ = counter; + uint2 key_ = key; + for(int i = 0; i < 9; i++) { + counter_ = single_round(counter_, key_); + key_.x += (kPhilox10A); key_.y += (kPhilox10B); + } + output = single_round(counter_, key_); + incr(); + } + unsigned long ret; + switch(STATE) { + case 0: ret = output.x; break; + case 1: ret = output.y; break; + case 2: ret = output.z; break; + case 3: ret = output.w; break; + } + STATE = (STATE + 1) % 4; + return ret; + } + +private: + uint4 counter; + uint4 output; + uint2 key; + unsigned int STATE; + __device__ inline void incr_n(unsigned long long n) { + unsigned int nlo = (unsigned 
int)(n); + unsigned int nhi = (unsigned int)(n >> 32); + counter.x += nlo; + if (counter.x < nlo) + nhi++; + counter.y += nhi; + if (nhi <= counter.y) + return; + if (++counter.z) + return; + ++counter.w; + } + __device__ inline void incr() { + if (++counter.x) + return; + if (++counter.y) + return; + if (++counter.z) + return; + ++counter.w; + } + __device__ unsigned int mulhilo32(unsigned int a, unsigned int b, + unsigned int *result_high) { + *result_high = __umulhi(a, b); + return a*b; + } + + __device__ inline uint4 single_round(uint4 ctr, uint2 key) { + unsigned int hi0; + unsigned int hi1; + unsigned int lo0 = mulhilo32(kPhiloxSA, ctr.x, &hi0); + unsigned int lo1 = mulhilo32(kPhiloxSB, ctr.z, &hi1); + + uint4 ret = {hi1 ^ ctr.y ^ key.x, lo1, hi0 ^ ctr.w ^ key.y, lo0}; + return ret; + } + + static const unsigned long kPhilox10A = 0x9E3779B9; + static const unsigned long kPhilox10B = 0xBB67AE85; + static const unsigned long kPhiloxSA = 0xD2511F53; + static const unsigned long kPhiloxSB = 0xCD9E8D57; +}; + +// Inverse of 2^32. +#define M_RAN_INVM32 2.3283064e-10f +__device__ __inline__ float Uint32ToFloat(unsigned int x) { + return x * M_RAN_INVM32; +} + +)"; + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/eval.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/eval.h new file mode 100644 index 0000000000000000000000000000000000000000..bacdb882de08365383e5548ecdfd80f8253fa359 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/eval.h @@ -0,0 +1,330 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::jit::tensorexpr { + +class InterpValue { + public: + InterpValue() : dtype_(kInt) { + Intvalues.push_back(0); + } + + template + InterpValue(Dtype dtype, T v) : dtype_(dtype) { +#define TYPE_CASE(Type, Name) \ + if (dtype == k##Name) { \ + Name##values.push_back(v); \ + return; \ + } + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, TYPE_CASE) +#undef TYPE_CASE + throw unsupported_dtype(); + } + +#define VALUE_CTOR(Type, Name) \ + InterpValue(Type v) : dtype_(k##Name) { \ + Name##values.push_back(v); \ + } + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_CTOR) +#undef VALUE_CTOR + + explicit InterpValue(c10::quint8 v) : dtype_(kQUInt8) { + QUInt8values.emplace_back(v.val_); + } + + explicit InterpValue(c10::qint8 v) : dtype_(kQInt8) { + QInt8values.emplace_back(v.val_); + } + +#define VALUE_VEC_CTOR(Type, Name) \ + InterpValue(const std::vector& v) \ + : dtype_(Dtype(k##Name, v.size())), Name##values(v) {} + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_VEC_CTOR) + VALUE_VEC_CTOR(c10::quint8, QUInt8) + VALUE_VEC_CTOR(c10::qint8, QInt8) +#undef VALUE_VEC_CTOR + + template + T as() const; + + template + const std::vector& as_vec() const; + + int64_t intValue() const; + + 
Dtype dtype() const { + return dtype_; + } + + private: + Dtype dtype_; + +#define VALUE_STORAGE(Type, Name) std::vector Name##values; + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_STORAGE) + VALUE_STORAGE(c10::qint8, QInt8) + VALUE_STORAGE(c10::quint8, QUInt8) +#undef VALUE_STORAGE + void* ptr{nullptr}; +}; + +#define VALUE_AS_DISPATCH(Type, Name) \ + template <> \ + inline Type InterpValue::as() const { \ + if (dtype_ != k##Name) { \ + throw unsupported_dtype(); \ + } \ + return Name##values[0]; \ + } +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_DISPATCH) +VALUE_AS_DISPATCH(c10::quint8, QUInt8) +VALUE_AS_DISPATCH(c10::qint8, QInt8) +#undef VALUE_AS_DISPATCH + +#define VALUE_AS_VEC_DISPATCH(Type, Name) \ + template <> \ + inline const std::vector& InterpValue::as_vec() const { \ + if (dtype_.scalar_type() != ScalarType::Name) { \ + throw unsupported_dtype(); \ + } \ + return Name##values; \ + } +AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, VALUE_AS_VEC_DISPATCH) +VALUE_AS_VEC_DISPATCH(c10::quint8, QUInt8) +VALUE_AS_VEC_DISPATCH(c10::qint8, QInt8) +#undef VALUE_AS_VEC_DISPATCH + +template +auto underlyingValue(Type x) { + return x; +} + +template <> +inline auto underlyingValue(c10::quint8 x) { + return x.val_; +} + +template <> +inline auto underlyingValue(c10::qint8 x) { + return x.val_; +} + +template +To raw_bitcast(const From& src) { + TORCH_CHECK(sizeof(To) == sizeof(From), "Invalid bitcast invocation"); + To storage; + std::memcpy(&storage, &src, sizeof(To)); + return storage; +} + +class SimpleIREvaluatorImpl; +class TORCH_API SimpleIREvaluator : public CodeGen { + public: + SimpleIREvaluator( + StmtPtr stmt, + const std::vector& buffer_args, + at::Device device = at::kCPU, + const std::string& kernel_func_name = "func"); + + ~SimpleIREvaluator() override; + + void call(const std::vector& args) override; + void call_raw(const std::vector& args) override; + + template + void operator()(const Ts&... 
ts) { + std::vector args({CallArg(ts)...}); + call(args); + } + + void bindVar(const VarPtr& v, const ExprPtr& e); + InterpValue value() const; + + private: + void bindArg(const BufferArg& buf, void* data); + void expand_intrinsics() { + GenericIntrinsicsExpander intrinsics_expander; + apply_mutator(&intrinsics_expander); + } + + std::unique_ptr impl_; +}; + +template +class ExprEval { + public: + using BufferArg = CodeGen::BufferArg; + using CallArg = CodeGen::CallArg; + + template + ExprEval(const ExprHandle& expr, Ts... ts) + : ExprEval(expr, {BufferArg(ts)...}) {} + + ExprEval(const ExprHandle& expr, const std::vector& buffer_args) + : dtype_(expr.dtype()) { + std::vector buffer_args_extended = buffer_args; + BufHandle ret_buf("ret_val", {1}, dtype_); + std::vector indices; + ExprHandle zero = IntImm::make(0); + indices.reserve(ret_buf.ndim()); + for (size_t i = 0; i < ret_buf.ndim(); i++) { + indices.push_back(zero); + } + StmtPtr store_stmt = Store::make(ret_buf, indices, expr); + buffer_args_extended.emplace_back(ret_buf); + codegen_.reset(new CodeGenType(store_stmt, buffer_args_extended)); + } + + template + void operator()(Ts... ts) { + call(ts...); + } + + void operator()(const std::vector& call_args) { + call(call_args); + } + + void bindVar(VarPtr v, ExprPtr e) { + codegen_->bindVar(v, e); + } + + void bindVar(const VarHandle& v, const ExprHandle& e) { + codegen_->bindVar(v.node(), e.node()); + } + + template + void call(Ts... 
ts) { + call({CallArg(ts)...}); + } + + void call(const std::vector& call_args) { + std::vector call_args_extended = call_args; + switch (dtype_.scalar_type()) { +#define TYPE_CASE(Type, Name) \ + case ScalarType::Name: { \ + std::vector ret_val_arg(1); \ + call_args_extended.emplace_back(ret_val_arg); \ + codegen_->call(call_args_extended); \ + ret_value_ = InterpValue(ret_val_arg[0]); \ + } break; + AT_FORALL_SCALAR_TYPES_AND2(Half, BFloat16, TYPE_CASE); + TYPE_CASE(c10::quint8, QUInt8); + TYPE_CASE(c10::qint8, QInt8); +#undef TYPE_CASE + case ScalarType::Bool: { + std::vector ret_val_arg(1); + call_args_extended.emplace_back(ret_val_arg.data()); + codegen_->call(call_args_extended); + ret_value_ = InterpValue((bool)ret_val_arg[0]); + } break; + default: + throw unsupported_dtype(); + } + } + + void call_raw(const std::vector& args) { + std::vector args_extended = args; + switch (dtype_.scalar_type()) { +#define TYPE_CASE(Type, Name) \ + case ScalarType::Name: { \ + std::vector ret_val_arg(1); \ + args_extended.push_back(ret_val_arg.data()); \ + codegen_->call_raw(args_extended); \ + ret_value_ = InterpValue(ret_val_arg[0]); \ + } break; + AT_FORALL_SCALAR_TYPES_AND2(Half, BFloat16, TYPE_CASE); + TYPE_CASE(c10::quint8, QUInt8); + TYPE_CASE(c10::qint8, QInt8); +#undef TYPE_CASE + case ScalarType::Bool: { + std::vector ret_val_arg(1); + args_extended.push_back(ret_val_arg.data()); + codegen_->call_raw(args_extended); + ret_value_ = InterpValue((bool)ret_val_arg[0]); + } break; + default: + throw unsupported_dtype(); + } + } + + template + T value(const std::vector& args) { + call_raw(args); + return ret_value_.as(); + } + + template + T value(Ts... ts) { + call(std::forward(ts)...); + return ret_value_.as(); + } + + Dtype dtype() { + return dtype_; + } + + private: + Dtype dtype_; + std::unique_ptr codegen_; + InterpValue ret_value_; +}; + +// Evaluates the given expression and returns an int64_t value if the result of +// the given expression is int64_t. 
+std::optional evalInt(ExprPtr e); + +// Substitutes the given vars with their corresponding expressions in the input +// expression. +inline ExprPtr Substitute(const ExprPtr& expr, const VarMapping& var_mapping) { + VarSubMutator var_sub(var_mapping); + return expr->accept_mutator(&var_sub); +} + +// Substitutes the given vars with their corresponding expressions in the input +// statement. +inline StmtPtr Substitute(const StmtPtr& stmt, const VarMapping& var_mapping) { + VarSubMutator var_sub(var_mapping); + return stmt->accept_mutator(&var_sub); +} + +// Creates a clone of the input expression and substitutes the given vars with +// their corresponding expressions in the clone. +// NOTE: This works because cloning reuses variables and does not create new +// ones, and `VarMapping` input has variables as the key. +inline ExprPtr SubstituteInClone( + const ExprPtr& expr, + const VarMapping& var_mapping) { + VarSubMutator var_sub(var_mapping); + return Expr::clone(expr)->accept_mutator(&var_sub); +} + +// Creates a clone of the input statement and substitutes the given vars with +// their corresponding expressions in the clone. +// NOTE: This works because cloning reuses variables and does not create new +// ones, and `VarMapping` input has variables as the key. +inline StmtPtr SubstituteInClone( + const StmtPtr& stmt, + const VarMapping& var_mapping) { + VarSubMutator var_sub(var_mapping); + return Stmt::clone(stmt)->accept_mutator(&var_sub); +} + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/exceptions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/exceptions.h new file mode 100644 index 0000000000000000000000000000000000000000..a131ec071c32c3a6731691fd1f2328559d10ab99 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/exceptions.h @@ -0,0 +1,90 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +// Forward declarations of types + +namespace torch::jit::tensorexpr { +class Expr; +class Stmt; +} // namespace torch::jit::tensorexpr + +// Forward declarations of functions +namespace std { +TORCH_API std::string to_string( + const torch::jit::tensorexpr::ExprPtr& /*expr*/); +TORCH_API std::string to_string( + const torch::jit::tensorexpr::StmtPtr& /*stmt*/); +} // namespace std + +namespace torch::jit::tensorexpr { + +class unsupported_dtype : public std::runtime_error { + public: + explicit unsupported_dtype() : std::runtime_error("UNSUPPORTED DTYPE") {} + explicit unsupported_dtype(const std::string& err) + : std::runtime_error("UNSUPPORTED DTYPE: " + err) {} +}; + +class out_of_range_index : public std::runtime_error { + public: + explicit out_of_range_index() : std::runtime_error("OUT OF RANGE INDEX") {} + explicit out_of_range_index(const std::string& err) + : std::runtime_error("OUT OF RANGE INDEX: " + err) {} +}; + +class unimplemented_lowering : public std::runtime_error { + public: + explicit unimplemented_lowering() + : std::runtime_error("UNIMPLEMENTED LOWERING") {} + explicit unimplemented_lowering(const ExprPtr& expr) + : std::runtime_error("UNIMPLEMENTED LOWERING: " + std::to_string(expr)) {} + explicit unimplemented_lowering(const StmtPtr& stmt) + : 
std::runtime_error("UNIMPLEMENTED LOWERING: " + std::to_string(stmt)) {} +}; + +class malformed_input : public std::runtime_error { + public: + explicit malformed_input() : std::runtime_error("MALFORMED INPUT") {} + explicit malformed_input(const std::string& err) + : std::runtime_error("MALFORMED INPUT: " + err) {} + explicit malformed_input(const ExprPtr& expr) + : std::runtime_error("MALFORMED INPUT: " + std::to_string(expr)) {} + explicit malformed_input(const std::string& err, const ExprPtr& expr) + : std::runtime_error( + "MALFORMED INPUT: " + err + " - " + std::to_string(expr)) {} + explicit malformed_input(const StmtPtr& stmt) + : std::runtime_error("MALFORMED INPUT: " + std::to_string(stmt)) {} + explicit malformed_input(const std::string& err, const StmtPtr& stmt) + : std::runtime_error( + "MALFORMED INPUT: " + err + " - " + std::to_string(stmt)) {} +}; + +class malformed_ir : public std::runtime_error { + public: + explicit malformed_ir() : std::runtime_error("MALFORMED IR") {} + explicit malformed_ir(const std::string& err) + : std::runtime_error("MALFORMED IR: " + err) {} + explicit malformed_ir(const ExprPtr& expr) + : std::runtime_error("MALFORMED IR: " + std::to_string(expr)) {} + explicit malformed_ir(const std::string& err, const ExprPtr& expr) + : std::runtime_error( + "MALFORMED IR: " + err + " - " + std::to_string(expr)) {} + explicit malformed_ir(const StmtPtr& stmt) + : std::runtime_error("MALFORMED IR: " + std::to_string(stmt)) {} + explicit malformed_ir(const std::string& err, const StmtPtr& stmt) + : std::runtime_error( + "MALFORMED IR: " + err + " - " + std::to_string(stmt)) {} +}; + +TORCH_API std::string buildErrorMessage(const std::string& s = ""); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/external_functions_registry.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/external_functions_registry.h new file mode 100644 index 0000000000000000000000000000000000000000..d638dfa5911bd01af113d94cba196825f61fc532 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/external_functions_registry.h @@ -0,0 +1,62 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::jit::tensorexpr { + +// The external functions that could be called from NNC must have the same +// signature defined by `NNCExternalFunction`. +// +// Why this signature? +// It was picked for two reasons: 1) it should be generic enough to represent +// most of the ops we might want to call, 2) it should be possible to generate a +// code for this call in LLVM codegen. +// The first 5 parameters allow to pass any number of contiguous CPU tensors in +// case we need to run aten ops (TODO: support different devices). The first +// buffer in the array is assumed to be the output buffer. We couldn't use +// `at::Tensor` (or `c10::IValue`) type there directly as it would mean that +// we'd need to declare it in LLVM codegen in LLVM IR form, which would be very +// cumbersome and hard to maintain. Note that the dimensions of all tensors are +// concatenated into a single array buf_dims. We do not need to pass its length, +// since it can be deduced from total number of buffers and their ranks. +// +// The last 2 arguments allow to pass any non-tensor arguments encoded as an +// array of int64_t values. 
The way they are encoded is not specified and could +// be arbitrary - whatever the most convenient for the specific bridge function +// is. +// +// The bridge functions must not throw exceptions - properly propagating them +// from the generated code is too cumbersome, and thus all calls to functions +// that could throw must be wrapped with try-catch blocks. +using NNCExternalFunction = void (*)( + int64_t bufs_num, + void** buf_data, + int64_t* buf_ranks, + int64_t* buf_dims, + int64_t* buf_strides, + int8_t* buf_dtypes, + int64_t args_num, + int64_t* extra_args); + +// Return a global map "function-name" -> "function-pointer" for all registered +// in NNC external functions +TORCH_API std::unordered_map& +getNNCFunctionRegistry(); + +// To register a new external function in NNC one needs to create an instance of +// this struct +struct RegisterNNCExternalFunction { + RegisterNNCExternalFunction(const std::string& name, NNCExternalFunction fn) { + getNNCFunctionRegistry()[name] = fn; + } +}; + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/graph_opt.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/graph_opt.h new file mode 100644 index 0000000000000000000000000000000000000000..27427893f3f71dad8da98a25080b7e17995a636c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/graph_opt.h @@ -0,0 +1,116 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +// Optimize aten::cat ops in the given subgraph. +// +// Moving users of cat to its inputs. 
+// Cat ops get lowered into multiple loops, one per input. When the result +// of cat is used by some other op, it results in a situation where inlining +// of cat does not happen. This in turn results in intermediate buffers +// being created for the result of cat, since it is not inlined. +// +// For example, consider the following graph: +// graph(%x : Float(10, strides=[1], device=cpu), +// %y : Float(20, strides=[1], device=cpu)): +// %dim : int = prim::Constant[value=0]() +// %xy_list : Tensor[] = prim::ListConstruct(%x, %y) +// %cat : Float(60, strides=[1], device=cpu) = aten::cat(%xy_list, %dim) +// %5 : Float(60, strides=[1], device=cpu) = aten::log(%cat) +// return (%5))IR"; +// +// This will get lowered into: +// Allocate(aten_cat); +// for (...) +// aten_cat[...] = x[...] +// for (...) +// aten_cat[...] = y[...] +// for (...) +// aten_log[...] = log(aten_cat[...]) +// Free(aten_cat); +// Note that aten_cat is not inlined into aten_log and it results in +// an intermediate buffer allocation as well. +// +// Optimization: +// We move the ops that use the result of `cat` into its inputs whenever +// possible. +// +// The graph above will be transformed to: +// graph(%x : Float(10, strides=[1], device=cpu), +// %y : Float(20, strides=[1], device=cpu)): +// %3 : int = prim::Constant[value=0]() +// %7 : Float(10, strides=[1], device=cpu) = aten::log(%x) +// %8 : Float(20, strides=[1], device=cpu) = aten::log(%y) +// %9 : Tensor[] = prim::ListConstruct(%7, %8) +// %10 : Float(60, strides=[1], device=cpu) = aten::cat(%9, %3) +// return (%10) +// +// This will get lowered into: +// for (...) +// aten_cat[...] = log(x[...]) +// for (...) +// aten_cat[...] = log(y[...]) +// aten_cat is the output buffer here. 
+ +bool OptimizeCat(const std::shared_ptr& graph); + +TORCH_API void annotateInputShapes( + const std::shared_ptr& graph, + const std::vector>& example_inputs); +TORCH_API std::shared_ptr removeUnusedSelfArgument( + const std::shared_ptr& graph); +TORCH_API std::shared_ptr removeGraphOutput( + const std::shared_ptr& graph, + size_t idx); +TORCH_API std::shared_ptr replaceListOutputWithTuple( + const std::shared_ptr& graph); + +// Perform \p ITERS rounds of "trimming" for the given \p GRAPH. +// +// Trimming means that we try to remove a small portion of the graph while +// keeping it valid. This is useful for debugging when we try to find a minimal +// example reproducing the issue at hand. When ITERS is 0, the graph remains +// unchanged, when ITERS is a big number, the graph usually becomes empty. +TORCH_API std::shared_ptr trimGraph( + const std::shared_ptr& graph, + int64_t iters); + +// Scan all values in the given graph and replace each dimension with a size Xi +// present in \p SIZES with a symbolic shape Yi. Return a vector of symbol +// values [Y0, Y1, .., Yn]. +// +// For example: +// Input: +// graph(%x : Float(10, 20, 30, 40)): +// %y : Float(10, 20, 30, 40) = aten::relu(%x) +// return %y +// +// If we run makeShapesSymbolic(graph, {20, 40}), then we'll get: +// +// graph(%x : Float(10, SS(-3), 30, SS(-5))): +// %y : Float(10, SS(-3), 30, SS(-5)) = aten::relu(%x) +// return %y +// +// and get {-3, -5} as the return value. +TORCH_API std::vector makeShapesSymbolic( + std::shared_ptr& graph, + const std::vector& sizes); + +// Inspect the graph and report whether it can be converted to TE IR. +// TODO: add error reporting for graphs that can't be converted. +TORCH_API bool isGraphCompilable(const std::shared_ptr& graph); + +// Examine the graph and (hackily) fill in missing tensor type info, such as +// scalar type, device, and strides. 
Ideally, this should be done by a proper +// dtype/device/shape propagation passes, but until they are ready we can use +// this, not always correct, workaround pass. +TORCH_API void fixupMissingShapeInfo(const std::shared_ptr& graph); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/intrinsic_symbols.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/intrinsic_symbols.h new file mode 100644 index 0000000000000000000000000000000000000000..ba5b5ff6728bec6e50e9151c29a9e1d2885693e2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/intrinsic_symbols.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#ifdef TORCH_ENABLE_LLVM +#include + +namespace torch { +namespace jit { +namespace tensorexpr { + +struct SymbolAddress { + const char* symbol; + void* address; + + SymbolAddress(const char* sym, void* addr) : symbol(sym), address(addr) {} +}; + +c10::ArrayRef getIntrinsicSymbols(); + +} // namespace tensorexpr +} // namespace jit +} // namespace torch +#endif // TORCH_ENABLE_LLVM + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/ir_printer.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/ir_printer.h new file mode 100644 index 0000000000000000000000000000000000000000..033244123919bc33842771e479c989221daf897b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/ir_printer.h @@ -0,0 +1,137 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include + +namespace torch::jit::tensorexpr { + +class Tensor; + +class TORCH_API IRPrinter : public IRVisitor { + public: + explicit IRPrinter(std::ostream& os) : printer_os_(this, os) {} + + void print(ExprHandle /*expr*/); + void print(Expr& /*expr*/); + void print(Stmt& /*stmt*/); + void visit(const AddPtr& v) override; + void visit(const SubPtr& v) override; + void visit(const MulPtr& v) override; + void visit(const DivPtr& v) override; + void visit(const ModPtr& v) override; + void visit(const MaxPtr& v) override; + void visit(const MinPtr& v) override; + void visit(const AndPtr& v) override; + void visit(const OrPtr& v) override; + void visit(const XorPtr& v) override; + void visit(const LshiftPtr& v) override; + void visit(const RshiftPtr& v) override; + void visit(const CompareSelectPtr& v) override; +#define IMM_PRINT_VISIT(Type, Name) void visit(const Name##ImmPtr& v) override; + AT_FORALL_SCALAR_TYPES_AND3(Bool, Half, BFloat16, IMM_PRINT_VISIT) +#undef IMM_PRINT_VISIT + void visit(const CastPtr& v) override; + void visit(const BitCastPtr& v) override; + void visit(const VarPtr& v) override; + void visit(const BufPtr& v) override; + void visit(const RampPtr& v) override; + void visit(const LoadPtr& v) override; + void visit(const BroadcastPtr& v) override; + void 
visit(const IfThenElsePtr& v) override; + void visit(const IntrinsicsPtr& v) override; + void visit(const TermPtr& v) override; + void visit(const PolynomialPtr& v) override; + void visit(const RoundOffPtr& v) override; + void visit(const MaxTermPtr& v) override; + void visit(const MinTermPtr& v) override; + void visit(const ReduceOpPtr& v) override; + + void visit(const AtomicAddPtr& v) override; + void visit(const SyncThreadsPtr& v) override; + void visit(const ExternalCallPtr& v) override; + void visit(const ExternalCallWithAllocPtr& v) override; + void visit(const StorePtr& v) override; + void visit(const ForPtr& v) override; + void visit(const CondPtr& v) override; + void visit(const BlockPtr& v) override; + void visit(const AllocatePtr& v) override; + void visit(const FreePtr& v) override; + void visit(const FreeExtPtr& v) override; + void visit(const PlacementAllocatePtr& v) override; + void visit(const LetPtr& v) override; + + // A child class may have a difference rule for generating dtype + // string, e.g. CUDA needs int64_t to be generated as long long. 
+ virtual std::string dtypeToCppString(const Dtype& dtype); + + std::ostream& os() { + return printer_os_; + } + + class PrinterStream : public std::ostream { + public: + PrinterStream(IRPrinter* printer, std::ostream& os) + : std::ostream(os.rdbuf()), printer_(printer) { + initialize_imbue(); + } + + void initialize_imbue(); + + IRPrinter* printer() { + return printer_; + } + + private: + IRPrinter* printer_ = nullptr; + }; + + protected: + std::string to_string(CompareSelectOperation op); + + UniqueNameManager* name_manager() { + return &name_manager_; + } + void emitIndent(); + + // NOLINTNEXTLINE(cppcoreguidelines-non-private-member-variables-in-classes) + int indent_ = 0; + + private: + PrinterStream printer_os_; + UniqueNameManager name_manager_; +}; + +TORCH_API std::ostream& operator<<(std::ostream& stream, const Expr& /*expr*/); +TORCH_API std::ostream& operator<<( + std::ostream& stream, + const ExprHandle& /*expr*/); +TORCH_API std::ostream& operator<<(std::ostream& stream, const Stmt& /*stmt*/); +TORCH_API std::ostream& operator<<(std::ostream& stream, const Tensor& /*t*/); + +TORCH_API void print(const ExprPtr& expr); +TORCH_API void print(const StmtPtr& stmt); +TORCH_API void print(const Tensor& t); + +} // namespace torch::jit::tensorexpr + +namespace std { + +using torch::jit::tensorexpr::Expr; +using torch::jit::tensorexpr::ExprPtr; +using torch::jit::tensorexpr::Stmt; +using torch::jit::tensorexpr::StmtPtr; +using torch::jit::tensorexpr::Tensor; + +TORCH_API std::string to_string(const ExprPtr& expr); +TORCH_API std::string to_string(const StmtPtr& stmt); +TORCH_API std::string to_string(const Tensor& t); +} // namespace std + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/conv2d.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/conv2d.h new file mode 100644 index 0000000000000000000000000000000000000000..ca63d7b9be1773cec3ada78416bd2c865427722d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/conv2d.h @@ -0,0 +1,106 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::jit::tensorexpr { + +// An API to compute 2D depthwise convolutions with bias. +TORCH_API Tensor conv2d_depthwise( + BufHandle input, + BufHandle weight, + BufHandle bias, + int stride, + int pad, + int groups); + +// An API to compute 2D depthwise convolutions without bias. +TORCH_API Tensor conv2d_depthwise( + BufHandle input, + BufHandle weight, + int stride, + int pad, + int groups); + +TORCH_API Tensor conv2d_depthwise( + BufHandle input, + BufHandle weight, + BufHandle bias, + ExprHandle N, + ExprHandle C, + ExprHandle H, + ExprHandle W, + ExprHandle K, + ExprHandle CperG, + ExprHandle R, + ExprHandle S, + ExprHandle stride, + ExprHandle pad, + ExprHandle groups); + +TORCH_API Tensor conv2d_depthwise( + BufHandle input, + BufHandle weight, + ExprHandle N, + ExprHandle C, + ExprHandle H, + ExprHandle W, + ExprHandle K, + ExprHandle CperG, + ExprHandle R, + ExprHandle S, + ExprHandle stride, + ExprHandle pad, + ExprHandle groups); + +bool conv2dIsSupported( + const TensorInfo& input, + const TensorInfo& weight, + const TensorInfo& bias, + const std::vector& stride, + const std::vector& pad, + const std::vector& dilation, + int64_t groups); +bool mkldnnPrepackedConvIsSupported( + const TensorInfo& input, + const TensorInfo& weight, + const std::vector& stride, + 
const std::vector& pad, + const std::vector& dilation, + int64_t groups); +Tensor computeConv2d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeConv1d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computePrepackedConv2dClampRun( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computePrepackedLinearClampRun( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeMkldnnPrepackedConvRun( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/matmul.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/matmul.h new file mode 100644 index 0000000000000000000000000000000000000000..1090455818d68d23348eb870b8930d1def34358b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/matmul.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +Tensor computeMatmul( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeAddMM( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/misc.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/misc.h new file mode 100644 index 0000000000000000000000000000000000000000..6a1f61a2fb09e0342292b72c2c23d3a801df3a10 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/misc.h @@ -0,0 +1,99 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +namespace torch::jit::tensorexpr { + +struct TensorInfo { + std::vector dims; + c10::ScalarType dtype; +}; +std::optional getTensorInfo(const BufHandle& b); + +int64_t normalizeAndCheckIndex(int64_t idx, int64_t list_size); + +// Convert boolean to integer, if needed. +ExprHandle boolToInteger(const ExprHandle& x); +ExprHandle promoteToDtype(ExprHandle e, ScalarType dt); +void promoteInputs( + std::vector& inputs, + const int typeConstraints = kAllTypes); +ExprHandle promoteIntegerToDefaultType(const ExprHandle& e); +ExprHandle promoteHalfToFloat(const ExprHandle& e); +ExprHandle demoteOutput( + const ExprHandle& e, + const std::optional type); + +std::vector broadcastShapes( + std::vector> shapes); +std::vector broadcastShapes( + const std::vector& a, + const std::vector& b); + +std::vector valueShape(const ArgValue& v); +ExprHandle tensorOrConstant( + const ArgValue& v, + const std::vector& axes); +ExprHandle scalarOrConstant(const ArgValue& v); +ExprHandle broadcast(const BufHandle& b, const std::vector& axes); +ExprHandle constant(const ArgValue& v); + +ExprHandle clamp( + const ExprHandle& cmin, + const ExprHandle& cmax, + const ExprHandle& input); + +Tensor computeChunk( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + 
const std::optional& outputType, + at::Device device); +Tensor computeTranspose( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeExpand( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeReshape( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeFlatten( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeCatWoConditionals( + const std::vector& inputs, + const std::vector& outputShape); +Tensor computeCat( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeEmbedding( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/norm.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/norm.h new file mode 100644 index 0000000000000000000000000000000000000000..3d712cca1beae7547bd5f24f366f050d8783e5c5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/norm.h @@ -0,0 +1,19 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +Tensor computeBatchNorm( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/operators.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/operators.h new file mode 100644 index 0000000000000000000000000000000000000000..8625cbf737729e3f33095526e2b712a3f35575c8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/operators.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/pointwise.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/pointwise.h new file mode 100644 index 0000000000000000000000000000000000000000..cd8035fdaf773a5467a015e5113b8b5f4bd20fe8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/pointwise.h @@ -0,0 +1,87 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +TORCH_API Tensor computeSign( + const std::vector& inputs, + const std::vector& outputShape, + const std::optional>& outputStrides = std::nullopt); + +Tensor computeOneOperand( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function& innerExpr, + const int checkParamTypes = kAllTypes); +Tensor computeTwoOperand( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function& + innerExpr); +Tensor computeTwoOperandWithAlpha( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function& + innerExpr); +Tensor computeConditionWithTwoOperand( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function< + ExprHandle(const ExprHandle&, const ExprHandle&, const ExprHandle&)>& + innerExpr); +Tensor computeThreeOperand( + const std::string& name, + const 
std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function< + ExprHandle(const ExprHandle&, const ExprHandle&, const ExprHandle&)>& + innerExpr, + bool promote_inputs = true); +Tensor computeFourOperand( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function& innerExpr); +Tensor computeNoop( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +Tensor computeScalar( + const std::string& name, + const std::vector& inputValues, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + const std::function& + innerExpr); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/quantization.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/quantization.h new file mode 100644 index 0000000000000000000000000000000000000000..9ebd11bc633c053ad5a3ff6df24f56f0f95bdc35 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/quantization.h @@ -0,0 +1,154 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +TORCH_API ExprHandle quantizePerTensorQParamFromArg(ArgValue arg); + +TORCH_API double immQScale(const BufHandle& qx); + +TORCH_API int64_t immQZero(const BufHandle& qx); + +TORCH_API ScalarType immQDType(const BufHandle& qx); + +TORCH_API bool isQuantized(const BufHandle& qx); + +TORCH_API Tensor computeQuantizePerTensor( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizePerTensorExternalCall( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedConv1d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedConv2dPrepack( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedConv2d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const 
std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedConv2dRelu( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedLinear( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedLinearRelu( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedAdd( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +Tensor computeQuantizedAddExternalCall( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedMul( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedMulScalar( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedCat( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedRelu( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeDequantize( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const 
std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeDequantizeExternalCall( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeUpsampleNearest2d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeUpsampleNearest2dExternalCall( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +TORCH_API Tensor computeQuantizedSigmoidExternalCall( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device /*unused*/); +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/reduction.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/reduction.h new file mode 100644 index 0000000000000000000000000000000000000000..c9e3cb67920a3984118cea1c7ddb1837d4080cec --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/reduction.h @@ -0,0 +1,37 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +TORCH_API Tensor computeSum( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +TORCH_API Tensor computeMean( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +TORCH_API Tensor computeAdaptiveAvgPool2d( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); +Tensor computeMax( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + const std::optional& outputType, + at::Device device); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/softmax.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/softmax.h new file mode 100644 index 0000000000000000000000000000000000000000..675a6c0bc795913bc588be6084aad749eb4bf153 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/operators/softmax.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::jit::tensorexpr { + +Tensor computeSoftmax( + const std::vector& inputs, + const std::vector& outputShape, + const std::vector& outputStrides, + bool log_softmax); + +} // namespace torch::jit::tensorexpr + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/stmt.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/stmt.h new file mode 100644 index 0000000000000000000000000000000000000000..331fc954825ce4d2416ec55877203891df1cee59 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/stmt.h @@ -0,0 +1,1017 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::jit::tensorexpr { + +// The common base between all statement node. 
+class TORCH_API Stmt : public std::enable_shared_from_this { + public: + Stmt() = default; + virtual ~Stmt() = default; + virtual void accept(IRVisitor* visitor) = 0; + virtual StmtPtr accept_mutator(IRMutator* mutator) = 0; + + StmtPtr get_parent() const { + return parent_ ? parent_->getptr() : nullptr; + } + + /* + * Make a deep copy of the given statement. + * + * All statements and expressions used in children of the statement are + * cloned. Note that the variables are not deep-copied since they are + * immutable. + */ + static StmtPtr clone(const StmtPtr& s); + + protected: + static void set_parent(const StmtPtr& s, Stmt* new_parent) { + s->parent_ = new_parent; + } + std::shared_ptr getptr() { + return shared_from_this(); + } + + private: + Stmt* parent_ = nullptr; +}; + +template +class StmtNode : public Stmt { + public: + using StmtNodeBase = StmtNode; + void accept(IRVisitor* visitor) override { + visitor->visit(static_to(getptr())); + } + StmtPtr accept_mutator(IRMutator* mutator) override; + friend Op; + + private: + StmtNode() = default; +}; + +template +StmtPtr StmtNode::accept_mutator(IRMutator* mutator) { + return mutator->mutate(static_to(getptr())); +} + +// Concrete Stmt classes +class TORCH_API Block : public StmtNode { + public: + static BlockPtr make(const std::vector& stmts) { + std::vector valid_stmts; + for (auto& stmt : stmts) { + if (!stmt) { + continue; + } + valid_stmts.push_back(stmt); + } + if (valid_stmts.empty()) { + return nullptr; + } + return alloc(valid_stmts); + } + + size_t nstmts() const { + return stmts_.size(); + } + bool empty() const { + return stmts_.empty(); + } + + void prepend_stmt(const StmtPtr& s) { + if (s->get_parent()) { + throw malformed_input("Block prepend Stmt with existing parent", s); + } + + stmts_.push_front(s); + set_parent(s, this); + } + void append_stmt(const StmtPtr& s) { + if (s->get_parent()) { + throw malformed_input("Block append Stmt with existing parent", s); + } + + stmts_.push_back(s); + 
set_parent(s, this); + } + + void insert_stmt_before(const StmtPtr& s, const StmtPtr& before) { + if (s->get_parent()) { + throw malformed_input("Block append Stmt with existing parent", s); + } + + auto pos = std::find(stmts_.begin(), stmts_.end(), before); + if (pos == stmts_.end()) { + throw malformed_input( + "Inserting after statement that is not in block", s); + } + + stmts_.insert(pos, s); + set_parent(s, this); + } + + void insert_stmt_after(const StmtPtr& s, const StmtPtr& after) { + if (s->get_parent()) { + throw malformed_input("Block append Stmt with existing parent", s); + } + + auto pos = std::find(stmts_.begin(), stmts_.end(), after); + if (pos == stmts_.end()) { + throw malformed_input( + "Inserting after statement that is not in block", s); + } + + ++pos; + + stmts_.insert(pos, s); + set_parent(s, this); + } + + bool replace_stmt(const StmtPtr& old_stmt, const StmtPtr& new_stmt) { + if (new_stmt->get_parent()) { + throw malformed_input( + "Block replace Stmt with existing parent", new_stmt); + } + + auto pos = std::find(stmts_.begin(), stmts_.end(), old_stmt); + if (pos == stmts_.end()) { + return false; + } + stmts_.insert(pos, new_stmt); + stmts_.erase(pos); + set_parent(old_stmt, nullptr); + set_parent(new_stmt, this); + return true; + } + + // Creates a new block by cloning `this` block and replacing the given + // statement with a new statement. Note that `old_stmt` refers to a statement + // in `this` block. If the `old_stmt` is not found, it will return `nullptr`. 
+ BlockPtr clone_and_replace(const StmtPtr& old_stmt, const StmtPtr& new_stmt) { + if (new_stmt->get_parent()) { + throw malformed_input( + "Block replace Stmt with existing parent", new_stmt); + } + + std::vector stmts(stmts_.begin(), stmts_.end()); + std::vector cloned_stmts(stmts.size()); + bool found = false; + for (int i = 0; i < static_cast(stmts.size()); ++i) { + if (stmts[i] == old_stmt) { + found = true; + cloned_stmts[i] = new_stmt; + } else { + cloned_stmts[i] = Stmt::clone(stmts[i]); + } + } + if (!found) { + return nullptr; + } + return alloc(cloned_stmts); + } + + bool remove_stmt(const StmtPtr& stmt) { + auto pos = std::find(stmts_.begin(), stmts_.end(), stmt); + if (pos == stmts_.end()) { + return false; + } + + set_parent(stmt, nullptr); + stmts_.erase(pos); + return true; + } + + std::list stmts() const { + return stmts_; + } + + void clear() { + for (const auto& s : stmts_) { + set_parent(s, nullptr); + } + stmts_.clear(); + } + + void set_stmts(const std::vector& stmts) { + clear(); + init(stmts); + } + + explicit Block(const std::vector& stmts) { + init(stmts); + } + + typedef std::list::iterator iterator; + typedef std::list::const_iterator const_iterator; + + iterator begin() { + return stmts_.begin(); + } + + const_iterator begin() const { + return stmts_.begin(); + } + + iterator end() { + return stmts_.end(); + } + + const_iterator end() const { + return stmts_.end(); + } + + StmtPtr front() { + return stmts_.front(); + } + + StmtPtr front() const { + return stmts_.front(); + } + + StmtPtr back() { + return stmts_.back(); + } + + StmtPtr back() const { + return stmts_.back(); + } + + void splice(Block::iterator it, const BlockPtr& other) { + for (const StmtPtr& s : *other) { + set_parent(s, this); + } + + stmts_.splice(it, other->stmts_); + } + + static BlockPtr getSharedParent(StmtPtr p1, StmtPtr p2) { + std::unordered_set enclosing; + + StmtPtr p1_p = std::move(p1); + while (p1_p) { + if (BlockPtr b = to(p1_p)) { + enclosing.insert(b); + 
} + p1_p = p1_p->get_parent(); + } + + StmtPtr p2_p = std::move(p2); + while (p2_p) { + if (BlockPtr b = to(p2_p)) { + if (enclosing.count(b) != 0) { + return b; + } + } + p2_p = p2_p->get_parent(); + } + + return nullptr; + } + + // returns the immediate child containing statement s. + StmtPtr getEnclosedRoot(StmtPtr s) const { + while (s && s->get_parent().get() != this) { + s = s->get_parent(); + } + return s; + } + + private: + std::list stmts_; + + void init(const std::vector& stmts) { + for (const StmtPtr& s : stmts) { + if (!s) { + continue; + } + if (!s->get_parent()) { + // If we get here, it's a bug, but we cannot throw an error from a + // constructor. But IR verifier would catch this. + set_parent(s, this); + } + + stmts_.push_back(s); + } + } +}; + +class TORCH_API Store : public StmtNode { + public: + VarPtr base_handle() const { + return buf_->base_handle(); + } + std::vector indices() const { + return indices_; + } + ExprPtr flat_index() const { + TORCH_CHECK(indices_.size() == 1, "Indices haven't been flattened."); + return indices_[0]; + } + ExprPtr value() const { + return value_; + } + BufPtr buf() const { + return buf_; + } + + void set_buf(BufPtr buf) { + buf_ = std::move(buf); + } + + void set_indices(std::vector indices) { + indices_ = std::move(indices); + } + + void set_value(ExprPtr value) { + value_ = std::move(value); + } + + static StorePtr make( + const BufHandle& buf, + const std::vector& indices, + const ExprHandle& value); + + Store(BufPtr buf, std::vector indices, ExprPtr value); + + private: + BufPtr buf_; + std::vector indices_; + ExprPtr value_; +}; + +// Allocate a buffer of given shapes and dtypes and bind it with the given +// buffer var. The life span is at most through the current program, until it is +// explicitly freed. An unfreed memory is likely considered an error. 
+class TORCH_API Allocate : public StmtNode { + public: + static AllocatePtr make(const BufHandle& buf_handle) { + return alloc(buf_handle.node()); + } + + VarPtr buffer_var() const { + return buf_->base_handle(); + } + + Dtype dtype() const { + return buf_->dtype(); + } + + const std::vector dims() const { + return buf_->dims(); + } + + BufPtr buf() const { + return buf_; + } + + void set_buf(BufPtr buf) { + buf_ = std::move(buf); + } + + explicit Allocate(BufPtr buf) : buf_(std::move(buf)) {} + + private: + BufPtr buf_; + // TODO: add memory types. +}; + +// PlacementAllocate is a variation of the Allocate operator in NNC IR. It does +// not allocate memory but reuse the memory of another buffer for the given +// buffer. +class TORCH_API PlacementAllocate : public StmtNode { + public: + static PlacementAllocatePtr make( + const BufHandle& buf_handle, + const BufHandle& buf_handle_to_reuse) { + return alloc( + buf_handle.node(), buf_handle_to_reuse.node()); + } + + BufPtr buf() const { + return buf_; + } + + BufPtr buf_to_reuse() const { + return buf_to_reuse_; + } + + void set_buf(BufPtr buf) { + buf_ = std::move(buf); + } + + void set_buf_to_reuse(BufPtr buf) { + buf_to_reuse_ = std::move(buf); + } + + explicit PlacementAllocate(BufPtr buf, BufPtr buf_to_reuse) + : buf_(std::move(buf)), buf_to_reuse_(std::move(buf_to_reuse)) {} + + private: + BufPtr buf_; + BufPtr buf_to_reuse_; +}; + +// Free the specific buffer. It is an error. 
+class TORCH_API Free : public StmtNode { + public: + static FreePtr make(const BufHandle& buf_handle) { + return alloc(buf_handle.node()); + } + + VarPtr buffer_var() const { + return buf_->base_handle(); + } + + BufPtr buf() const { + return buf_; + } + + void set_buf(BufPtr buf) { + buf_ = std::move(buf); + } + + explicit Free(BufPtr buf) : buf_(std::move(buf)) {} + + private: + BufPtr buf_; +}; + +class TORCH_API FreeExt : public StmtNode { + public: + static FreeExtPtr make(const std::vector& bufs); + + std::vector bufs() const { + return bufs_; + } + + void set_bufs(std::vector bufs) { + bufs_ = std::move(bufs); + } + + explicit FreeExt(std::vector bufs) : bufs_(std::move(bufs)) {} + + private: + std::vector bufs_; +}; + +class TORCH_API Let : public StmtNode { + public: + static LetPtr make(const VarHandle& var, const ExprHandle& val) { + return alloc(var.node(), val.node()); + } + + Let(VarPtr var, ExprPtr val) : var_(std::move(var)), val_(std::move(val)) {} + + VarPtr var() const { + return var_; + } + + ExprPtr value() const { + return val_; + } + + void set_var(VarPtr var) { + var_ = std::move(var); + } + + void set_val(ExprPtr val) { + val_ = std::move(val); + } + + private: + VarPtr var_; + ExprPtr val_; +}; + +class TORCH_API Cond : public StmtNode { + public: + static CondPtr make( + const ExprHandle& condition, + const StmtPtr& true_stmt, + const StmtPtr& false_stmt) { + return alloc(condition.node(), true_stmt, false_stmt); + } + + ExprPtr condition() const { + return condition_; + } + + BlockPtr true_stmt() const { + return true_stmt_; + } + + BlockPtr false_stmt() const { + return false_stmt_; + } + + void set_condition(ExprPtr condition) { + condition_ = std::move(condition); + } + + void set_true_stmt(StmtPtr true_stmt) { + if (true_stmt) { + BlockPtr b = to(true_stmt); + if (!b) { + b = alloc(std::vector({std::move(true_stmt)})); + } + true_stmt_ = b; + set_parent(true_stmt_, this); + } + } + + void set_false_stmt(StmtPtr false_stmt) { + if 
(false_stmt) {
+      BlockPtr b = to(false_stmt);
+      if (!b) {
+        b = alloc(std::vector({std::move(false_stmt)}));
+      }
+      false_stmt_ = b;
+      set_parent(false_stmt_, this);
+    }
+  }
+
+  // Stores the condition and installs both branches through the setters, so
+  // the branch handling lives in set_true_stmt / set_false_stmt.
+  Cond(ExprPtr condition, StmtPtr true_stmt, StmtPtr false_stmt)
+      : condition_(std::move(condition)) {
+    set_true_stmt(std::move(true_stmt));
+    set_false_stmt(std::move(false_stmt));
+  }
+
+  // Allocates a new node that reuses condition_ with the given branches.
+  CondPtr cloneWithNewBodies(
+      const StmtPtr& true_stmt,
+      const StmtPtr& false_stmt) {
+    return alloc(condition_, true_stmt, false_stmt);
+  }
+
+  // Same as cloneWithNewBodies, but passes nullptr as the false branch.
+  CondPtr cloneWithNewBody(const StmtPtr& true_stmt) {
+    return alloc(condition_, true_stmt, nullptr);
+  }
+
+ private:
+  ExprPtr condition_;
+  BlockPtr true_stmt_ = nullptr;
+  BlockPtr false_stmt_ = nullptr;
+};
+
+// Per-loop annotations: an optional GPU block index, an optional GPU thread
+// index, a "parallel" flag and a buffer mapping. A default-constructed object
+// has neither index set and is not parallel (see isDefault()).
+class TORCH_API LoopOptions {
+ public:
+  // Axis identifiers shared by the block and thread indices. IDX_UNSET marks
+  // "no index assigned"; IDX_MAX is the largest valid axis.
+  enum {
+    IDX_UNSET = -1,
+    IDX_X = 0,
+    IDX_Y = 1,
+    IDX_Z = 2,
+    IDX_W = 3,
+    IDX_MAX = IDX_W,
+  };
+  // GPU Block Index
+  bool is_gpu_block_index() const {
+    return gpu_block_index_ != IDX_UNSET;
+  }
+
+  int gpu_block_index() const {
+    return gpu_block_index_;
+  }
+
+  // Returns "blockIdx.x" .. "blockIdx.w" for the stored index. Throws
+  // malformed_input when no block index is set or when the stored value is
+  // outside [IDX_X, IDX_MAX].
+  std::string gpu_block_index_str() const {
+    if (!is_gpu_block_index()) {
+      throw malformed_input("Has no GPU block index");
+    }
+
+    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+    static constexpr const char* kBlockIndexNames[] = {
+        "blockIdx.x",
+        "blockIdx.y",
+        "blockIdx.z",
+        "blockIdx.w",
+    };
+
+    // Guard the table lookup below against out-of-range stored values.
+    if (gpu_block_index_ < IDX_X || gpu_block_index_ > IDX_MAX) {
+      throw malformed_input("invalid GPU block index");
+    }
+
+    return kBlockIndexNames[gpu_block_index_];
+  }
+
+  // Assigns the block index. Throws std::runtime_error when a thread index is
+  // already set, or when a different block index was set before.
+  // NOTE(review): the IDX_UNSET branch has no early return, so it falls
+  // through to the checks below; clearing the block index therefore still
+  // throws when a thread index is set. Confirm whether that is intended.
+  // `index` is not range-checked here; out-of-range values are only rejected
+  // later by gpu_block_index_str().
+  void set_gpu_block_index(int index) {
+    if (index == IDX_UNSET) {
+      gpu_block_index_ = IDX_UNSET;
+    }
+
+    if (is_gpu_thread_index()) {
+      throw std::runtime_error("Cannot set both gpu block and thread index");
+    }
+    if (is_gpu_block_index() && gpu_block_index() != index) {
+      throw std::runtime_error("Cannot set a previously set block index");
+    }
+    gpu_block_index_ = index;
+  }
+
+  // GPU Thread Index
+  bool is_gpu_thread_index() const {
+    return gpu_thread_index() != IDX_UNSET;
+  }
+
+  int gpu_thread_index() const {
+    return gpu_thread_index_;
+  }
+
+  // Returns "threadIdx.x" .. "threadIdx.w" for the stored index. Throws
+  // malformed_input when no thread index is set or when the stored value is
+  // outside [IDX_X, IDX_MAX].
+  std::string gpu_thread_index_str() const {
+    if (!is_gpu_thread_index()) {
+      throw malformed_input("has no GPU thread index");
+    }
+
+    // NOLINTNEXTLINE(modernize-avoid-c-arrays,cppcoreguidelines-avoid-c-arrays)
+    static constexpr const char* kThreadIndexNames[] = {
+        "threadIdx.x", "threadIdx.y", "threadIdx.z", "threadIdx.w"};
+
+    // Guard the table lookup below against out-of-range stored values.
+    if (gpu_thread_index_ < IDX_X || gpu_thread_index_ > IDX_MAX) {
+      throw malformed_input("invalid GPU thread index");
+    }
+
+    return kThreadIndexNames[gpu_thread_index_];
+  }
+
+  // Assigns the thread index. Throws std::runtime_error when a block index is
+  // already set, or when a different thread index was set before.
+  // NOTE(review): same fall-through as set_gpu_block_index -- the IDX_UNSET
+  // branch does not return early, so clearing still throws when a block index
+  // is set. `index` is not range-checked here.
+  void set_gpu_thread_index(int index) {
+    if (index == IDX_UNSET) {
+      gpu_thread_index_ = IDX_UNSET;
+    }
+
+    if (is_gpu_block_index()) {
+      throw std::runtime_error("Cannot set both gpu thread and block index");
+    }
+    if (is_gpu_thread_index() && gpu_thread_index() != index) {
+      throw std::runtime_error("Cannot set a previously set thread index");
+    }
+    gpu_thread_index_ = index;
+  }
+
+  // One-way switch: there is no corresponding "unset" in this class.
+  void set_parallel() {
+    is_parallel_ = true;
+  }
+
+  bool is_parallel() const {
+    return is_parallel_;
+  }
+
+  // Textual form of the annotation. The block index takes precedence over the
+  // thread index, which takes precedence over "parallel"; returns an empty
+  // string when none of them is set.
+  std::string ToString() const {
+    if (is_gpu_block_index()) {
+      return gpu_block_index_str();
+    } else if (is_gpu_thread_index()) {
+      return gpu_thread_index_str();
+    } else if (is_parallel()) {
+      return "parallel";
+    }
+    return "";
+  }
+
+  // True when no index is set and the loop is not parallel. The buffer
+  // mapping is not part of this check.
+  bool isDefault() const {
+    return gpu_block_index_ == IDX_UNSET && gpu_thread_index_ == IDX_UNSET &&
+        !is_parallel_;
+  }
+
+  // Replaces the stored mapping with a copy of `map`.
+  void set_buffer_mapping(const std::unordered_map& map) {
+    map_input_to_tensor_bufs_ = map;
+  }
+
+  // Returns the mapping by value, i.e. a copy.
+  std::unordered_map get_buffer_mapping() const {
+    return map_input_to_tensor_bufs_;
+  }
+
+ private:
+  int gpu_block_index_{IDX_UNSET};
+  int gpu_thread_index_{IDX_UNSET};
+  bool is_parallel_{false};
+  std::unordered_map map_input_to_tensor_bufs_;
+};
+
+// Loop statement: an induction variable, start and stop expressions, a body
+// block and the LoopOptions attached to the loop.
+class TORCH_API For : public StmtNode {
+ public:
+  VarPtr var() const {
+    return var_;
+  }
+  ExprPtr start() const {
+    return start_;
+  }
+  ExprPtr stop() const {
+    return stop_;
+  }
+  BlockPtr body() const {
+    return body_;
+  }
+  // Factory without loop options. Returns nullptr when `body` is null.
+  static ForPtr make(
+      const VarHandle& var,
+      const ExprHandle& start,
+      const ExprHandle& stop,
+      const StmtPtr& body) {
+    if (!body) {
+      return nullptr;
+    }
+    return alloc(var.node(), start.node(), stop.node(), body);
+  }
+  // Factory with explicit loop options. Returns nullptr when `body` is null.
+  static ForPtr make(
+      const VarHandle& var,
+      const ExprHandle& start,
+      const ExprHandle& stop,
+      const StmtPtr& body,
+      const LoopOptions& loop_options) {
+    if (!body) {
+      return nullptr;
+    }
+    return alloc(
+        var.node(), start.node(), stop.node(), body, loop_options);
+  }
+  // Returns the options by value (a copy); use the setters below to modify
+  // the options stored in this node.
+  const LoopOptions loop_options() const {
+    return loop_options_;
+  }
+
+  // Builds a loop with default options. When to(body) yields null the
+  // statement is wrapped in a newly allocated block, and the resulting block
+  // is parented to this node.
+  // NOTE(review): unlike the five-argument constructor below, this overload
+  // performs no null or parent validation of its arguments.
+  For(VarPtr var, ExprPtr start, ExprPtr stop, StmtPtr body)
+      : var_(std::move(var)), start_(std::move(start)), stop_(std::move(stop)) {
+    BlockPtr b = to(body);
+    if (!b) {
+      b = alloc(std::vector({std::move(body)}));
+    }
+    body_ = b;
+    set_parent(body_, this);
+  }
+
+  // Builds a loop with explicit options. Throws malformed_input when var,
+  // start or stop is null, or when body is null or already has a parent.
+  For(VarPtr var,
+      ExprPtr start,
+      ExprPtr stop,
+      StmtPtr body,
+      LoopOptions loop_options)
+      : var_(std::move(var)),
+        start_(std::move(start)),
+        stop_(std::move(stop)),
+        loop_options_(std::move(loop_options)) {
+    if (!var_) {
+      throw malformed_input("invalid Var in For loop");
+    } else if (!start_) {
+      throw malformed_input("invalid Start in For loop");
+    } else if (!stop_) {
+      throw malformed_input("invalid Stop in For loop");
+    } else if (!body || body->get_parent()) {
+      throw malformed_input("invalid Body in For loop");
+    }
+
+    // Same wrapping as in the four-argument constructor.
+    BlockPtr b = to(body);
+    if (!b) {
+      b = alloc(std::vector({std::move(body)}));
+    }
+    body_ = b;
+    set_parent(body_, this);
+  }
+
+  // The following four members forward to the stored LoopOptions.
+  void set_gpu_block_index(int block_index) {
+    loop_options_.set_gpu_block_index(block_index);
+  }
+
+  void set_gpu_thread_index(int thread_index) {
+    loop_options_.set_gpu_thread_index(thread_index);
+  }
+
+  void set_parallel() {
+    loop_options_.set_parallel();
+  }
+
+  bool is_parallel() const {
+    return loop_options_.is_parallel();
+  }
+
+  void set_buffer_map(const std::unordered_map& map) {
+    loop_options_.set_buffer_mapping(map);
+  }
+
+  // Allocates a new loop with the same var, bounds and options but the given
+  // body (goes through the validating five-argument constructor).
+  ForPtr cloneWithNewBody(const StmtPtr& body) const {
+    return alloc(var_, start_, stop_, body, loop_options_);
+  }
+
+  // Detaches and returns the body: its parent is cleared and this loop is
+  // left with a null body.
+  BlockPtr removeBody() {
+    auto res = body_;
+    set_parent(res, nullptr);
+    body_ = nullptr;
+    return res;
+  }
+
+  // Replaces the body, wrapping it in a new block when to(body) yields null.
+  // The previous body is not re-parented here.
+  void set_body(StmtPtr body) {
+    BlockPtr b = to(body);
+    if (!b) {
+      b = alloc(std::vector({std::move(body)}));
+    }
+    body_ = b;
+    set_parent(body_, this);
+  }
+
+  void set_start(ExprPtr start) {
+    start_ = std::move(start);
+  }
+
+  void set_stop(ExprPtr stop) {
+    stop_ = std::move(stop);
+  }
+
+  void set_var(VarPtr var) {
+    var_ = std::move(var);
+  }
+
+ private:
+  VarPtr var_;
+  ExprPtr start_;
+  ExprPtr stop_;
+  BlockPtr body_;
+  LoopOptions loop_options_;
+};
+
+// A backend-specific IR node that implements atomic-add.
+// This node can only show up internally with GPU backends.
+// TODO: move this to an internal IR.
+// TODO: make IR nodes extensible.
+class TORCH_API AtomicAdd : public StmtNode {
+ public:
+  AtomicAdd(BufPtr buf, std::vector indices, ExprPtr value)
+      : buf_(std::move(buf)),
+        indices_(std::move(indices)),
+        value_(std::move(value)) {}
+
+  VarPtr base_handle() const {
+    return buf_->base_handle();
+  }
+
+  BufPtr buf() const {
+    return buf_;
+  }
+
+  // Returns the single index; the check fails unless exactly one index is
+  // stored.
+  ExprPtr flat_index() const {
+    TORCH_CHECK(indices_.size() == 1, "Indices haven't been flattened.");
+    return indices_[0];
+  }
+
+  ExprPtr value() const {
+    return value_;
+  }
+
+  const std::vector& indices() const {
+    return indices_;
+  }
+
+  void set_buf(BufPtr buf) {
+    buf_ = std::move(buf);
+  }
+
+  void set_indices(std::vector indices) {
+    indices_ = std::move(indices);
+  }
+
+  void set_value(ExprPtr value) {
+    value_ = std::move(value);
+  }
+
+ private:
+  BufPtr buf_;
+  std::vector indices_;
+  ExprPtr value_;
+};
+
+// Statement node with no operands and no state of its own.
+class TORCH_API SyncThreads : public StmtNode {
+ public:
+  SyncThreads() = default;
+};
+
+/*
+ * ExternalCall statement represents a call to an external function that would
+ * compute the contents of the output buffer. An ExternalCall statement consists
+ * of:
+ *   1) output buffer - the buffer that'll be initialized by the call
+ *   2) external function name - a key from the NNC function registry to lookup
+ *      the actual function to call
+ *   3) buffer arguments - the input buffers used by the function
+ *   4) non-buffer arguments - scalar arguments to pass to the function
+ *
+ * An example:
+ *   A = nnc_conv2d(buf_args={Input, Weight, Bias}, args={1})
+ * Here 'A' is the output buffer, "nnc_conv2d" is the function name, the buffer
+ * arguments are 'Input', 'Weight', and 'Bias', and there is a single non-buffer
+ * argument - 1.
+ *
+ * The semantics of the scalar arguments is defined solely by the implementation
+ * of the external function.
+ */
+class TORCH_API ExternalCall : public StmtNode {
+ public:
+  static ExternalCallPtr make(
+      BufHandle buf,
+      const std::string& func_name,
+      const std::vector& buf_args,
+      const std::vector& args);
+
+  BufPtr buf() const {
+    return buf_;
+  }
+
+  // The three accessors below return by value, i.e. copies.
+  std::string func_name() const {
+    return func_name_;
+  }
+
+  std::vector buf_args() const {
+    return buf_args_;
+  }
+
+  std::vector args() const {
+    return args_;
+  }
+
+  void set_buf(BufPtr buf) {
+    buf_ = std::move(buf);
+  }
+
+  void set_buf_args(std::vector buf_args) {
+    buf_args_ = std::move(buf_args);
+  }
+
+  void set_args(std::vector args) {
+    args_ = std::move(args);
+  }
+
+  ExternalCall(
+      BufPtr buf,
+      std::string func_name,
+      std::vector buf_args,
+      std::vector args)
+      : buf_(std::move(buf)),
+        func_name_(std::move(func_name)),
+        buf_args_(std::move(buf_args)),
+        args_(std::move(args)) {}
+
+ private:
+  BufPtr buf_;
+  std::string func_name_;
+  std::vector buf_args_;
+  std::vector args_;
+};
+
+// Variant of ExternalCall that stores a list of output buffers
+// (buf_out_args) instead of a single output buffer.
+class TORCH_API ExternalCallWithAlloc : public StmtNode {
+ public:
+  static ExternalCallWithAllocPtr make(
+      const std::string& func_name,
+      const std::vector& buf_out_args,
+      const std::vector& buf_args,
+      const std::vector& args);
+
+  // All four accessors return by value, i.e. copies.
+  std::vector buf_out_args() const {
+    return buf_out_args_;
+  }
+
+  std::string func_name() const {
+    return func_name_;
+  }
+
+  std::vector buf_args() const {
+    return buf_args_;
+  }
+
+  std::vector args() const {
+    return args_;
+  }
+
+  void set_buf_out_args(std::vector buf_out_args) {
+    buf_out_args_ = std::move(buf_out_args);
+  }
+
+  void set_buf_args(std::vector buf_args) {
+    buf_args_ = std::move(buf_args);
+  }
+
+  void set_args(std::vector args) {
+    args_ = std::move(args);
+  }
+
+  ExternalCallWithAlloc(
+      std::string func_name,
+      std::vector buf_out_args,
+      std::vector buf_args,
+      std::vector args)
+      : func_name_(std::move(func_name)),
+        buf_out_args_(std::move(buf_out_args)),
+        buf_args_(std::move(buf_args)),
+        args_(std::move(args)) {}
+
+ private:
+  std::string func_name_;
+  std::vector buf_out_args_;
+  std::vector buf_args_;
+  std::vector args_;
+};
+
+} // namespace torch::jit::tensorexpr
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/unique_name_manager.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/unique_name_manager.h
new file mode 100644
index 0000000000000000000000000000000000000000..7ef8ec508cbffcf573d68d34f65636b4f04e11b4
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/jit/tensorexpr/unique_name_manager.h
@@ -0,0 +1,38 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+
+namespace torch::jit::tensorexpr {
+
+class VarHandle;
+class Var;
+
+using VarNameMap = std::unordered_map;
+
+// A manager to get unique names from vars.
+// It starts with the name hint of the var and appends "_" + $counter until it
+// hits a unique name.
+class TORCH_API UniqueNameManager {
+ public:
+  // Both overloads are only declared here and return a reference to a stored
+  // string: one takes a handle, the other a node pointer.
+  const std::string& get_unique_name(const VarHandle& v);
+
+  const std::string& get_unique_name(const VarPtr& v);
+
+ private:
+  friend class ScopedVarName;
+  // NOTE(review): presumably var -> assigned name, name hint -> next counter,
+  // and the set of names handed out so far; confirm against the .cpp.
+  VarNameMap unique_name_mapping_;
+  std::unordered_map unique_name_count_;
+  std::unordered_set all_unique_names_;
+};
+
+} // namespace torch::jit::tensorexpr
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h
new file mode 100644
index 0000000000000000000000000000000000000000..75f05b99b69aca3439c9b5be53685dd65622ebcc
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_data.h
@@ -0,0 +1,64 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+// Abstract base for data held by a backend: carries the device and shape it
+// was created with plus an optional, replaceable Info tag. Backends implement
+// GetHandle, Assign and HasValue.
+class TORCH_API BackendData {
+ public:
+  struct Info {
+    /**
+     * Used by Lazy Graph Executor to tag info on BackendData objs
+     * */
+    virtual ~Info() = default;
+  };
+  /**
+   * Represents (Tensor) data stored on a backend device
+   * in its native format.
+   * */
+  using Handle = int64_t;
+
+  BackendData(BackendDevice device, Shape shape)
+      : device_(std::move(device)), shape_(std::move(shape)) {}
+
+  virtual ~BackendData() = default;
+
+  const BackendDevice& device() const {
+    return device_;
+  }
+
+  const Shape& shape() const {
+    return shape_;
+  }
+
+  // Non-owning pointer to the current tag; null when none is set.
+  Info* info() const {
+    return info_.get();
+  }
+
+  // Installs `info` as the new tag and returns the previously stored one
+  // (the swap leaves the old value in the by-value parameter).
+  std::shared_ptr SetInfo(std::shared_ptr info) {
+    std::swap(info, info_);
+    return info;
+  }
+
+  virtual Handle GetHandle() = 0;
+
+  virtual void Assign(const BackendData& data) = 0;
+
+  virtual bool HasValue() const = 0;
+
+ private:
+  BackendDevice device_;
+  Shape shape_;
+  std::shared_ptr info_;
+};
+
+using BackendDataPtr = std::shared_ptr;
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h
new file mode 100644
index 0000000000000000000000000000000000000000..b5e697035bb12196ea0d59b4b5cda3ab1fa9cee3
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_device.h
@@ -0,0 +1,105 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace c10 {
+struct Device;
+}
+
+namespace torch::lazy {
+
+// Backend should extend it and define their own supported hardware types.
+struct TORCH_API BackendDeviceType {
+  // Device type tag stored as int8_t; defaults to at::kCPU.
+  int8_t type{(int8_t)at::kCPU};
+  // Note: previous default value was '0', which actually maps to at::kCPU, at
+  // least now it is explicit, we may want to make default/undefined semantics
+  // more clear though
+  BackendDeviceType() : type((int8_t)at::kCPU) {}
+  // Not explicit, so an int8_t converts implicitly to a BackendDeviceType.
+  BackendDeviceType(int8_t type) : type(type) {}
+
+  virtual ~BackendDeviceType() = default;
+  // Base implementation returns the literal "Unknown"; virtual so subclasses
+  // can override it.
+  virtual std::string toString() const {
+    return "Unknown";
+  }
+};
+
+// A backend device: a shared device-type object plus an ordinal.
+class TORCH_API BackendDevice {
+ public:
+  // The default constructor will set both the device type and ordinal
+  // to backend specific defaults.
+  BackendDevice();
+  BackendDevice(std::shared_ptr&& type, int64_t ordinal);
+
+  int8_t type() const;
+  int64_t ordinal() const {
+    return ordinal_;
+  }
+
+  // All three comparison operators are defined through compare(), which is
+  // declared below and implemented outside this header.
+  bool operator==(const BackendDevice& other) const {
+    return compare(other) == 0;
+  }
+  bool operator!=(const BackendDevice& other) const {
+    return compare(other) != 0;
+  }
+  bool operator<(const BackendDevice& rhs) const {
+    return compare(rhs) < 0;
+  }
+
+  std::string toString() const;
+
+ private:
+  // Three-way comparison: the operators above test the result against zero.
+  int compare(const BackendDevice& rhs) const;
+
+  // Use shared_ptr instead of unique_ptr so that BackendDevice can be copied.
+  std::shared_ptr type_;
+  int64_t ordinal_;
+};
+
+TORCH_API std::ostream& operator<<(
+    std::ostream& os,
+    const BackendDevice& device);
+
+// Helpers for converting a c10::Device to BackendDevice and vice versa.
+TORCH_API BackendDevice atenDeviceToBackendDevice(const c10::Device& device);
+TORCH_API c10::Device backendDeviceToAtenDevice(const BackendDevice& device);
+
+// Tries to extract the backend device out of the lazy tensor. Returns nullopt
+// if the input is not a lazy tensor.
+TORCH_API std::optional GetBackendDevice(
+    const at::ITensorListRef tensors);
+TORCH_API std::optional GetBackendDevice(
+    const at::TensorList tensors);
+TORCH_API std::optional GetBackendDevice(
+    const at::Tensor& tensor);
+TORCH_API std::optional GetBackendDevice(
+    const std::optional& device);
+
+// For variadic template.
+// (Zero-argument overload: terminates the recursion of the template below.)
+TORCH_API std::optional GetBackendDevice();
+
+// Variadic form: queries the first argument and returns its device when one
+// is found; otherwise recurses on the remaining arguments. The diagnostic
+// push/pop silences -Winfinite-recursion for this self-call.
+C10_DIAGNOSTIC_PUSH_AND_IGNORED_IF_DEFINED("-Winfinite-recursion")
+template
+std::optional GetBackendDevice(
+    const T& tensor,
+    const Args&... forward_tensors) {
+  auto optional_device = GetBackendDevice(tensor);
+  if (optional_device) {
+    return optional_device;
+  }
+  return GetBackendDevice(forward_tensors...);
+}
+C10_DIAGNOSTIC_POP()
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h
new file mode 100644
index 0000000000000000000000000000000000000000..9f885d4d4c71618d6c824f197ef109b0c0c76dfc
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/backend_interface.h
@@ -0,0 +1,160 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+struct IrBuilder;
+
+/**
+ * Work in progress- don't treat this as a stable interface yet!
+ *
+ * Abstract interface a lazy-tensor backend implements. Every member is pure
+ * virtual except InitializeAtenBindings() and CreateMetricReport(), which
+ * have default bodies.
+ */
+class TORCH_API BackendImplInterface {
+ public:
+  virtual ~BackendImplInterface() = default;
+
+  /**
+   * Initialization/Teardown
+   * */
+  // No-op by default. Allows custom functionality to be exposed through
+  // extension bindings.
+  virtual void InitializeAtenBindings() const {}
+
+  virtual void PrepareToExit() const = 0;
+
+  /**
+   * Configuration
+   * */
+
+  virtual void SetRngSeed(size_t seed) const = 0;
+
+  /**
+   * IR Tracing
+   * */
+
+  virtual const IrBuilder* GetIrBuilder() const = 0;
+
+  /**
+   * Data Transfer
+   * */
+
+  virtual BackendDataPtr MakeComputationDataFromTensor(
+      const at::Tensor& tensor,
+      const Shape& shape,
+      const BackendDevice& device) const = 0;
+  virtual BackendDataPtr MakeComputationDataFromScalar(
+      const at::Scalar& scalar,
+      const torch::lazy::BackendDevice& device) const = 0;
+  virtual BackendDataPtr CreateDataPlaceholder(
+      const BackendDevice& device,
+      const Shape& shape) const = 0;
+
+  // Gets backend data if the node is a device data node. Otherwise returns
+  // nullptr
+  virtual BackendDataPtr GetComputationDataFromNode(const Node*) const = 0;
+
+  virtual at::Tensor MakeTensorFromComputationData(
+      const BackendDataPtr data,
+      std::optional logical_scalar_type) const = 0;
+
+  /**
+   * Lowering, Compilation, Execution
+   * */
+
+  // Two overloads: with and without a post-order node list and an emission
+  // map.
+  virtual std::unique_ptr CreateLoweringContext(
+      const std::string& name,
+      BackendDevice device,
+      c10::ArrayRef post_order,
+      Util::EmissionMap emit_status) const = 0;
+
+  virtual std::unique_ptr CreateLoweringContext(
+      const std::string& name,
+      BackendDevice device) const = 0;
+
+  // TODO(whc) need to keep this?
+  virtual std::vector GetCompilationDevices(
+      const std::string& device,
+      c10::ArrayRef devices) const = 0;
+
+  virtual std::vector Compile(
+      std::vector instances) const = 0;
+
+  virtual std::vector ExecuteComputation(
+      torch::lazy::ComputationPtr computation,
+      c10::ArrayRef arguments,
+      const BackendDevice& device) const = 0;
+
+  /**
+   * Device Configuration
+   * */
+
+  // Set or get the default device type.
+  // For backends used with virtual c10::Devices, this configures what real
+  // device type the backend should use, and matters if the backend supports
+  // more than one type of real device.
+  // (The getter is const; the setter is the non-const member of the pair.)
+  virtual std::shared_ptr GetDefaultDeviceType() const = 0;
+  virtual void SetDefaultDeviceType(int8_t type) = 0;
+
+  // Set or get the default device ordinal.
+  // For backends that support multi-device, this configures what the
+  // default device the backend should use.
+  virtual int64_t GetDefaultDeviceOrdinal() const = 0;
+  virtual void SetDefaultDeviceOrdinal(int64_t) = 0;
+
+  // Specify which aten device should be used for eager fallback
+  // may change depending on current 'Default' DeviceType
+  virtual at::DeviceType EagerFallbackDeviceType() const = 0;
+
+  // Query all available backend devices
+  virtual std::vector GetBackendDevices() const = 0;
+
+  // Returns an empty string unless a backend overrides it.
+  virtual std::string CreateMetricReport() const {
+    return "";
+  }
+
+  // Map a particular c10:: device to a concrete backend device
+  // Note:: c10:: devices may be virtual or concrete. xla:: and lazy:: are
+  // virtual devices, meaning they may map to a gpu, tpu, etc. behind the
+  // scenes. In the future, non-virtual c10:: devices may also use lazy tensors
+  // through a mode, in which case these APIs should still work, but should be
+  // identity mappings.
+  virtual BackendDevice GetBackendDevice(c10::Device device) const = 0;
+
+  // TODO(whc)
+  // Additional APIs expected for supporting distributed training, to be
+  // designed
+
+  /**
+   * Debug/Metrics
+   * */
+
+  // virtual std::map GetMetrics() const = 0;
+
+  // virtual MemoryInfo GetMemoryInfo(const std::string& device) = 0;
+
+  virtual std::string GetComputationBackendText(
+      const ComputationPtr computation) const = 0;
+};
+
+// Constructed from a backend implementation pointer; the constructor is
+// defined outside this header.
+// NOTE(review): presumably registers the implementation later returned by
+// getBackend() -- confirm against the .cpp.
+class TORCH_API BackendRegistrar {
+ public:
+  BackendRegistrar(const BackendImplInterface* backend_impl_interface);
+};
+
+TORCH_API bool hasBackend();
+TORCH_API const BackendImplInterface* getBackend();
+
+TORCH_API const IrBuilder* getIrBuilder();
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h
new file mode 100644
index 0000000000000000000000000000000000000000..8de72ec167fdd9d27412ba13b8ac143e16d2c0b1
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/backend/lowering_context.h
@@ -0,0 +1,115 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+#include
+
+#include
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+// Abstract description of a computation: exposes its parameter count, shapes
+// and names, its result shape and a string form. All accessors are pure
+// virtual; the only state held here is the in_mark_step flag.
+class TORCH_API Computation {
+ public:
+  virtual int parameters_size() const = 0;
+
+  virtual const std::vector& parameter_shapes() const = 0;
+
+  virtual const std::vector& parameter_names() const = 0;
+
+  virtual const Shape& result_shape() const = 0;
+
+  virtual const std::string to_string() const = 0;
+
+  virtual ~Computation() = default;
+
+  // Indicates whether this computation is being executed inside a mark step
+  // Assume false unless set otherwise
+  bool in_mark_step = false;
+};
+
+using ComputationPtr = std::shared_ptr;
+
+// Keeps track of the code generation state.
+class TORCH_API LoweringContext {
+ public:
+  LoweringContext(const std::string& name, BackendDevice device);
+  LoweringContext(
+      const std::string& name,
+      BackendDevice device,
+      c10::ArrayRef post_order,
+      Util::EmissionMap emit_status);
+
+  virtual ~LoweringContext() = default;
+
+  // Static factories mirroring the two constructors; defined outside this
+  // header.
+  static std::unique_ptr Create(
+      const std::string& name,
+      BackendDevice device,
+      c10::ArrayRef post_order,
+      Util::EmissionMap emit_status);
+
+  static std::unique_ptr Create(
+      const std::string& name,
+      BackendDevice device);
+
+  const BackendDevice& device() const {
+    return device_;
+  }
+
+  // Retrieves the vector holding all the tensors associated with the parameter
+  // instructions which have been created.
+  const std::vector& GetParametersData() const;
+
+  // Adds a new input/output alias.
+  virtual void SetUpAlias(
+      const std::vector& output_index,
+      int64_t param_number,
+      const std::vector& param_index,
+      bool must_alias = false) {
+    // Dummy default implementation to do nothing.
+  }
+
+  // Check if parameter shape matches result at index.
+  virtual bool CheckResultShape(
+      const BackendDataPtr& parameter_data,
+      size_t result_idx) {
+    // Dummy default implementation: performs no check and always reports
+    // false.
+    return false;
+  }
+
+  // Adds the given output as a component of the result tuple and returns its
+  // assigned position within the tuple.
+  virtual size_t AddResult(const torch::lazy::Output& output) = 0;
+
+  // Associates the given output with the input parameter of the given index and
+  // shape. Only used for the operator-by-operator execution, mostly for
+  // debugging purposes.
+  virtual void AddParameter(
+      const torch::lazy::Output& output,
+      size_t index,
+      const Shape& shape,
+      const std::string& name) = 0;
+
+  // Build the computation capturing all the operations created with the
+  // embedded builder (returned by the builder() API).
+  virtual ComputationPtr Build() = 0;
+
+  // Number of entries currently held in emit_status_.
+  size_t GetEmittedNodeCount() const {
+    return emit_status_.size();
+  }
+
+ protected:
+  BackendDevice device_;
+  std::vector parameters_;
+  std::vector parameter_sequence_;
+  Util::EmissionMap emit_status_;
+};
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h
new file mode 100644
index 0000000000000000000000000000000000000000..a34161654e64d8277e0af2508360dcaa9aa782b8
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/cache.h
@@ -0,0 +1,148 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+/**
+ * Cache utils in this file are adapted from PyTorch/XLA
+ * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/cache.h
+ */
+
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+// Generic key and object cache with LRU expiration policy. The objects of type
+// T will be stored as std::shared_ptr and taken and returned as such, by the
+// cache API.
+//
+// Implementation notes: entries live in a list ordered from most to least
+// recently used; the map is keyed by pointers to the keys stored inside the
+// list elements and maps them to list iterators. Every member function that
+// touches the containers takes lock_ first. A max_size of 0 disables the
+// cache: Add/Get/Erase/Clear/Numel then return immediately.
+template <
+    typename K,
+    typename T,
+    typename H = std::hash,
+    typename E = std::equal_to>
+class Cache {
+ public:
+  using TypePtr = std::shared_ptr;
+  using Element = std::pair;
+
+  explicit Cache(size_t max_size) : max_size_(max_size) {}
+
+  // Adds an object to the cache, unless it already exists. If the cache grows
+  // beyond the limit set during construction, the oldest used object will be
+  // removed from the cache.
+  // Returns the cached object for `key`: the pre-existing one when the key
+  // was already present, otherwise `object`.
+  TypePtr Add(K key, TypePtr object) {
+    if (!max_size_) {
+      return object;
+    }
+    std::lock_guard slock(lock_);
+    // Insert optimistically at the front so the map can point at the key
+    // stored in the list node.
+    element_list_.emplace_front(Element(std::move(key), std::move(object)));
+    auto it = element_list_.begin();
+    auto emplace_result = element_map_.emplace(&it->first, it);
+    if (!emplace_result.second) {
+      // Key already cached: drop the new node and refresh the existing one.
+      element_list_.erase(it);
+      DoLRU(emplace_result.first->second);
+    } else if (element_list_.size() > max_size_) {
+      // Over capacity: evict the least recently used entry (list tail). The
+      // map entry is erased first because its key points into that node.
+      Element* last = &element_list_.back();
+      element_map_.erase(&last->first);
+      element_list_.pop_back();
+    }
+    return emplace_result.first->second->second;
+  }
+
+  // Retrieves the existing object if it exists. If it does, its position in
+  // the LRU list gets moved to the head of the list.
+  // Returns nullptr if no object with the specified key is found within the
+  // cache.
+  TypePtr Get(const K& key) {
+    if (!max_size_) {
+      return nullptr;
+    }
+    std::lock_guard slock(lock_);
+    auto it = element_map_.find(&key);
+    if (it == element_map_.end()) {
+      return nullptr;
+    }
+    DoLRU(it->second);
+    return it->second->second;
+  }
+
+  // Returns the object at the head of the LRU list; the check fails when the
+  // cache is empty.
+  // NOTE(review): unlike the other accessors there is no max_size_ guard, so
+  // with max_size_ == 0 (Add never stores anything) the check always fails.
+  TypePtr GetLatest() {
+    std::lock_guard g(lock_);
+    TORCH_CHECK(!element_list_.empty());
+    return element_list_.front().second;
+  }
+
+  // Removes the entry for `key`. Returns true when an entry was removed.
+  bool Erase(const K& key) {
+    if (!max_size_) {
+      return false;
+    }
+    std::lock_guard slock(lock_);
+    auto it = element_map_.find(&key);
+    if (it == element_map_.end()) {
+      return false;
+    }
+    // Erase the map entry before the list node that owns its key.
+    auto lit = it->second;
+    element_map_.erase(it);
+    element_list_.erase(lit);
+    return true;
+  }
+
+  // Removes every entry (map first, then the list that owns the keys).
+  void Clear() {
+    if (!max_size_) {
+      return;
+    }
+    std::lock_guard slock(lock_);
+    element_map_.clear();
+    element_list_.clear();
+  }
+
+  // Number of cached entries. Also checks that map and list agree in size.
+  // The size_t count is returned as int.
+  int Numel() const {
+    if (!max_size_) {
+      return 0;
+    }
+    std::lock_guard g(lock_);
+    TORCH_CHECK(element_map_.size() == element_list_.size());
+    return element_map_.size();
+  }
+
+ private:
+  using ElementList = std::list;
+
+  // Hashes the key a pointer refers to, not the pointer value.
+  struct Hasher {
+    size_t operator()(const K* key) const {
+      return hasher(*key);
+    }
+
+    H hasher;
+  };
+
+  // Compares the keys two pointers refer to, not the pointer values.
+  struct Equaler {
+    bool operator()(const K* k1, const K* k2) const {
+      return equaler(*k1, *k2);
+    }
+
+    E equaler;
+  };
+
+  using ElementMap = std::
+      unordered_map;
+
+  // Moves the given node to the front of the list (most recently used).
+  // splice relinks the node in place, so iterators and key pointers stored
+  // in the map stay valid.
+  void DoLRU(typename ElementList::iterator it) {
+    element_list_.splice(element_list_.begin(), element_list_, it);
+  }
+
+  // mutable so that the const member Numel() can lock it.
+  mutable std::mutex lock_;
+  // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members)
+  const size_t max_size_ = 0;
+  ElementList element_list_;
+  ElementMap element_map_;
+};
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h
new file mode 100644
index 0000000000000000000000000000000000000000..83ff42b145fa390c600e4f005c29c1993fdbbff9
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/config.h
@@ -0,0 +1,31 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+#include
+#include
+
+TORCH_DECLARE_bool(torch_lazy_ir_debug);
+TORCH_DECLARE_bool(torch_lazy_handle_special_scalars);
+TORCH_DECLARE_bool(torch_lazy_all_numbers_special_scalars);
+TORCH_DECLARE_bool(torch_lazy_param_aliasing);
+TORCH_DECLARE_bool(torch_lazy_reuse_ir);
+TORCH_DECLARE_bool(torch_lazy_use_thread_pool);
+TORCH_DECLARE_bool(torch_lazy_enable_device_data_cache);
+
+TORCH_DECLARE_int(torch_lazy_compilation_cache_size);
+TORCH_DECLARE_int(torch_lazy_device_data_cache_size);
+TORCH_DECLARE_int(torch_lazy_io_thread_pool_size);
+TORCH_DECLARE_int(torch_lazy_metrics_samples);
+TORCH_DECLARE_int(torch_lazy_trim_graph_check_frequency);
+TORCH_DECLARE_int(torch_lazy_trim_graph_size);
+
+TORCH_DECLARE_string(torch_lazy_metrics_percentiles);
+
+TORCH_DECLARE_int(torch_lazy_shape_cache_size);
+
+namespace torch::lazy {
+// Returns a mutable reference to a string; defined outside this header.
+TORCH_API std::string& getLTCForceFallback();
+}
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..7c3ce3171ce299d10a487dc5f85a9861d7d123c6
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/debug_util.h
@@ -0,0 +1,50 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+#include
+
+#include
+
+namespace torch::lazy {
+
+// Returns a mutable reference to a std::function object, so callers can
+// replace the stored callable.
+TORCH_API std::function()>&
+GetPythonFramesFunction();
+
+TORCH_API std::string GetFirstUserFrameInPython();
+
+// Static-only helpers for dumping tensor IR graphs.
+class TORCH_API DebugUtil {
+ public:
+  // Output formats accepted by the dump functions below.
+  enum GraphFormat {
+    kText,
+    kDot,
+    kBackend,
+  };
+
+  static GraphFormat GetDefaultGraphFormat();
+
+  // Dumps the current Python frame and the IR Graph whose roots are the IR
+  // values held at the tensors. If indices is not nullptr, it selects the
+  // indices of the tensors whose graph will be emitted.
+  static std::string GetTensorsGraphInfo(
+      c10::ArrayRef tensors,
+      const std::vector* indices,
+      GraphFormat format = GetDefaultGraphFormat());
+
+  // If the environment variable LTC_SAVE_TENSORS_FILE is set to the proper
+  // output path, an instance of the report returned by GetTensorsGraphInfo() is
+  // saved.
+  static void SaveTensorsGraphInfo(
+      const char* name,
+      c10::ArrayRef tensors,
+      const std::vector* indices,
+      GraphFormat format = GetDefaultGraphFormat());
+
+  static bool ExperimentEnabled(const std::string& name);
+};
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h
new file mode 100644
index 0000000000000000000000000000000000000000..d21d6feb0ba58ad5e17f7e92b1e14c4334c3475e
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/dynamic_ir.h
@@ -0,0 +1,53 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+#pragma once
+
+#include
+
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+/**
+ * The goal of "dynamic" Nodes is to patch a hole in our tracing.
+ * Previously, if a user called `sizes` on a Tensor, it would leak out
+ * of our tracing system, as `sizes` returns a torch.Size or an int. To
+ * prevent this from happening, we introduce DimensionNode, a new type
+ * of Node that abstracts the operation of getting the dimensions of a
+ * Tensor.
+ *
+ * Consider the following example:
+ * ```
+ * numel = x.shape()[0] * x.shape()[1]
+ * ```
+ *
+ * Here, `x.shape()[i]` will be a SizeNode (subclass of DimensionNode),
+ * and the multiplication of the two SizeNodes will be represented by
+ * a SizeMul (also a subclass of DimensionNode). Through this, we can
+ * prevent `numel` from being represented as a Python int and thus
+ * burned into the Graph.
+ */
+
+// Base-class defaults: not symbolic, and both value getters fail their check
+// with "NYI" unless a subclass overrides them.
+class TORCH_API DimensionNode {
+ public:
+  virtual bool isSymbolic() const {
+    return false;
+  }
+  virtual int64_t getDynamicValue() const {
+    TORCH_CHECK(false, "NYI");
+  }
+  virtual int64_t getStaticValue() const {
+    TORCH_CHECK(false, "NYI");
+  }
+  virtual ~DimensionNode() = default;
+};
+
+} // namespace torch::lazy
+
+#else
+#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined."
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h
new file mode 100644
index 0000000000000000000000000000000000000000..f224aae7bf62257f83e4e5db89c843a9323777a1
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/hash.h
@@ -0,0 +1,247 @@
+#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)
+/**
+ * Hash utils in this file are adapted from PyTorch/XLA
+ * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/util.h
+ */
+#pragma once
+
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+#include
+
+namespace torch::lazy {
+
+using size_t = std::size_t;
+
+// 128-bit hash value: a thin subclass of c10::uint128 whose constructors
+// accept the common integer widths. The signed overloads cast before
+// forwarding to the uint128 constructor.
+class TORCH_API hash_t : public c10::uint128 {
+ public:
+  // Switch from typedef hash_t = uint128 to provide explicit casters
+  hash_t(int8_t val) : uint128(static_cast(val)) {}
+  hash_t(int16_t val) : uint128(static_cast(val)) {}
+  hash_t(int32_t val) : uint128(static_cast(val)) {}
+  hash_t(int64_t val) : uint128(static_cast(val)) {}
+  hash_t(uint32_t val) : uint128(val) {}
+  hash_t(uint64_t val) : uint128(val) {}
+  hash_t(uint128 val) : uint128(val) {}
+  hash_t(uint64_t top, uint64_t bottom) : uint128(top, bottom) {}
+  hash_t() = default;
+};
+
+// Std* functions use 64-bit hash
+size_t TORCH_API StdDataHash(const void* data, size_t size);
+
+size_t TORCH_API StdHashCombine(uintmax_t a, uintmax_t b);
+
+// Other functions are all 128-bit
+hash_t TORCH_API HashBlock(const void* data, size_t n, const hash_t& seed);
+
+hash_t TORCH_API DataHash(const void* data, size_t size);
+
+hash_t TORCH_API HashCombine(const hash_t& a, const hash_t& b);
+
+size_t TORCH_API HashReduce(const hash_t& a);
+
+// Returns a string representation of a hash
+std::string TORCH_API HashToString(const hash_t& a);
+
+// Function object that reduces a hash_t to size_t via HashReduce, e.g. for
+// use as the hasher of an unordered container keyed by hash_t.
+struct HashReducer {
+  size_t operator()(const hash_t& value) const {
+    return HashReduce(value);
+  }
+};
+
+// Hashes the characters of a NUL-terminated string (terminator excluded).
+static inline hash_t StringHash(const char* data) {
+  return DataHash(data, std::strlen(data));
+}
+
+// Automatic templated implementation for 'arithmetic' types
+// (hashes the raw object bytes of `value`)
+template >* = nullptr>
+hash_t Hash(const T& value) {
+  return DataHash(&value, sizeof(value));
+}
+
+// added because on macos builds the vector specialization
+// breaks falling through to the templated arithmetic types above
+hash_t TORCH_API Hash(const std::vector& value);
+
+// Specialized implementations for proprietary types
+// (the enum-like overloads below hash the raw bytes of the value)
+static inline hash_t Hash(const c10::ScalarType& value) {
+  return DataHash(&value, sizeof(value));
+}
+
+static inline hash_t Hash(const c10::MemoryFormat& value) {
+  return DataHash(&value, sizeof(value));
+}
+
+static inline hash_t Hash(const c10::DeviceType& value) {
+  return DataHash(&value, sizeof(value));
+}
+
+// Combines the hash of the device type with the hash of the device index.
+static inline hash_t Hash(const c10::Device& value) {
+  return HashCombine(Hash(value.type()), Hash(value.index()));
+}
+
+static inline hash_t Hash(const c10::Layout& value) {
+  return DataHash(&value, sizeof(value));
+}
+
+// Dispatches on the scalar's type tag and hashes the extracted value. Only
+// ComplexDouble, Double, Long and Bool are handled; any other tag trips the
+// internal assert.
+static inline hash_t Hash(const c10::Scalar& value) {
+  switch (value.type()) {
+    case c10::ScalarType::ComplexDouble:
+      return Hash(value.toComplexDouble());
+    case c10::ScalarType::Double:
+      return Hash(value.toDouble());
+    case c10::ScalarType::Long:
+      return Hash(value.toLong());
+    case c10::ScalarType::Bool:
+      return Hash(value.toBool());
+    default:
+      TORCH_INTERNAL_ASSERT(false, "Unknown scalar type.", value.type());
+  }
+}
+
+// Hashes a tensor's data: takes a contiguous version of the tensor and
+// hashes numel * element_size bytes starting at its data pointer. Scalar
+// types not listed in the switch trip the internal assert.
+// NOTE(review): every case performs the same DataHash call as written here;
+// the per-type template arguments of const_data_ptr are not visible in this
+// text.
+static inline hash_t TensorHash(const at::Tensor& tensor) {
+  at::Tensor ctensor = tensor.contiguous();
+  int64_t size = ctensor.numel() * ctensor.element_size();
+  switch (ctensor.scalar_type()) {
+    case at::ScalarType::Bool:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Byte:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Char:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Short:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Int:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Long:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Float:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Double:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::BFloat16:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::Half:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::ComplexFloat:
+      return DataHash(ctensor.const_data_ptr>(), size);
+    case at::ScalarType::ComplexDouble:
+      return DataHash(ctensor.const_data_ptr>(), size);
+    case at::ScalarType::UInt16:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::UInt32:
+      return DataHash(ctensor.const_data_ptr(), size);
+    case at::ScalarType::UInt64:
+      return DataHash(ctensor.const_data_ptr(), size);
+    default:
+      TORCH_INTERNAL_ASSERT(
+          false, "Unsupported scalar type:", ctensor.scalar_type());
+  }
+}
+
+// String overloads hash exactly size() bytes starting at data().
+static inline hash_t Hash(const std::string& value) {
+  return DataHash(value.data(), value.size());
+}
+
+static inline hash_t Hash(const std::string_view& value) {
+  return DataHash(value.data(), value.size());
+}
+
+// Hashes the tensor returned by the generator's get_state().
+static inline hash_t Hash(const at::Generator& value) {
+  return TensorHash(value.get_state());
+}
+
+// Taken from glibc's
implementation of hashing optionals, +// we want to include a contribution to the hash to distinguish +// cases where one or another option was null, but we hope it doesn't +// collide with an actually scalar value. +// +// Use an arbitrary randomly-selected 64-bit integer rather than a +// small constant that we then hash at runtime so we don't have to +// repeatedly hash a constant at runtime. +// NOLINTNEXTLINE(*-narrowing-conversions) +static const int64_t kNullOpt = 0x8655d738f3678dda; + +// Hashing for std::optional types contributes to hash +// for optionals with null value, important to distinguish +// between and cases +template +hash_t Hash(const std::optional& value) { + if (value.has_value()) { + return Hash(value.value()); + } else { + return kNullOpt; + } +} + +// Hashing of containers +// Forward declare to allow hashes of vectors of vectors to work. +template +hash_t ContainerHash(const T& values); + +template +hash_t Hash(const std::vector& values) { + return ContainerHash(values); +} + +// Need a special case for std::optional? +template +hash_t Hash(const std::optional>& value) { + if (value.has_value()) { + return ContainerHash(value.value()); + } else { + return kNullOpt; + } +} + +template +hash_t Hash(const std::set& values) { + return ContainerHash(values); +} + +template +hash_t Hash(const std::pair& values) { + return HashCombine(Hash(values.first), Hash(values.second)); +} + +static inline hash_t Hash(const hash_t& value) { + return value; +} + +template +hash_t Hash(c10::ArrayRef values) { + return ContainerHash(values); +} + +template +hash_t ContainerHash(const T& values) { + hash_t h(static_cast(0x85ebca77c2b2ae63)); + for (const auto& value : values) { + h = HashCombine(h, Hash(value)); + } + return h; +} + +// Varargs hashing +template +hash_t MHash() { + return hash_t(static_cast(0x165667b19e3779f9)); +} + +template +hash_t MHash(T value, Targs... 
Fargs) { + return HashCombine(Hash(value), MHash(Fargs...)); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h new file mode 100644 index 0000000000000000000000000000000000000000..440e880fcb76b7e60e7a383d6b820ab6a3c7ae34 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/helpers.h @@ -0,0 +1,75 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +// TODO: Consolidate this file with util.h + +namespace torch::lazy { + +// Converts an iterable container to a vector of int64's. +template +static std::vector ToI64Vector(const S& input) { + return ToVector(input); +} + +// Creates a set of dimension by dropping the drop_dims ones. +TORCH_API std::vector DropDimensions( + c10::ArrayRef sizes, + c10::ArrayRef drop_dims); + +// Get the canonical dimension index in the [0, rank) interval. Negative +// indices are interpreted as follows: -1 is rank-1, -2 is rank-2 etc. +TORCH_API int64_t GetCanonicalDimensionIndex(int64_t dim, int64_t rank); + +// Same as above, for multiple dimensions. +TORCH_API std::vector GetCanonicalDimensionIndices( + c10::ArrayRef dimensions, + int64_t rank); + +// Returns the canonical position in the dim dimension, handling negative +// values for the position. +TORCH_API int64_t GetCanonicalPosition( + c10::ArrayRef dimensions, + int64_t dim, + int64_t pos); + +// Creates a transposition from the given input and dimensions. 
+TORCH_API std::vector MakeTransposePermutation( + int64_t dim0, + int64_t dim1, + int64_t rank); + +// Calculates the protomoted shape to which the input shapes should be +// broadcasted for an elementwise operation. The size of the common dimensions +// (2,3,4 for shape1, and 0,1,2 for shape2) must either match, or either one +// of the two be 1. +// Example: +// shape1 = [9, 7, 6, 1, 2] +// shape2 = [6, 5, 2] +// result_shape = [9, 7, 6, 5, 2] +TORCH_API std::vector GetPromotedShape( + c10::ArrayRef shape1_dims, + c10::ArrayRef shape2_dims); + +TORCH_API Shape +GetPromotedBinaryOpShape(const Shape& shape1, const Shape& shape2); + +TORCH_API std::vector StrSplit(std::string_view text, char delim); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..29c0ebbbe8c4471051f45a7a00e3138c2e54c2f8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/internal_ops/ltc_ops.h @@ -0,0 +1,54 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +#include + +namespace torch::lazy { + +class TORCH_API OpKindWrapper { + public: + explicit OpKindWrapper(const char* name) : name_(name) {} + + const OpKind& operator*() const { + return get(); + } + + operator OpKind() const { + return get(); + } + + private: + const OpKind& get() const { + c10::call_once(once_, [this]() { op_kind_ = OpKind::Get(name_); }); + return op_kind_; + } + + const char* name_; + mutable OpKind op_kind_; + mutable 
c10::once_flag once_; +}; + +const OpKindWrapper ltc_all_to_all("lazy_tensors::all_to_all"); +const OpKindWrapper ltc_cast("lazy_tensors::cast"); +const OpKindWrapper ltc_collective_permute("lazy_tensors::collective_permute"); +const OpKindWrapper ltc_cross_replica_sum("lazy_tensors::cross_replica_sum"); +const OpKindWrapper ltc_device_data("lazy_tensors::device_data"); +const OpKindWrapper ltc_get_dimensions_size( + "lazy_tensors::ltc_get_dimensions_size"); +const OpKindWrapper ltc_moving_average("lazy_tensors::moving_average"); +const OpKindWrapper ltc_nms("lazy_tensors::nms"); +const OpKindWrapper ltc_not_supported("lazy_tensors::not_supported"); +const OpKindWrapper ltc_replication_pad("lazy_tensors::replication_pad"); +const OpKindWrapper ltc_replication_pad_backward( + "lazy_tensors::replication_pad_backward"); +const OpKindWrapper ltc_tensor_data("lazy_tensors::tensor_data"); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h new file mode 100644 index 0000000000000000000000000000000000000000..3c096234558d1bfbafe7d4cecf9b2d306480f979 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir.h @@ -0,0 +1,294 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_dynamic_shapes); + +namespace torch::lazy { + +static const hash_t kHashSeed(static_cast(0x5a2d296e9)); + +class Node; +struct Output; +struct Value; + +using NodePtr = std::shared_ptr; + +// The Kind of operation a Node can be associated to. +struct TORCH_API OpKind { + OpKind() = default; + explicit OpKind(c10::Symbol op) : op(op) {} + + bool operator==(const OpKind& rhs) const { + return op == rhs.op; + } + bool operator!=(const OpKind& rhs) const { + return !operator==(rhs); + } + bool operator<(const OpKind& rhs) const { + return c10::unique_t(op) < c10::unique_t(rhs.op); + } + + hash_t hash() const; + + std::string ToString() const { + return op.toQualString(); + } + + // Retrieves an existing operation object, or creates a new one. Operations + // that are specific to lazy tensors, should live within the 'lazy_tensors::' + // namespace. 
+ static OpKind Get(const std::string& name); + + c10::Symbol op; +}; + +inline std::ostream& operator<<(std::ostream& stream, const OpKind& op) { + stream << op.ToString(); + return stream; +} + +using OpList = c10::ArrayRef; + +hash_t OperandHashes( + const OpList& operands, + const hash_t& seed, + bool bakeInSizes); +// A node in the graph. Nodes for operations which require extra data to be +// stored for lowering should inherit from this class and add an operation +// specific member there. For example, a constant might create a new +// NodeConstant class (inheriting from Node) with an extra lazy_tensors::Literal +// field, or a tensor value might create a new NodeTensor with a computation +// client data handle in it. +class TORCH_API Node { + public: + static bool enableDynamicShape(); + + // Creates a new node with the given op name. The op is a unique identifier + // for the operation. The num_outputs tells how many outputs a given operation + // generates. + // + // None leaf node's node_hash does not contains shape information always. + // So we pass in the hash value rather than a function. + Node(OpKind op, size_t num_outputs); + + // Construct node with operands and shapes + Node( + OpKind op, + OpList operands, + std::vector&& shapes, + size_t num_outputs = 1); + + // Construct node with operands and no shape + Node(OpKind op, OpList operands, size_t num_outputs = 1); + + // Construct node with shape and no operands + Node(OpKind op, Shape shape, size_t num_outputs = 1); + + virtual ~Node() = default; + + const OpKind& op() const { + return op_; + } + + size_t num_outputs() const { + return num_outputs_; + } + + // Retrieves the full shape of the IR Node. 
+ virtual c10::ArrayRef shapes() const; + + virtual const Shape& shape(size_t output_index = 0) const; + + // Add the shape computed by the shape_fn + void addComputedShape(const std::function& shape_fn); + + // Compute the shape using the provided shape_fn if not previously cached + Shape computeShape(const std::function& shape_fn); + + virtual const std::vector& operands() const; + + virtual const Output& operand(size_t i) const; + + // Gets operand at index i if index is valid, or kNullOutput otherwise. + virtual const Output& nullable_operand(size_t i) const; + + // Returns the hash of the dag used to look up the compiled graph + virtual hash_t hash() const = 0; + + // Returns the hash of the dag used to for shape caching + virtual hash_t shapeHash() const = 0; + + const MetaData& metadata() const { + return metadata_; + } + + UserMetaData* user_metadata() const { + return user_metadata_.get(); + } + + std::shared_ptr SetUserMetadata( + std::shared_ptr user_meta) { + std::swap(user_metadata_, user_meta); + return user_meta; + } + + virtual std::string ToString() const; + + private: + // The ID of the operation captured by this node. + OpKind op_; + size_t num_outputs_ = 1; + + // The IR specific metadata attached to the IR node. + MetaData metadata_; + // The IR framework user can attach a user defined metadata object deriving + // from UserMetaData. + std::shared_ptr user_metadata_; + + protected: + // Adds node's index output number as operand. + void AddOperand(const NodePtr& node, size_t index = 0); + + std::vector shapes_; + // A node holds a real reference to its operands. + std::vector operands_; + // Outputs do not hold references on the nodes, and neither do the uses, since + // otherwise we get into circular reference counting. 
+ std::vector operands_as_outputs_; +}; + +inline std::ostream& operator<<(std::ostream& stream, const Node& node) { + stream << node.ToString(); + return stream; +} + +// Note: Keep this version of NodeCast for smooth PyTorch/XLA migration, and +// clean up once the migration is done. +template +const T* NodeCast(const Node* node, OpKind op) { + if (op != node->op()) { + return nullptr; + } +#ifdef NDEBUG + return static_cast(node); +#else + return &dynamic_cast(*node); +#endif +} + +template +const T* NodeCast(const Node* node) { + if (T::ClassOpKind() != node->op()) { + return nullptr; + } + // TODO: Some IR classes share the same opkind, such as Mean and MeanDim, so + // static_cast is not safe here. Unless we have opkind unique for each class, + // we have to use dynamic_cast here. + return dynamic_cast(node); +} + +// Represents a specific output produced by a node. Since the output of a node +// can be composed by multiple outputs, the node+index coordinates fully qualify +// each single output. +struct TORCH_API Output { + struct Hasher { + size_t operator()(const Output& output) const; + }; + + Output() = default; + explicit Output(const Node* node, size_t index = 0) + : node(node), index(index) {} + + hash_t hash() const; + hash_t shapeHash() const; + + bool operator==(const Output& rhs) const { + return node == rhs.node && index == rhs.index; + } + + // To compare the operands of to-be-constructed node and to-be-reused node + bool operator==(const Value& rhs) const; + + bool operator!=(const Output& rhs) const { + return !operator==(rhs); + } + + const Shape& shape() const { + return node->shape(index); + } + + std::string ToString() const; + + // The node providing the output. + const Node* node{nullptr}; + // The index in the node's output this output refers to. 
+ size_t index{0}; +}; + +inline std::ostream& operator<<(std::ostream& stream, const Output& output) { + stream << output.ToString(); + return stream; +} + +template +using OutputMap = std::unordered_map; + +// Represents an input/operand for a Node object. +struct TORCH_API Value { + Value() = default; + /* implicit */ Value(NodePtr&& node, size_t index = 0) + : node(std::move(node)), index(index) {} + /* implicit */ Value(const NodePtr& node, size_t index = 0) + : node(node), index(index) {} + + hash_t hash() const; + hash_t shapeHash() const; + + operator bool() const { + return node != nullptr; + } + + operator Output() const { + return Output(node.get(), index); + } + + const Shape& shape() const { + return node->shape(index); + } + + Node* operator->() const { + return node.get(); + } + + NodePtr node; + size_t index = 0; +}; + +} // namespace torch::lazy + +namespace c10 { +// Explicit template instantiation to make ArrayRef work +template class at::ArrayRef; +} // namespace c10 + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..c1b1c2abc8f342f42fea0e315b2d6762cace2d33 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_builder.h @@ -0,0 +1,153 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include + +// This file is part of the backend interface. 
So, ops shouldn't be added or +// removed without due process The exception to this being the view ops which +// will be removed soon pending functionalization + +namespace torch::lazy { + +template +NodePtr ReuseNode(Args&&... args) { + if (FLAGS_torch_lazy_reuse_ir) { + return LookupNodeFromTrieCache(std::forward(args)...); + } + return nullptr; +} + +// Caching an IR node into TrieCache +static inline void CacheNode(NodePtr node) { + if (FLAGS_torch_lazy_reuse_ir) { + TrieCache::Get()->Insert(std::move(node)); + } +} + +template +NodePtr MakeNode(Args&&... args) { + return std::make_shared(std::forward(args)...); +} + +// op is passed in for a more efficient node casting, see the implementation of +// NodeCast +template +NodePtr ReuseOrMakeNode(Args&&... args) { + NodePtr node = ReuseNode(std::forward(args)...); + if (!node) { + node = MakeNode(std::forward(args)...); + CacheNode(node); + } + return node; +} + +struct IrBuilder { + virtual NodePtr MakeDeviceData( + const std::shared_ptr& data) const = 0; + virtual NodePtr MakeScalar( + const at::Scalar& value, + const at::ScalarType& type) const = 0; + virtual NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) const = 0; + virtual NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = std::nullopt) const = 0; + virtual NodePtr MakeTensorList(const OpList& inputs) const = 0; + virtual NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = static_cast(0x5a2d296e9)) const = 0; + + // dynamic ir nodes + virtual NodePtr MakeSizeNode(const Value& input, size_t dim) const = 0; + virtual NodePtr MakeSizeAdd(const Value& a, const Value& b) const = 0; + virtual NodePtr MakeSizeMul(const Value& a, const Value& b) const = 0; + virtual NodePtr MakeSizeDiv(const Value& a, const Value& b) const = 0; + + virtual ~IrBuilder() = default; 
+}; + +static inline NodePtr MakeDeviceData(const std::shared_ptr& data) { + return getIrBuilder()->MakeDeviceData(data); +} +static inline NodePtr MakeScalar( + const at::Scalar& value, + const at::ScalarType& type) { + return getIrBuilder()->MakeScalar(value, type); +} +static inline NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) { + return getIrBuilder()->MakeExpand(input0, size, is_scalar_expand); +} +static inline NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = std::nullopt) { + return getIrBuilder()->MakeCast(input0, dtype, stype); +} +static inline NodePtr MakeTensorList(const OpList& inputs) { + return getIrBuilder()->MakeTensorList(inputs); +} +static inline NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = static_cast(0x5a2d296e9)) { + return getIrBuilder()->MakeGeneric( + op, operands, shape, num_outputs, hash_seed); +} + +// dynamic ir nodes +static inline NodePtr MakeSizeNode(const Value& input, size_t dim) { + return getIrBuilder()->MakeSizeNode(input, dim); +} +static inline NodePtr MakeSizeAdd(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeAdd(a, b); +} +static inline NodePtr MakeSizeMul(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeAdd(a, b); +} +static inline NodePtr MakeSizeDiv(const Value& a, const Value& b) { + return getIrBuilder()->MakeSizeDiv(a, b); +} + +inline Value GetSymIntValue(const c10::SymInt& a) { + if (auto ma = a.maybe_as_int()) { + return Value(MakeScalar(*ma, at::kLong), 0); + } else { + return Value( + dynamic_cast(a.toSymNodeImplUnowned()) + ->node_, + 0); + } +} + +// TODO: this should return Value +inline std::vector GetSymIntArrayRefValue(c10::SymIntArrayRef arr) { + std::vector r; + for (const auto& a : arr) { + r.emplace_back(a.guard_int(__FILE__, __LINE__)); + } + return r; 
+} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h new file mode 100644 index 0000000000000000000000000000000000000000..7a27bd8dbec82daf15acb6ac695a8aba86dac08d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_dump_util.h @@ -0,0 +1,35 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +class BackendDevice; + +class TORCH_API DumpUtil { + public: + static std::string ToDot(c10::ArrayRef nodes); + + static std::string PostOrderToDot( + c10::ArrayRef post_order, + c10::ArrayRef roots); + + static std::string ToText(c10::ArrayRef nodes); + + static std::string PostOrderToText( + c10::ArrayRef post_order, + c10::ArrayRef roots); + + static std::string ToBackend( + c10::ArrayRef values, + const BackendDevice& device); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h new file mode 100644 index 0000000000000000000000000000000000000000..8b913e2342810b322e4470fb01332a98a40d5116 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_metadata.h @@ -0,0 +1,56 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +namespace torch::lazy { +struct SourceLocation { + std::string file; + std::string function; + int line = -1; +}; + +TORCH_API void EmitShortFrameInfo( + std::ostream& stream, + const std::vector& frames); + +TORCH_API std::ostream& operator<<( + std::ostream& stream, + const std::vector& frames); + +// The base class for user defined metadata which is possible to attach to IR +// nodes. +struct TORCH_API UserMetaData { + virtual ~UserMetaData() = default; +}; + +struct TORCH_API MetaData { + std::string scope; + std::vector frame_info; +}; + +// TODO(whc) is this going to be used outside of in IR decompositions? +// RAII data structure to be used a stack variable to enter a new IR scope. IR +// scope names will appear in the IR and will help identifying the source of the +// single IR nodes. 
+struct TORCH_API ScopePusher { + explicit ScopePusher(const std::string& name); + ~ScopePusher(); + ScopePusher(ScopePusher&& other) = delete; + ScopePusher(const ScopePusher&) = delete; + ScopePusher& operator=(const ScopePusher&) = delete; + ScopePusher& operator=(ScopePusher&&) = delete; + + static void ResetScopes(); +}; + +TORCH_API MetaData GetMetaDataIfDebugging(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h new file mode 100644 index 0000000000000000000000000000000000000000..bb2f2420a1028cd6aa1d5b58e456dcf767904e5e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ir_util.h @@ -0,0 +1,50 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +namespace torch::lazy { + +class TORCH_API Util { + public: + // Tracks the emission status of the nodes during the post-order generation. + // It helps tracking loops within the computation graphs. + enum EmitStatus { + kNotEmitted, + kEmitting, + kEmitted, + }; + + using EmissionMap = std::unordered_map; + + // Computes the post order from the given node, without using recursion. The + // emission map can be used as saved state, for multiple separate calls to + // this API. The returned post-order can be empty if the node has already been + // emitted inside the emission map. An error is generated if a loop is + // detected. 
+ static std::vector ComputePostOrder( + const Node* node, + EmissionMap* emap); + + static std::vector ComputePostOrder( + c10::ArrayRef nodes, + EmissionMap* emap); + + // Same as above, but computes the post order on the set of nodes specified as + // argument. + static std::vector ComputePostOrder( + c10::ArrayRef nodes); + + // Retrieves the number of nodes within the graph whose sink are passed in the + // nodes argument. + static size_t GetGraphSize(c10::ArrayRef nodes); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h new file mode 100644 index 0000000000000000000000000000000000000000..f7937602a8f3877b698ec06844bb5445d3e580ea --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/lazy_graph_executor.h @@ -0,0 +1,434 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API LazyGraphExecutor { + public: + struct DeviceDataInfo : public BackendData::Info { + DeviceDataInfo(int64_t tensor_id, bool read_only) + : tensor_id(tensor_id), read_only(read_only) {} + + int64_t tensor_id = 0; + bool read_only = false; + }; + + // Register a lazy graph executor instance that can be retrieved using Get() + static void Register(LazyGraphExecutor* /*executor*/); + static LazyGraphExecutor* Get(); + + virtual ~LazyGraphExecutor() = default; + + // Override these methods to perform custom tensor registration and + // unregistration Note: It is vital that the 
parent implementations are also + // called in order for the tensors to show up in the live tensor list + virtual void RegisterTensor(std::shared_ptr data); + virtual void UnregisterTensor(LazyTensor::Data* data); + + // Seed for random generator. + // Override to supply your own DeviceContextArena. + virtual Value GetRngSeed(const BackendDevice& device); + virtual uint64_t GetRunningSeed(const BackendDevice& device); + virtual void SetRngSeed(const BackendDevice& device, uint64_t seed); + + void DeviceBarrier(const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Tensor& tensor, + const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + // Retrieves the set of lazy tensors which are currently live in the system, + // for the given device. If device is nullptr, the live tensors for all + // devices will be returned. Returned tensors are sorted by device as primary + // key, and by unique ID as secondary key. + std::vector GetLiveTensors(const BackendDevice* device); + + // Makes sure that any outstanding IR operation accumulated over live tensors, + // gets turned into device data. If wait is true, the sync operation will be + // run synchronously. The devices argument, if not empty, tells the devices + // which should be partecipating into the replicated computation. + virtual void SyncLiveTensorsGraph( + const BackendDevice* device, + c10::ArrayRef devices, + bool wait); + + // Applies all the pending IR operations queued over the input tensors. All + // the tensors must be on the same device. If wait is true, the sync operation + // will be run synchronously. The devices argument, if not empty, tells the + // devices which should be partecipating into the replicated computation. 
+ void SyncTensorsGraph( + std::vector* tensors, + c10::ArrayRef devices, + bool wait, + bool sync_ltc_data); + + // Marks an execution step, which allows the tensor framework to understand + // the computation boundaries. + // Override to supply your own DeviceContextArena. + virtual void MarkStep(const BackendDevice& device); + + // Waits for all the outstanding operations on all the supplied devices. + // If devices is empty, the wait will happen for all local devices. + void WaitDeviceOps(c10::ArrayRef devices); + + // Retrieves the PyTorch CPU tensors behind the lazy tensors IR operations. + // All the tensors must be on the same device. + std::vector GetTensors(std::vector* tensors); + + size_t IncTrimCounter() const; + + // Dumps the backend specific text of the computation accumulated in the graph + // which is attached the tensors. + std::string DumpBackendComputation(const std::vector& tensors); + + Value GetDeviceDataIrValue( + const at::Scalar& value, + c10::ScalarType type, + const BackendDevice& device); + Value GetIrValueForScalar( + const at::Scalar& value, + c10::ScalarType type, + const BackendDevice& device); + Value GetIrValueForScalar( + const at::Scalar& value, + const BackendDevice& device); + + // TODO: even though this API is currently used **only** in codegen to + // generate real scalar IR values vs scalar tensors, we would like to + // use it in other cases where `GetIrValueForXXXScalar` is used, as well + // In order to do that, we need to untangle the cases where we don't need + // `expand` and where we don't expect a scalar tensor + Value GetIrValueForScalarFromCodegen( + const at::Scalar& value, + const BackendDevice& device); + Value GetIrValueForExpandedScalar( + const at::Scalar& value, + const Shape& shape, + const BackendDevice& device); + + struct CachedComputation { + explicit CachedComputation(ComputationPtr computation) + : computation(std::move(computation)) {} + + ComputationPtr computation; + }; + + using ComputationCache 
= Cache; + + ComputationCache* GetComputationCache(); + + hash_t GetGraphHash(const std::vector& tensors); + + // Clear the computation cache. + void ClearComputationCache(); + // Remove a specific computation cache entry from its hash. + void RemoveFromComputationCache(const hash_t& hash); + + protected: + // TODO(alanwaketan): Revisit if all of them need to be accessible to + // derived classes. + + struct SyncTensorsConfig { + // Whether we want to force data on the target tensors (hence trimming + // the IR graph above them). + bool force_ltc_data = true; + // Whether when setting the data, the other properties of the tensor + // state should be reset. + bool sync_ltc_data = true; + }; + + struct SyncTensorCollection { + SyncTensorCollection() : hash(0) {} + + SyncTensorsConfig config; + std::vector indices; + hash_t hash; + std::vector unlocker; + BackendDevice device; + }; + + struct PostOrderData { + std::vector post_order; + Util::EmissionMap emission_map; + std::vector parameters_data; + std::vector parameter_sequence; + }; + + // Locking: + // We perform two kinds of operations of tensors, synchronous and + // asynchronous. The ApplyPendingGraph() are synchronous, as we need the + // device data result immediately. Before the synchronous operations can + // start, they need to wait that the pending asynchronous operations have + // completed. Synchronous operations do not hold device locks, since they are + // strictly sequential, dictated by the PyTorch execution order. The + // SyncTensorsGraph() is asynchronous, and returns immediately after having + // scheduled the asynchronous operation. While executing, the asynchronous + // operations will hold locks on all the participating devices (in most common + // cases there will be only one device). + // Since asynchronous operations capture device locks, only one asynchronous + // operation can execute at the same time, on a given device. 
Tensor + // operations which send data to device do not need to hold any device locks + // while doing so. Only operations which _use_ device data (computations, and + // transfer from server) need to wait for asynchronous operations to complete + // (barrier). + + class DeviceLocker { + public: + explicit DeviceLocker(BackendDevice device) : device_(std::move(device)) {} + + const BackendDevice& device() const { + return device_; + } + + void Lock(); + void Unlock(std::exception_ptr exptr); + void Barrier(); + + private: + void CheckResetException(); + + BackendDevice device_; + std::mutex mutex_; + std::condition_variable cv_; + bool locked_ = false; + std::exception_ptr exptr_; + }; + + class DeviceLockerArena { + public: + static DeviceLockerArena* Get(); + + std::shared_ptr GetLocker(const BackendDevice& device); + + void DeviceBarrier(const BackendDevice& device); + + // Use a set to impose an order on the device locking sequence (ABBA + // prevention). + std::vector LockDevices( + const std::set& devices); + + private: + ExceptionCleanup LockDevice(const BackendDevice& device); + + std::mutex mutex_; + std::map> lockers_; + }; + + class DataCacheArena { + public: + static DataCacheArena* Get(); + + BackendDataPtr GetDeviceData( + const at::Tensor& tensor, + const BackendDevice& device); + + BackendDataPtr GetDeviceData( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + private: + struct TensorHasher { + size_t operator()(const at::Tensor& tensor) const; + }; + struct TensorComparer { + bool operator()(const at::Tensor& tensor1, const at::Tensor& tensor2) + const; + }; + + explicit DataCacheArena(size_t max_cache_size); + + using DataCache = + Cache; + + DataCache* GetDataCache(const BackendDevice& device); + + size_t max_cache_size_ = 0; + std::mutex mutex_; + std::map> device_caches_; + }; + + // The DeviceContextArena holds per device live information and statistics, + // among which the lazy tensors which are 
currently alive in the system. This + // is used to create computation "barriers" in order to flush pending + // operations and ensure the same computations are created during the training + // loops. + // TODO(alanwaketan): Add a registry such that we don't need to make all + // related methods virtual. + class DeviceContextArena { + protected: + struct DeviceContext { + std::mutex lock; + std::map> tensors_data; + uint64_t seed = 101; + uint64_t running_seed = 101; + Value seed_ir_value; + }; + + public: + static DeviceContextArena* Get(); + virtual ~DeviceContextArena() = default; + + void RegisterTensor(std::shared_ptr data); + void UnregisterTensor(LazyTensor::Data* data); + + std::vector GetLiveTensors(const BackendDevice* device); + + // Overriding it allow derived class to use their own IRs for Value. + virtual Value GetRngSeed(const BackendDevice& device); + uint64_t GetRunningSeed(const BackendDevice& device); + void SetRngSeed(const BackendDevice& device, uint64_t seed); + + void MarkStep(const BackendDevice& device); + + std::vector GetActiveDevices(); + + protected: + DeviceContext* GetDeviceContext(const BackendDevice& device); + + void ForAllDeviceContexts( + const std::function& fn, + const BackendDevice* device); + + // Overriding it allow derived class to use their own conversions. 
+ virtual Value IrValueFromScalar( + const at::Scalar& value, + at::ScalarType scalar_type, + const BackendDevice& device); + + private: + std::vector GetAllDeviceContexts(); + + std::mutex lock_; + std::map device_contexts_; + }; + + struct Async { + Async( + SyncTensorCollection* coll, + std::vector parameters_data, + std::vector tensors_data, + ComputationCache::TypePtr cached_computation); + virtual ~Async() = default; + + void Wait(); + + MultiWait mwait; + std::vector indices; + std::vector unlocker; + std::vector parameters_data; + BackendDevice device; + ComputationCache::TypePtr cached_computation; + std::vector tensors_data; + }; + + void ResetTrimCounter() const; + + // Waits for this SyncTensorCollection's device barrier and acquire the lock. + virtual void TensorCollectionBarrier(SyncTensorCollection* coll); + + // One can override to insert your own profiler. + virtual PostOrderData RunPostOrder( + const std::vector& ir_values, + SyncTensorCollection* coll); + + private: + struct CompilationResult { + BackendDevice device; + size_t emitted_nodes = 0; + ComputationPtr computation; + std::vector parameters_data; + }; + + virtual bool ShouldSyncTensor(const LazyTensorPtr& tensor) const; + + SyncTensorCollection CollectSyncTensors( + const std::vector& tensors, + const SyncTensorsConfig& config); + + std::vector CollectRoots( + const std::vector& tensors, + c10::ArrayRef indices); + + std::vector SetTensorData( + std::vector* tensors, + const SyncTensorsConfig& config, + c10::ArrayRef indices, + const std::vector& tensor_data_vec); + + void ExtractIRAndPrepareTensorData( + std::vector* tensors, + const SyncTensorsConfig& config, + c10::ArrayRef indices, + std::vector& ir_values, + std::vector& tensor_data_vec); + + std::shared_ptr TryRunCachedSync( + std::vector* tensors, + SyncTensorCollection* coll, + PostOrderData* po_data, + const std::vector& tensor_data_vec); + + CompilationResult Compile( + const std::vector& tensors, + c10::ArrayRef devices, + 
const SyncTensorCollection& coll, + PostOrderData* po_data, + const std::vector& ir_values); + + ComputationCache::TypePtr LookupCachedCompile(const hash_t& hash); + + std::shared_ptr SyncTensorsGraphInternal( + std::vector* tensors, + c10::ArrayRef devices, + const SyncTensorsConfig& config); + + // Schedules the execution of a sync tensors operation in background. The + // asynchronous operation will hold the device locks by capturing the ones + // present within the coll structure. + std::shared_ptr ScheduleSyncTensorsGraph( + SyncTensorCollection* coll, + std::vector parameters_data, + std::vector tensors_data, + ComputationCache::TypePtr cached_computation); + + std::shared_ptr ScheduleSyncTensorsGraph( + std::vector* tensors, + SyncTensorCollection* coll, + std::vector parameters_data, + ComputationCache::TypePtr cached_computation, + const std::vector& tensor_data_vec); + + std::vector GetTensorsFused(std::vector* tensors); + + std::vector FetchTensors( + std::vector* tensors, + c10::ArrayRef tensors_data, + const std::vector* indices); + + // Gathers the device data for all the input tensors, after an + // asynchronous operation. + std::vector GatherTensorsData( + const std::vector& tensors, + c10::ArrayRef indices, + c10::ArrayRef tensors_data); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h new file mode 100644 index 0000000000000000000000000000000000000000..a175d9358ce87beb902226c1700403b5c0f70bc5 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/metrics.h @@ -0,0 +1,293 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h + */ + +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +#include + +namespace torch::lazy { + +struct TORCH_API Sample { + Sample() = default; + Sample(int64_t timestamp_ns, double value) + : timestamp_ns(timestamp_ns), value(value) {} + + int64_t timestamp_ns = 0; + double value = 0; +}; + +using MetricReprFn = std::function; + +// Class used to collect time-stamped numeric samples. The samples are stored in +// a circular buffer whose size can be configured at constructor time. +class TORCH_API MetricData { + public: + // Creates a new MetricData object with the internal circular buffer storing + // max_samples samples. The repr_fn argument allow to specify a function which + // pretty-prints a sample value. + MetricData(MetricReprFn repr_fn, size_t max_samples); + + // Returns the total values of all the samples being posted to this metric. + double Accumulator() const; + + size_t TotalSamples() const; + + void AddSample(int64_t timestamp_ns, double value); + + // Returns a vector with all the current samples, from the oldest to the + // newer. 
If accumulator is not nullptr, it will receive the current value of + // the metrics' accumulator (the sum of all posted values). If total_samples + // is not nullptr, it will receive the count of the posted values. + std::vector Samples(double* accumulator, size_t* total_samples) const; + + std::string Repr(double value) const { + return repr_fn_(value); + } + + void Reset(); + + bool IsValid() const { + return TotalSamples() > 0; + } + + private: + mutable std::mutex lock_; + MetricReprFn repr_fn_; + size_t count_ = 0; + std::vector samples_; + double accumulator_ = 0.0; +}; + +// Counters are a very lightweight form of metrics which do not need to track +// sample time. +class TORCH_API CounterData { + public: + CounterData() : value_(0) {} + + void AddValue(int64_t value) { + value_ += value; + } + + int64_t Value() const { + return value_; + } + + void Reset() { + value_ = 0; + } + + bool IsValid() const { + return value_ > 0; + } + + private: + std::atomic value_; +}; + +class TORCH_API MetricsArena { + public: + static MetricsArena* Get(); + + void ResetCounters(); + void ResetMetrics(); + + // Registers a new metric in the global arena. + void RegisterMetric( + const std::string& name, + MetricReprFn repr_fn, + size_t max_samples, + std::shared_ptr* data); + + void RegisterCounter( + const std::string& name, + std::shared_ptr* data); + + void ForEachMetric( + const std::function& metric_func); + + void ForEachCounter( + const std::function& + counter_func); + + std::vector GetMetricNames(); + + MetricData* GetMetric(const std::string& name); + + std::vector GetCounterNames(); + + CounterData* GetCounter(const std::string& name); + + private: + std::mutex lock_; + std::map> metrics_; + std::map> counters_; +}; + +// Emits the value in a to_string() conversion. +TORCH_API std::string MetricFnValue(double value); +// Emits the value in a humanized bytes representation. 
+TORCH_API std::string MetricFnBytes(double value); +// Emits the value in a humanized time representation. The value is expressed in +// nanoseconds EPOCH time. +TORCH_API std::string MetricFnTime(double value); + +// The typical use of a Metric is one in which it gets created either in a +// global scope context: +// static Metric* metric = new Metric("RpcCount"); +// Or within a function scope: +// void MyFunction(...) { +// static Metric* metric = new Metric("RpcCount"); +// ... +// metric->AddSample(ts_nanos, some_value); +// } +class TORCH_API Metric { + public: + explicit Metric( + std::string name, + MetricReprFn repr_fn = MetricFnValue, + size_t max_samples = 0); + + const std::string& Name() const { + return name_; + } + + double Accumulator() const; + + void AddSample(int64_t timestamp_ns, double value); + + void AddSample(double value); + + std::vector Samples(double* accumulator, size_t* total_samples) const; + + std::string Repr(double value) const; + + private: + MetricData* GetData() const; + + std::string name_; + MetricReprFn repr_fn_; + size_t max_samples_; + mutable std::shared_ptr data_ptr_; + mutable std::atomic data_; +}; + +// A Counter is a lightweight form of metric which tracks an integer value which +// can increase or decrease. +// A typical use is as: +// static Counter* counter = new Counter("MyCounter"); +// ... 
+// counter->AddValue(+1); +class TORCH_API Counter { + public: + explicit Counter(std::string name); + + void AddValue(int64_t value) { + GetData()->AddValue(value); + } + + int64_t Value() const { + return GetData()->Value(); + } + + private: + CounterData* GetData() const; + + std::string name_; + mutable std::shared_ptr data_ptr_; + mutable std::atomic data_; +}; + +#define TORCH_LAZY_COUNTER(name, value) \ + do { \ + static ::torch::lazy::Counter* __counter = \ + new ::torch::lazy::Counter(name); \ + __counter->AddValue(value); \ + } while (0) + +#define TORCH_LAZY_FN_COUNTER(ns) TORCH_LAZY_COUNTER(c10::str(ns, __func__), 1) + +#define TORCH_LAZY_VALUE_METRIC(name, value) \ + do { \ + static ::torch::lazy::Metric* __metric = \ + new ::torch::lazy::Metric(name, torch::lazy::MetricFnValue); \ + __metric->AddSample(value); \ + } while (0) + +// Creates a report with the current metrics statistics. +TORCH_API std::string CreateMetricReport(); + +// Creates a report with the selected metrics statistics. +TORCH_API std::string CreateMetricReport( + const std::vector& counter_names, + const std::vector& metric_names); + +// Returns the currently registered metric names. Note that the list can grow +// since metrics are usually function initialized (they are static function +// variables). +TORCH_API std::vector GetMetricNames(); + +// Retrieves the metric data of a given metric, or nullptr if such metric does +// not exist. +TORCH_API MetricData* GetMetric(const std::string& name); + +// Returns the currently registered counter names. Note that the list can grow +// since counters are usually function initialized (they are static function +// variables). +TORCH_API std::vector GetCounterNames(); + +// Retrieves the counter data of a given counter, or nullptr if such counter +// does not exist. +TORCH_API CounterData* GetCounter(const std::string& name); + +// Retrieves the current EPOCH time in nanoseconds. 
+TORCH_API int64_t NowNs(); + +// Scope based utility class TORCH_API to measure the time the code takes within +// a given C++ scope. +class TORCH_API TimedSection { + public: + explicit TimedSection(Metric* metric) : metric_(metric), start_(NowNs()) {} + + TimedSection(TimedSection&& other) = delete; + TimedSection(const TimedSection&) = delete; + TimedSection& operator=(const TimedSection&) = delete; + TimedSection& operator=(TimedSection&&) = delete; + ~TimedSection() { + int64_t now = NowNs(); + metric_->AddSample(now, static_cast(now - start_)); + } + + double Elapsed() const { + return 1e-9 * static_cast(NowNs() - start_); + } + + private: + Metric* metric_; + int64_t start_; +}; + +#define TORCH_LAZY_TIMED(name) \ + static torch::lazy::Metric* timed_metric = \ + new torch::lazy::Metric(name, torch::lazy::MetricFnTime); \ + torch::lazy::TimedSection timed_section(timed_metric) + +#define TORCH_LAZY_FN_COUNTER_TIMED_TRACING(ns) \ + TORCH_LAZY_FN_COUNTER(ns); \ + TORCH_LAZY_TIMED("LazyTracing") + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h new file mode 100644 index 0000000000000000000000000000000000000000..c808d3cc6dc6221ea61fee86f86e114e5fdee08c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/multi_wait.h @@ -0,0 +1,65 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/multi_wait.h + */ + +#pragma once + +#include +#include +#include +#include +#include + +#include + +namespace torch::lazy { + +// Support waiting for a number of tasks to complete. +class TORCH_API MultiWait { + public: + explicit MultiWait(size_t count) : count_(count) {} + + // Signal the completion of a single task. + void Done(); + + // Waits until at least count (passed as constructor value) completions + // happened. + void Wait(); + + // Same as above, but waits up to wait_seconds. + void Wait(double wait_seconds); + + // Resets the threshold counter for the MultiWait object. The completed count + // is also reset to zero. + void Reset(size_t count); + + // Creates a completer functor which signals the mult wait object once func + // has completed. Handles exceptions by signaling the multi wait with the + // proper status value. This API returns a function which captures a MultiWait + // reference, so care must be taken such that the reference remains valid for + // the whole lifetime of the returned function. + std::function Completer(std::function func); + + // Similar as the above API, but with explicit capture of the MultiWait shared + // pointer. 
+ static std::function Completer( + std::shared_ptr mwait, + std::function func); + + private: + void Complete(const std::function& func); + + std::mutex mutex_; + std::condition_variable cv_; + size_t count_ = 0; + size_t completed_count_ = 0; + std::exception_ptr exptr_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..ff4fe2341d2bd27f1e13d716782ddaced8cb2b79 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/arithmetic_ir_ops.h @@ -0,0 +1,17 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +TORCH_API NodePtr operator+(const Value& node1, const Value& node2); +TORCH_API NodePtr operator-(const Value& node1, const Value& node2); +TORCH_API NodePtr operator*(const Value& node1, const Value& node2); +TORCH_API NodePtr operator/(const Value& node1, const Value& node2); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h new file mode 100644 index 0000000000000000000000000000000000000000..f900b65fa2280f3cf08fc43942ba815ee3e6c45f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/ops/utils.h @@ -0,0 +1,44 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#include + +#include +#include + +namespace torch::lazy { + +TORCH_API bool StrideIsSupported(c10::ArrayRef stride); + +TORCH_API std::vector GetArrayStridePermutation( + c10::ArrayRef stride); + +TORCH_API Shape MakeDiagonalShape( + const Shape& shape, + int64_t offset, + int64_t dim1, + int64_t dim2); + +TORCH_API Shape +MakePermuteShape(const Shape& source_shape, c10::ArrayRef permutation); + +TORCH_API Shape MakeSelectShape( + const Shape& shape, + int64_t dim, + int64_t start, + int64_t end, + int64_t stride); + +TORCH_API int64_t GetStride(int64_t start, int64_t end, int64_t stride); + +TORCH_API std::vector BuildSqueezedDimensions( + c10::ArrayRef dimensions, + int64_t squeeze_dim); + +TORCH_API std::vector BuildUnsqueezedDimensions( + c10::ArrayRef dimensions, + int64_t squeeze_dim); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h new file mode 100644 index 0000000000000000000000000000000000000000..6a3d5aaa7946fb920ac8d63c3b7524925a4b7b3c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/permutation_util.h @@ -0,0 +1,46 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +TORCH_API std::vector InversePermutation( + c10::ArrayRef input_permutation); + +TORCH_API bool IsPermutation(c10::ArrayRef permutation); + +// Gathers the input using the order specified by the permutation. For each i, +// output[i] = dimensions[permutation[i]]. The given permutation must be the +// same size as the input. +template +std::vector PermuteDimensions( + c10::ArrayRef permutation, + const Container& dimensions) { + using T = typename Container::value_type; + TORCH_CHECK( + dimensions.size() == permutation.size(), + "Invalid permutation specified. dimensions.size() != permutation.size() (", + dimensions.size(), + " vs. ", + permutation.size(), + ")"); + TORCH_CHECK( + IsPermutation(permutation), + "Invalid permutation specified. Permutation is not permutation"); + std::vector output(dimensions.size()); + for (const auto i : c10::irange(permutation.size())) { + output[i] = dimensions[permutation[i]]; + } + return output; +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h new file mode 100644 index 0000000000000000000000000000000000000000..7d4fdb375aa018a79d2df149297705c7cc4bc3f3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape.h @@ -0,0 +1,83 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_symbolic_shapes); + +namespace torch::lazy { + +class TORCH_API Shape { + public: + Shape() = default; + + Shape( + at::ScalarType scalar_type, + c10::ArrayRef sizes, + std::optional> is_symbolic = std::nullopt); + + std::string to_string() const; + + c10::ScalarType scalar_type() const { + return scalar_type_; + } + void set_scalar_type(at::ScalarType value) { + scalar_type_ = value; + } + + int64_t dim() const { + return static_cast(sizes_.size()); + } + c10::ArrayRef sizes() const { + return sizes_; + } + int64_t size(int64_t dim) const { + return sizes_.at(dim); + } + void set_size(int64_t dim, int64_t size) { + sizes_.at(dim) = size; + } + + const std::optional>& is_symbolic() const { + return is_symbolic_; + } + + // Makes a copy with symbolic dims applied + Shape with_symbolic_dims( + std::optional> symbolic_dims) const; + + size_t numel() const; + hash_t hash(bool bakeInSizes) const; + + bool operator==(const Shape& other) const; + + private: + c10::ScalarType scalar_type_{c10::ScalarType::Undefined}; + + // Sizes are the upper bound sizes for a tensor, used by XLA. 
+ std::vector sizes_; + // Stores which dimensions are symbolic + // If nullopt, either it hasn't been initialized or the symbolic + // dimensions are not calculable + std::optional> is_symbolic_ = std::nullopt; +}; + +TORCH_API std::ostream& operator<<(std::ostream& out, const Shape& shape); + +TORCH_API bool symbolicShapeEnabled(); +// Calculate and applies symbolic shapes onto the +// Shape objects passed to result_shapes +TORCH_API void applySymbolicShapesOnLT( + const char* schema_str, + std::vector args, + std::vector& result_shapes); +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h new file mode 100644 index 0000000000000000000000000000000000000000..9d1e9a1cd4c0087e9361f29e5c5319586cb05a3f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/shape_inference.h @@ -0,0 +1,127 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { +// Turn clang-format off, as we rely on the whole signature being on one line +// for codegen. 
+// clang-format off +TORCH_API std::vector compute_shape__adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API std::vector compute_shape__adaptive_avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape__adaptive_avg_pool3d(const at::Tensor & self, at::IntArrayRef output_size); +TORCH_API std::vector compute_shape__adaptive_avg_pool3d_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape_abs(const at::Tensor & self); +TORCH_API std::vector compute_shape_arange_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +TORCH_API std::vector compute_shape_bernoulli(const at::Tensor & self, ::std::optional generator); +TORCH_API std::vector compute_shape_bernoulli(const at::Tensor & self, double p, ::std::optional generator); +TORCH_API std::vector compute_shape_binary_cross_entropy(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +TORCH_API std::vector compute_shape_binary_cross_entropy_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +TORCH_API std::vector compute_shape_cat(at::TensorList tensors, int64_t dim); +TORCH_API std::vector compute_shape_cholesky(const at::Tensor & self, bool upper); +TORCH_API std::vector compute_shape_clamp_min(const at::Tensor & self, const at::Scalar & min); +TORCH_API std::vector compute_shape_clone(const at::Tensor & self, ::std::optional memory_format); +TORCH_API std::vector compute_shape_constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value); +TORCH_API std::vector compute_shape_convolution(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, 
at::IntArrayRef output_padding, int64_t groups); +TORCH_API std::vector compute_shape_convolution_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & weight, at::OptionalIntArrayRef bias_sizes, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups, ::std::array output_mask); +TORCH_API std::vector compute_shape_embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse); +TORCH_API std::vector compute_shape_embedding_dense_backward(const at::Tensor & grad_output, const at::Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); +TORCH_API std::vector compute_shape_expand(const at::Tensor & self, at::IntArrayRef size, bool implicit); +TORCH_API std::vector compute_shape_expand(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); +TORCH_API std::vector compute_shape_flip(const at::Tensor & self, at::IntArrayRef dims); +TORCH_API std::vector compute_shape_glu_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +TORCH_API std::vector compute_shape_glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); +TORCH_API std::vector compute_shape_grid_sampler_2d(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +TORCH_API std::vector compute_shape_grid_sampler_2d_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); +TORCH_API std::vector compute_shape_index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index); +TORCH_API std::vector compute_shape_inverse(const at::Tensor & self); +TORCH_API std::vector compute_shape_isnan(const at::Tensor & self); +TORCH_API std::vector 
compute_shape_log_sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); +TORCH_API std::vector compute_shape_log_sigmoid_forward(const at::Tensor & self); +TORCH_API std::vector compute_shape_logdet(const at::Tensor & self); +TORCH_API std::vector compute_shape_logical_and(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_logical_not(const at::Tensor & self); +TORCH_API std::vector compute_shape_logical_or(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_logical_xor(const at::Tensor & self, const at::Tensor & other); +TORCH_API std::vector compute_shape_masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Scalar & value); +TORCH_API std::vector compute_shape_masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & value); +TORCH_API std::vector compute_shape_max(const at::Tensor & self); +TORCH_API std::vector compute_shape_mean(const at::Tensor & self, ::std::optional dtype); +TORCH_API std::vector compute_shape_min(const at::Tensor & self); +TORCH_API std::vector compute_shape_mv(const at::Tensor & self, const at::Tensor & vec); +TORCH_API std::vector compute_shape_native_batch_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps); +TORCH_API std::vector compute_shape_native_batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask); +TORCH_API std::vector compute_shape_native_dropout(const at::Tensor & input, double p, ::std::optional train); +TORCH_API std::vector 
compute_shape_native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale); +TORCH_API std::vector compute_shape_native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +TORCH_API std::vector compute_shape_native_layer_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, at::IntArrayRef normalized_shape, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask); +TORCH_API std::vector compute_shape_new_empty_strided(const at::Tensor & self, at::IntArrayRef size, at::IntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +TORCH_API std::vector compute_shape_nll_loss2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +TORCH_API std::vector compute_shape_nll_loss2d_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +TORCH_API std::vector compute_shape_nonzero(const at::Tensor & self); +TORCH_API std::vector compute_shape_normal_functional(const at::Tensor & self, double mean, double std, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, int64_t to, ::std::optional generator); +TORCH_API std::vector compute_shape_random(const at::Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); +TORCH_API std::vector compute_shape_relu(const at::Tensor & self); +TORCH_API std::vector compute_shape_repeat(const at::Tensor & self, at::IntArrayRef repeats); +TORCH_API std::vector 
compute_shape_slogdet(const at::Tensor & self); +TORCH_API std::vector compute_shape_smooth_l1_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +TORCH_API std::vector compute_shape_sort(const at::Tensor & self, int64_t dim, bool descending); +TORCH_API std::vector compute_shape_stack(at::TensorList tensors, int64_t dim); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, bool unbiased); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim); +TORCH_API std::vector compute_shape_std(const at::Tensor & self, at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); +TORCH_API std::vector compute_shape_sum(const at::Tensor & self, ::std::optional dtype); +TORCH_API std::vector compute_shape__to_copy(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +TORCH_API std::vector compute_shape_take(const at::Tensor & self, const at::Tensor & index); +TORCH_API std::vector compute_shape_trace(const at::Tensor & self); +TORCH_API std::vector compute_shape_zero(const at::Tensor & self); +TORCH_API std::vector compute_shape_narrow_copy_symint(const at::Tensor & self, int64_t dim, int64_t start, c10::SymInt length); +TORCH_API std::vector compute_shape_hardswish(const at::Tensor & self); +TORCH_API std::vector compute_shape_hardswish_backward(const at::Tensor & grad_output, const at::Tensor & self); +TORCH_API std::vector compute_shape_selu(const at::Tensor & self); +TORCH_API std::vector compute_shape_uniform(const at::Tensor & self, double from, double to, ::std::optional generator); + +// Non-Native ops +TORCH_API std::vector compute_shape_scalar(const at::Scalar& value, const at::ScalarType& type); +TORCH_API std::vector compute_shape_expand(const Output& input0, const 
std::vector& size, const bool& is_scalar_expand); +TORCH_API std::vector compute_shape_view(const Output& input0, const std::vector& output_sizes); +TORCH_API std::vector compute_shape_cast(const Output& input0, const at::ScalarType& dtype, const ::std::optional& stype); + +// View Ops +// (Now that functionalization pass is used, we should kill these in a later PR) +TORCH_API std::vector compute_shape_as_strided_view_update(const Output& target, const Output& input, const std::vector& size, const std::vector& stride, const int64_t& storage_offset); +TORCH_API std::vector compute_shape_as_strided(const Output& input, const std::vector& size, const std::vector& stride, const int64_t& storage_offset); +TORCH_API std::vector compute_shape_diagonal_view_update(const Output& target, const Output& input, const int64_t& offset, const int64_t& dim1, const int64_t& dim2); +TORCH_API std::vector compute_shape_diagonal(const Output& input, const int64_t& offset, const int64_t& dim1, const int64_t& dim2); +TORCH_API std::vector compute_shape_narrow_view_update(const Output& input, const Output& source, const std::vector& base_indices); +TORCH_API std::vector compute_shape_narrow(const Output& input, const std::vector& base_indices, const std::vector& sizes); +TORCH_API std::vector compute_shape_permute(const Output& input, const std::vector& dims); +TORCH_API std::vector compute_shape_resize(const Output& input, const std::vector& size); +TORCH_API std::vector compute_shape_select_view_update(const Output& target, const Output& source, const int64_t& dim, const int64_t& start, const int64_t& end, const int64_t& stride); +TORCH_API std::vector compute_shape_select(const Output& input, const int64_t& dim, const int64_t& start, const int64_t& end, const int64_t& stride); +TORCH_API std::vector compute_shape_squeeze(const Output& input, const int& dim); +TORCH_API std::vector compute_shape_unsqueeze(const Output& input, const int& dim); + +TORCH_API std::vector 
compute_shape_select_scatter(const at::Tensor & self, const at::Tensor & src, int64_t dim, int64_t index); +TORCH_API std::vector compute_shape_diagonal_scatter(const at::Tensor & self, const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); +TORCH_API std::vector compute_shape_slice_scatter_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +TORCH_API std::vector compute_shape_as_strided_scatter_symint(const at::Tensor & self, const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +// clang-format on +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h new file mode 100644 index 0000000000000000000000000000000000000000..16a363079fa8fe784f82d8b0dc391264e6edd76f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor.h @@ -0,0 +1,270 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +class TORCH_API SymNodeImpl : public c10::SymNodeImpl { + public: + SymNodeImpl(NodePtr ptr) : node_(std::move(ptr)) {} + NodePtr node_; +}; + +class LazyTensor; +using LazyTensorPtr = c10::intrusive_ptr; + +class TORCH_API LazyTensor : public c10::intrusive_ptr_target { + public: + // This is the core lazy tensor data structure where all the tensor data is + // held. The lazy tensor is nothing more than a shared pointer to a Data + // object. 
+ struct Data { + Data(BackendDataPtr handle, BackendDevice device) + : handle(std::move(handle)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + Data(Value ir_value, BackendDevice device) + : ir_value(std::move(ir_value)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + Data(at::Tensor tensor_data, BackendDevice device) + : tensor_data(std::move(tensor_data)), + device(std::move(device)), + unique_id(GetNextTensorId()) {} + // TODO(alanwaketan): Remove this ctor. This is a + // temporary ctor to ease XLA LTC migration. It depends on + // XLA's Functionalization integration. + Data(BackendDevice device) + : device(std::move(device)), unique_id(GetNextTensorId()) {} + + Data(Data&& other) = delete; + Data(const Data&) = delete; + Data& operator=(const Data&) = delete; + Data& operator=(Data&&) = delete; + virtual ~Data(); + + BackendDataPtr handle; + Value ir_value; + std::optional tensor_data; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const BackendDevice device; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const int64_t unique_id = 0; + size_t generation = 1; + }; + + static LazyTensorPtr Create( + const at::Tensor& tensor, + const BackendDevice& device); + static LazyTensorPtr Create(Value ir_value, const BackendDevice& device); + static LazyTensorPtr Create(const BackendDataPtr& handle); + static LazyTensorPtr Create(std::shared_ptr data); + + // The default ctor previously created a null LazyTensor (one with no 'data' + // obj). Creating a null LazyTensor is no longer possible, since the same can + // be achieved by creating a null LazyTensorPtr and it is way too confusing to + // have to check both lazy_tensor_ptr && *lazy_tensor_ptr, so everywhere that + // used to rely on a LazyTensor obj with a null Data can now rely on a null + // LazyTensorPtr instead. 
+ LazyTensor() = delete; + LazyTensor(const LazyTensor&) = default; + LazyTensor(LazyTensor&&) noexcept = default; + LazyTensor& operator=(const LazyTensor&) = default; + LazyTensor& operator=(LazyTensor&&) noexcept = default; + + ~LazyTensor() override = default; + + size_t generation() const { + return data()->generation; + } + + // Override it to use your own Shape. + virtual int64_t size(int64_t dim) const; + + // Override it to use your own graph executor. + virtual at::Tensor ToTensor(bool detached); + + void ShallowCopyTo(const LazyTensorPtr& dest) const; + + // Assigns the tensor value to the lazy tensor. + void SetTensor(at::Tensor tensor); + + void UpdateFromTensor(const at::Tensor& tensor, bool sync); + void UpdateFromTensorOut(const at::Tensor& tensor); + void UpdateFromTensorOut(const LazyTensorPtr& tensor); + + const std::shared_ptr& data() const; + + // Override it to use your own type conversion. + virtual at::ScalarType dtype() const; + + MaybeRef shape() const; + + const BackendDevice& GetDevice() const; + int64_t GetUniqueId() const; + + // Fetches the data behind the tensor. If the tensor has a graph defining + // its current value, executes the graph and fetches the data result. + BackendDataPtr GetDataHandle(); + + // Fetches the current value of the data, which can be missing (nullptr) + // in case the tensor has a graph defining its current value, + BackendDataPtr CurrentDataHandle() const; + + void SetDataHandle(BackendDataPtr handle); + void SetDataHandle(BackendDataPtr handle, bool sync); + + // Retrieves the current IR Node, or nullptr in case no active IR Node is + // available. + Value CurrentIrValue() const; + + // Retrieves the IR Node representing this LazyTensor. One will be created if + // missing. Note that although this is a const API, it actually changes the + // internal state of the object. 
+ Value GetIrValue() const; + + void SetIrValue(Value ir_value); + void SetInPlaceIrValue(Value ir_value); + + std::optional CurrentTensorData() const; + + std::vector MakeOutputTensors(const NodePtr& node) const; + + LazyTensorPtr CopyTensorToDevice(const BackendDevice& device); + + // Applies the queue of operations in preparation for using the data. + // Override it to use your own graph executor. + virtual void ApplyPendingGraph(); + + // Override it to set extra information. + virtual void AssignIrValue(Value ir_value) const; + + protected: + explicit LazyTensor(std::shared_ptr data); + + void SetTensorData(at::Tensor tensor_data); + + // We build a graph accumulating operations, but at a given point we + // need to force a rendering, otherwise the graph can grow without control. + // Think: + // for i in range(0, 100000): + // a = a + b + void TryLimitGraphSize(); + + // Override it to instantiate your own data. + virtual Value GetIrValueForTensor( + const at::Tensor& tensor, + const BackendDevice& device) const; + + Value CreateTensorNode(const BackendDataPtr& data, bool read_only) const; + + private: + LazyTensor(const at::Tensor& tensor, const BackendDevice& device); + LazyTensor(Value ir_value, const BackendDevice& device); + explicit LazyTensor(const BackendDataPtr& handle); + + static int64_t GetNextTensorId(); + + std::shared_ptr data_; +}; + +// Utils to convert at::Tensor to LazyTensor, and vice versa. + +// Section 0: c10::Tensorlist ==> lazy::TensorList +// note: GetTensorList is not totally parallel to GetLtcTensor; A TensorList +// skips +// the LazyTensor wrappers, assuming that the list of underlying IR nodes +// is actually more useful for downstream computations. TBD. +TORCH_API torch::lazy::Value GetTensorList(at::ITensorListRef tensors); + +// Section 1: at::Tensor => LazyTensor. +// Extracts the LazyTensor out of an at::Tensor. Returns a null LazyTensor +// if the tensor is not a lazy tensor. 
+TORCH_API LazyTensorPtr TryGetLtcTensor(const at::Tensor& tensor); + +// Extracts the LazyTensor out of an at::Tensor. Throws an exception +// if the tensor is not a lazy tensor. +TORCH_API LazyTensorPtr GetLtcTensor(const at::Tensor& tensor); + +// Same as above, applied to a list of tensors. +TORCH_API std::vector GetLtcTensors( + c10::ArrayRef tensors); + +// If tensor is a lazy tensor type, returns the LazyTensor embedded within it, +// otherwise creates a new lazy tensor type with tensor as data. +TORCH_API LazyTensorPtr GetOrCreateLtcTensor( + const std::optional& tensor, + const BackendDevice& device); + +TORCH_API LazyTensorPtr GetLtcTensorOrCreateForWrappedNumber( + const at::Tensor& tensor, + const BackendDevice& device); + +// Section 2: LazyTensor => at::Tensor. +// Creates an ATen tensor from an LazyTensor. +TORCH_API at::Tensor CreateAtenFromLtcTensor(const LazyTensorPtr& ltc_tensor); +TORCH_API at::Tensor CreateAtenFromLtcTensor(LazyTensor&& ltc_tensor); + +// Note [Lazy Tensor Functionalization] +// The functionalization pass is implemented by wrapping all TensorImpl +// objects in C++ with an extra FunctionalTensorWrapper object, +// that knows how to perform functionalization +// +// Certain functions in the aten API serve as entry/exit points for +// functionalization, where we need to perform the wrapping/unwrapping: +// - aten::to.device +// - aten::empty + +// Given a non-lazy tensor, this function creates a lazy tensor on the specified +// (lazy) device. The functionalize_output determines whether or not we should +// wrap the output in a "functional wrapper". +// +// How do you know whether to pass true/false for functionalize_output? 
+// +// Case 1: nonlazy -> lazy +// If you're implementing a function that takes in nonlazy tensors and returns +// lazy tensors, then you should think of that function as an "entrypoint" to +// functionalization, and use functionalize_output=true Examples include: +// - factory functions (the LTC kernel for at::empty) +// - CPU -> Lazy device conversions (the LTC kernel for at::to_device) +// +// Case 2: lazy -> lazy +// If you're implementing a function that takes in lazy tensors and returns +// lazy tensors, +// **but** requires creating lazy tensors internally, +// then you can assume that the current function is running inside of some +// outer context where functionalization is already running, that will take +// care of doing the wrapping for you, and use functionalize_output=true +// Examples include: +// - CPU fallback (takes in lazy tensors, converts to cpu, calls kernel, +// converts returns back to lazy tensors). +TORCH_API at::Tensor to_lazy_tensor( + const at::Tensor& self, + const c10::TensorOptions& options, + at::Device device, + bool non_blocking, + bool functionalize_output); + +template +auto TupleAtenFromLtcTensorsImpl( + const std::vector& tensors, + std::index_sequence /*unused*/) { + return std::make_tuple(CreateAtenFromLtcTensor(tensors[Indices])...); +} + +template +auto TupleAtenFromLtcTensors(const std::vector& tensors) { + return TupleAtenFromLtcTensorsImpl(tensors, std::make_index_sequence{}); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..161008918c8ccb22011e0f13ef2cae0e16f5b133 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_impl.h @@ -0,0 +1,66 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +// Tensor implementation class used to be fed to the at::Tensor. +// Its scope is just to handle an LazyTensor. +class TORCH_API LTCTensorImpl final : public c10::TensorImpl { + public: + explicit LTCTensorImpl(const LazyTensorPtr& tensor); + explicit LTCTensorImpl(const LazyTensor& tensor); + explicit LTCTensorImpl(LazyTensor&& tensor); + + LazyTensorPtr tensor() { + return tensor_; + } + + void set_tensor(const LazyTensorPtr& lazy_tensor); + + void force_refresh_sizes() { + generation_ = 0; + } + + c10::intrusive_ptr shallow_copy_and_detach( + const c10::VariableVersion& version_counter, + bool allow_tensor_metadata_change) const override; + + c10::intrusive_ptr shallow_copy_and_detach( + c10::VariableVersion&& version_counter, + bool allow_tensor_metadata_change) const override; + + void shallow_copy_from(const c10::intrusive_ptr& impl) override; + + at::IntArrayRef sizes_custom() const override; + at::IntArrayRef strides_custom() const override; + int64_t numel_custom() const override; + int64_t storage_offset_custom() const override; + int64_t dim_custom() const override; + bool is_strides_like_custom(at::MemoryFormat memory_format) const override; + c10::SymBool sym_is_non_overlapping_and_dense_custom() const override; + + c10::SymBool 
sym_is_contiguous_custom( + at::MemoryFormat memory_format) const override; + c10::SymIntArrayRef sym_sizes_custom() const override; + c10::SymIntArrayRef sym_strides_custom() const override; + c10::SymInt sym_numel_custom() const override; + + private: + void setup_size_properties(); + + LazyTensorPtr tensor_; + mutable std::optional> sym_sizes_; + size_t generation_{0}; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h new file mode 100644 index 0000000000000000000000000000000000000000..e47484f16265b9675645aa947f06d5cdf59d54cf --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/tensor_util.h @@ -0,0 +1,81 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include + +#include +#include + +namespace torch::lazy { + +TORCH_API std::vector ComputeArrayStrides( + c10::ArrayRef sizes); + +TORCH_API std::vector DataHandlesToTensors( + c10::ArrayRef data_handles, + at::ScalarType dest_element_type); + +// Uploads an ATEN tensor data to the device and fetches the corresponding +// device data handle. +TORCH_API BackendDataPtr +TensorToDataHandle(const at::Tensor& tensor, const BackendDevice& device); + +// Retrieves the device data handles by parallel uploading data onto the +// corresponding devices. +TORCH_API std::vector CreateTensorsData( + const std::vector& tensors, + const std::vector& devices); + +// Makes a deep copy of an ATEN tensor. 
+inline at::Tensor CopyTensor(const at::Tensor& ref) { + return ref.to(ref.options(), /*non_blocking=*/false, /*copy=*/true); +} + +// Same as above, with an additional cast. +inline at::Tensor CopyTensor( + const at::Tensor& ref, + at::ScalarType dest_type, + bool copy = true) { + return ref.to(ref.options().dtype(dest_type), /*non_blocking=*/false, copy); +} + +template +T OptionalOr(const std::optional& value, T defval) { + return value ? static_cast(*value) : defval; +} + +// Unwraps tensor to target dtype if it's a wrapped number. +inline at::Tensor UnwrapNumber(const at::Tensor& tensor, at::ScalarType dtype) { + return tensor.unsafeGetTensorImpl()->is_wrapped_number() ? tensor.to(dtype) + : tensor; +} + +template +at::Scalar MakeIntScalar(T value) { + return at::Scalar(static_cast(value)); +} + +// Routing values to device data maximizes the changes for compilation cache +// hits, but it can prevent the compiler to perform optimizations. So tensor +// values which are within a given set, are routed to constant scalars if this +// API returns true. +TORCH_API bool IsSpecialScalar(const at::Scalar& value); + +// Note: returns a reference instead of a fresh tensor to avoid refcount bumps. +inline const at::Tensor& maybe_unwrap_functional(const at::Tensor& tensor) { + if (at::functionalization::impl::isFunctionalTensor(tensor)) { + return at::functionalization::impl::unsafeGetFunctionalWrapper(tensor) + ->value(); + } else { + return tensor; + } +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h new file mode 100644 index 0000000000000000000000000000000000000000..ac54c00f81a1bc0b7d47ce5d6402b98f1007f104 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/thread_pool.h @@ -0,0 +1,41 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * This file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/metrics.h + */ + +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +// NOLINTNEXTLINE(cppcoreguidelines-special-member-functions) +class TORCH_API Completion { + public: + class Data; + + explicit Completion(std::shared_ptr data); + + ~Completion(); + + void Wait(); + + private: + std::shared_ptr data_; +}; + +// Schedules a closure which might wait for IO or other events/conditions. +TORCH_API void ScheduleIoClosure(std::function closure); +TORCH_API Completion +ScheduleIoClosureWithCompletion(std::function closure); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h new file mode 100644 index 0000000000000000000000000000000000000000..ca5fc4645c2e69fd3894c382a7fcf47dc64ff398 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/trie.h @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +#include +#include +#include + +namespace torch::lazy { + +struct TORCH_API TrieNode { + static size_t GetNextUniqueId() { + static thread_local size_t id_generator = 0; + return id_generator++; + } + + size_t unique_id; + size_t hit_counter; + NodePtr ir_node; + std::list> successors; + + TrieNode() : unique_id(GetNextUniqueId()), hit_counter(0), ir_node(nullptr) {} + explicit TrieNode(NodePtr node) + : unique_id(GetNextUniqueId()), + hit_counter(0), + ir_node(std::move(node)) {} +}; + +class TORCH_API TrieCache { + public: + static TrieCache* Get(); + + TrieNode* Current() const; + // Take an iterator as the input because we want to move the corresponding + // node in the successor list to achieve a LRU caching effect + void SetCurrent(std::list>::iterator& iter); + // Used in MarkStep to indicate the end of one tracing + void ResetCurrent(); + + // Create a new TrieNode for ir_node and insert into the TrieCache + void Insert(NodePtr ir_node); + + // Clear all TrieCache nodes + // TODO: Because we don't expect user to explicitly call this function via + // a Python API, we may need to introduce a threshold on the size of the cache + // to avoid holding tensors for too long. 
+ void Clear(); + + void DumpToDotFile(const std::string& file_name); + + private: + TrieCache(); + + std::shared_ptr root_; + TrieNode* current_; +}; + +template +NodePtr LookupNodeFromTrieCache(Args&&... args) { + auto& successors = TrieCache::Get()->Current()->successors; + for (auto it = successors.begin(); it != successors.end(); it++) { + NodePtr ir_node = (*it)->ir_node; + const T* concrete_node = NodeCast(ir_node.get()); + if (concrete_node && + concrete_node->CanBeReused(std::forward(args)...)) { + TORCH_LAZY_COUNTER( + "IrNodeReused_" + c10::demangle((typeid(T).name())), 1); + (*it)->hit_counter++; + TrieCache::Get()->SetCurrent(it); + return ir_node; + } + } + return nullptr; +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h new file mode 100644 index 0000000000000000000000000000000000000000..718cac504751dc9b08fc72d69fcf91dbfbe77376 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/unique.h @@ -0,0 +1,59 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Unique in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/unique.h + */ + +#pragma once + +#include + +#include +#include + +namespace torch::lazy { + +// Helper class to allow tracking zero or more things, which should be forcibly +// be one only thing. 
+template > +class Unique { + public: + std::pair set(const T& value) { + if (value_) { + TORCH_CHECK(C()(*value_, value), "'", *value_, "' vs '", value); + return std::pair(false, *value_); + } + value_ = value; + return std::pair(true, *value_); + } + + operator bool() const { + return value_.has_value(); + } + operator const T&() const { + return *value_; + } + const T& operator*() const { + return *value_; + } + const T* operator->() const { + return value_.operator->(); + } + + std::set AsSet() const { + std::set vset; + if (value_.has_value()) { + vset.insert(*value_); + } + return vset; + } + + private: + std::optional value_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h new file mode 100644 index 0000000000000000000000000000000000000000..4324148de300340fa58151cc73ee6032f1f530ae --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/core/util.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +/** + * Most of the utils in this file is adapted from PyTorch/XLA + * https://github.com/pytorch/xla/blob/e0e5f937a0ba8d904f9608137dc8c51ba439df2d/third_party/xla_client/util.h + */ + +#pragma once + +#include +#include +#include + +#include +#include + +namespace torch::lazy { + +// Similar to c10::scope_exit but with a status. +// TODO(alanwaketan): Consolidate it with c10::scope_exit. 
+template +class Cleanup { + public: + using StatusType = T; + + explicit Cleanup(std::function&& func) + : func_(std::move(func)) {} + Cleanup(Cleanup&& ref) noexcept + : func_(std::move(ref.func_)), status_(std::move(ref.status_)) {} + Cleanup(const Cleanup&) = delete; + + ~Cleanup() { + if (func_ != nullptr) { + func_(std::move(status_)); + } + } + + Cleanup& operator=(const Cleanup&) = delete; + + Cleanup& operator=(Cleanup&& ref) noexcept { + if (this != &ref) { + func_ = std::move(ref.func_); + status_ = std::move(ref.status_); + } + return *this; + } + + void Release() { + func_ = nullptr; + } + + void SetStatus(StatusType&& status) { + status_ = std::move(status); + } + + const StatusType& GetStatus() const { + return status_; + } + + private: + std::function func_; + StatusType status_; +}; + +using ExceptionCleanup = Cleanup; + +// Allows APIs which might return const references and values, to not be forced +// to return values in the signature. +// TODO(alanwaketan): This is clever, but is there really no std or c10 +// supports? Needs more investigations. 
+template +class MaybeRef { + public: + /* implicit */ MaybeRef(const T& ref) : ref_(ref) {} + /* implicit */ MaybeRef(T&& value) + : storage_(std::move(value)), ref_(*storage_) {} + + const T& Get() const { + return ref_; + } + const T& operator*() const { + return Get(); + } + operator const T&() const { + return Get(); + } + + bool IsStored() const { + return storage_.has_value(); + } + + private: + std::optional storage_; + // NOLINTNEXTLINE(cppcoreguidelines-avoid-const-or-ref-data-members) + const T& ref_; +}; + +template +std::vector Iota(size_t size, T init = 0, T incr = 1) { + std::vector result(size); + T value = init; + for (size_t i = 0; i < size; ++i, value += incr) { + result[i] = value; + } + return result; +} + +template +std::vector ToVector(const S& input) { + return std::vector(input.begin(), input.end()); +} + +template +std::optional> ToOptionalVector( + c10::OptionalArrayRef arrayRef) { + if (arrayRef) { + return arrayRef->vec(); + } + return std::nullopt; +} + +template +std::underlying_type_t GetEnumValue(T value) { + return static_cast>(value); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h new file mode 100644 index 0000000000000000000000000000000000000000..dfd6a881958c2d7eb0cbbe5006d4302669450d4d --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyIr.h @@ -0,0 +1,10312 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// This file contains autogenerated LazyTensor IR nodes +#include +#include +#include +#include +#include +#include +#include +#include "torch/csrc/lazy/ts_backend/ts_node.h" + +namespace torch { +namespace lazy { +using at::operator<<; + +// kNullValue is used to contribute a static hash value any time +// a node has an Optional input that is nullopt. It is important +// to differentiate between HASH(std::nullopt, something) and HASH(something, std::nullopt), +// and using kNullValue in the hash function in the order of arguments +// serves this purpose. 
+static const torch::lazy::Value kNullValue = torch::lazy::Value(); + +class AdaptiveAvgPool2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_adaptive_avg_pool2d); + } + + AdaptiveAvgPool2d(const torch::lazy::Value& self, const ::std::vector& output_size, std::vector&& shapes) + : TsNode( + AdaptiveAvgPool2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size)), + output_size(output_size) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + + torch::lazy::TSOpVector _adaptive_avg_pool2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_adaptive_avg_pool2d_out.size(), 1); + + return _adaptive_avg_pool2d_out; + + } + + + ::std::vector output_size; + + +}; + +class AdaptiveAvgPool2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_adaptive_avg_pool2d_backward); + } + + AdaptiveAvgPool2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + AdaptiveAvgPool2dBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const 
override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector _adaptive_avg_pool2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_adaptive_avg_pool2d_backward_out.size(), 1); + + return _adaptive_avg_pool2d_backward_out; + + } + + + + + +}; + +class LogSoftmax : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_log_softmax); + } + + LogSoftmax(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float, std::vector&& shapes) + : TsNode( + LogSoftmax::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, half_to_float)), + dim(dim), + half_to_float(half_to_float) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", half_to_float=" << half_to_float; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->half_to_float == half_to_float); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + 
kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("half_to_float", half_to_float); + + torch::lazy::TSOpVector _log_softmax_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_log_softmax_out.size(), 1); + + return _log_softmax_out; + + } + + + int64_t dim; + bool half_to_float; + + +}; + +class LogSoftmaxBackwardData : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_log_softmax_backward_data); + } + + LogSoftmaxBackwardData(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype, std::vector&& shapes) + : TsNode( + LogSoftmaxBackwardData::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, input_dtype)), + dim(dim), + input_dtype(input_dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", input_dtype=" << input_dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output && + this->dim == dim && + this->input_dtype == input_dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("input_dtype", input_dtype); + + torch::lazy::TSOpVector 
_log_softmax_backward_data_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_log_softmax_backward_data_out.size(), 1); + + return _log_softmax_backward_data_out; + + } + + + int64_t dim; + at::ScalarType input_dtype; + + +}; + +class ReshapeAliasCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_reshape_alias_copy); + } + + ReshapeAliasCopy(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, std::vector&& shapes) + : TsNode( + ReshapeAliasCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + + torch::lazy::TSOpVector _reshape_alias_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_reshape_alias_copy_out.size(), 1); + + return _reshape_alias_copy_out; + + } + + + ::std::vector size; + ::std::vector stride; + + +}; + +class Softmax : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_softmax); + } + + Softmax(const torch::lazy::Value& 
self, const int64_t& dim, const bool& half_to_float, std::vector&& shapes) + : TsNode( + Softmax::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, half_to_float)), + dim(dim), + half_to_float(half_to_float) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", half_to_float=" << half_to_float; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& half_to_float) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->half_to_float == half_to_float); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("half_to_float", half_to_float); + + torch::lazy::TSOpVector _softmax_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_softmax_out.size(), 1); + + return _softmax_out; + + } + + + int64_t dim; + bool half_to_float; + + +}; + +class SoftmaxBackwardData : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::_softmax_backward_data); + } + + SoftmaxBackwardData(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype, std::vector&& shapes) + : TsNode( + SoftmaxBackwardData::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, input_dtype)), + dim(dim), + input_dtype(input_dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << 
dim; + ss << ", input_dtype=" << input_dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, const int64_t& dim, const at::ScalarType& input_dtype) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output && + this->dim == dim && + this->input_dtype == input_dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("input_dtype", input_dtype); + + torch::lazy::TSOpVector _softmax_backward_data_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_softmax_backward_data_out.size(), 1); + + return _softmax_backward_data_out; + + } + + + int64_t dim; + at::ScalarType input_dtype; + + +}; + +class Abs : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::abs); + } + + Abs(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Abs::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + 
torch::lazy::TSOpVector abs_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(abs_out.size(), 1); + + return abs_out; + + } + + + + + +}; + +class AddTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::add); + } + + AddTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + AddTensor::ClassOpKind(), + OpList{self, other, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector add_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(add_out.size(), 1); + + return add_out; + + } + + + + + +}; + +class Addcdiv : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addcdiv); + } + + Addcdiv(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Addcdiv::ClassOpKind(), + OpList{self, tensor1, tensor2, value}, + std::move(shapes), + /* 
num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == tensor1 && + operand(i++) == tensor2 && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("value", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addcdiv_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addcdiv_out.size(), 1); + + return addcdiv_out; + + } + + + + + +}; + +class Addcmul : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addcmul); + } + + Addcmul(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Addcmul::ClassOpKind(), + OpList{self, tensor1, tensor2, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& tensor1, const torch::lazy::Value& tensor2, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == 
tensor1 && + operand(i++) == tensor2 && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("value", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addcmul_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addcmul_out.size(), 1); + + return addcmul_out; + + } + + + + + +}; + +class Addmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::addmm); + } + + Addmm(const torch::lazy::Value& self, const torch::lazy::Value& mat1, const torch::lazy::Value& mat2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + Addmm::ClassOpKind(), + OpList{self, mat1, mat2, beta, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat1, const torch::lazy::Value& mat2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat1 && + operand(i++) == mat2 && + operand(i++) == beta && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(2); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("beta", loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector addmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(addmm_out.size(), 1); + + return addmm_out; + + } + + + + + +}; + +class AliasCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::alias_copy); + } + + AliasCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + AliasCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector alias_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(alias_copy_out.size(), 1); + + return alias_copy_out; + + } + + + + + +}; + +class All : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::all); + } + + All(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + All::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream 
ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector all_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(all_out.size(), 1); + + return all_out; + + } + + + + + +}; + +class Any : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::any); + } + + Any(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Any::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector any_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(any_out.size(), 1); + + return any_out; + + } + + + + + +}; + +class ArangeStartOut : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::arange); + } + + ArangeStartOut(const torch::lazy::Value& start, const torch::lazy::Value& end, const torch::lazy::Value& step, const 
torch::lazy::Value& out, std::vector&& shapes) + : TsNode( + ArangeStartOut::ClassOpKind(), + OpList{start, end, step, out}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& start, const torch::lazy::Value& end, const torch::lazy::Value& step, const torch::lazy::Value& out) const { + size_t i = 0; + return (operand(i++) == start && + operand(i++) == end && + operand(i++) == step && + operand(i++) == out); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("out", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector arange_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(arange_out.size(), 1); + + return arange_out; + + } + + + + + +}; + +class AsStridedCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::as_strided_copy); + } + + AsStridedCopy(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset, std::vector&& shapes) + : TsNode( + AsStridedCopy::ClassOpKind(), + OpList{self, storage_offset.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + has_storage_offset = !!storage_offset; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << 
size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == storage_offset.value_or(kNullValue) && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + arguments.emplace_back(has_storage_offset ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector as_strided_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(as_strided_copy_out.size(), 1); + + return as_strided_copy_out; + + } + + + ::std::vector size; + ::std::vector stride; + bool has_storage_offset: 1; + +}; + +class AsStridedScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::as_strided_scatter); + } + + AsStridedScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset, std::vector&& shapes) + : TsNode( + AsStridedScatter::ClassOpKind(), + OpList{self, src, storage_offset.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, stride)), + size(size), + stride(stride) + { + has_storage_offset = !!storage_offset; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", stride=" << stride; + return ss.str(); + } + + + + bool CanBeReused(const 
torch::lazy::Value& self, const torch::lazy::Value& src, const ::std::vector& size, const ::std::vector& stride, const ::std::optional& storage_offset) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + nullable_operand(i++) == storage_offset.value_or(kNullValue) && + this->size == size && + this->stride == stride); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + arguments.emplace_back("stride", stride); + arguments.emplace_back(has_storage_offset ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector as_strided_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(as_strided_scatter_out.size(), 1); + + return as_strided_scatter_out; + + } + + + ::std::vector size; + ::std::vector stride; + bool has_storage_offset: 1; + +}; + +class AvgPool2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::avg_pool2d); + } + + AvgPool2d(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override, std::vector&& shapes) + : TsNode( + AvgPool2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + ceil_mode(ceil_mode), + count_include_pad(count_include_pad), + divisor_override(divisor_override) + { + + } + + std::string 
ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", ceil_mode=" << ceil_mode; + ss << ", count_include_pad=" << count_include_pad; + if (divisor_override.has_value()) { + ss << ", divisor_override=" << divisor_override.value(); + } else { + ss << ", divisor_override=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override) const { + size_t i = 0; + return (operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->ceil_mode == ceil_mode && + this->count_include_pad == count_include_pad && + ((!this->divisor_override&&!divisor_override) || (this->divisor_override&&divisor_override && *(this->divisor_override) == *divisor_override))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back("count_include_pad", count_include_pad); + arguments.emplace_back("divisor_override", divisor_override); + + torch::lazy::TSOpVector avg_pool2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(avg_pool2d_out.size(), 1); + + return avg_pool2d_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + bool ceil_mode; + 
bool count_include_pad; + ::std::optional divisor_override; + + +}; + +class AvgPool2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::avg_pool2d_backward); + } + + AvgPool2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override, std::vector&& shapes) + : TsNode( + AvgPool2dBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + ceil_mode(ceil_mode), + count_include_pad(count_include_pad), + divisor_override(divisor_override) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", ceil_mode=" << ceil_mode; + ss << ", count_include_pad=" << count_include_pad; + if (divisor_override.has_value()) { + ss << ", divisor_override=" << divisor_override.value(); + } else { + ss << ", divisor_override=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const bool& ceil_mode, const bool& count_include_pad, const ::std::optional& divisor_override) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->ceil_mode == ceil_mode && + this->count_include_pad == count_include_pad && + ((!this->divisor_override&&!divisor_override) 
|| (this->divisor_override&&divisor_override && *(this->divisor_override) == *divisor_override))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back("count_include_pad", count_include_pad); + arguments.emplace_back("divisor_override", divisor_override); + + torch::lazy::TSOpVector avg_pool2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(avg_pool2d_backward_out.size(), 1); + + return avg_pool2d_backward_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + bool ceil_mode; + bool count_include_pad; + ::std::optional divisor_override; + + +}; + +class Baddbmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::baddbmm); + } + + Baddbmm(const torch::lazy::Value& self, const torch::lazy::Value& batch1, const torch::lazy::Value& batch2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + Baddbmm::ClassOpKind(), + OpList{self, batch1, batch2, beta, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& batch1, const torch::lazy::Value& batch2, const torch::lazy::Value& beta, const torch::lazy::Value& alpha) const 
{ + size_t i = 0; + return (operand(i++) == self && + operand(i++) == batch1 && + operand(i++) == batch2 && + operand(i++) == beta && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(2); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("beta", loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector baddbmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(baddbmm_out.size(), 1); + + return baddbmm_out; + + } + + + + + +}; + +class Bernoulli : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bernoulli); + } + + Bernoulli(const torch::lazy::Value& self, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Bernoulli::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(generator)), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector 
kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector bernoulli_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bernoulli_out.size(), 1); + + return bernoulli_out; + + } + + + ::std::optional generator; + + +}; + +class BernoulliP : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bernoulli); + } + + BernoulliP(const torch::lazy::Value& self, const double& p, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + BernoulliP::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(p, generator)), + p(p), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", p=" << p; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& p, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->p == p && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("p", p); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector bernoulli_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bernoulli_out.size(), 1); + + return bernoulli_out; + + } + 
+ + double p; + ::std::optional generator; + + +}; + +class BinaryCrossEntropy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::binary_cross_entropy); + } + + BinaryCrossEntropy(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, std::vector&& shapes) + : TsNode( + BinaryCrossEntropy::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction)), + reduction(reduction) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + + torch::lazy::TSOpVector binary_cross_entropy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(binary_cross_entropy_out.size(), 1); + + return binary_cross_entropy_out; + + } + + + int64_t reduction; + bool has_weight: 1; + +}; + +class BinaryCrossEntropyBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::binary_cross_entropy_backward); + } + + BinaryCrossEntropyBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, std::vector&& shapes) + : TsNode( + BinaryCrossEntropyBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction)), + reduction(reduction) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + + torch::lazy::TSOpVector binary_cross_entropy_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(binary_cross_entropy_backward_out.size(), 1); + + return binary_cross_entropy_backward_out; + + } + + + int64_t reduction; + bool has_weight: 1; + +}; + +class BitwiseAndTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bitwise_and); + } + + BitwiseAndTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + BitwiseAndTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bitwise_and_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bitwise_and_out.size(), 1); + + return bitwise_and_out; + + } + + + + + +}; + +class BitwiseOrTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bitwise_or); + } + + BitwiseOrTensor(const torch::lazy::Value& self, const 
torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + BitwiseOrTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bitwise_or_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bitwise_or_out.size(), 1); + + return bitwise_or_out; + + } + + + + + +}; + +class Bmm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::bmm); + } + + Bmm(const torch::lazy::Value& self, const torch::lazy::Value& mat2, std::vector&& shapes) + : TsNode( + Bmm::ClassOpKind(), + OpList{self, mat2}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector bmm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(bmm_out.size(), 1); + + return bmm_out; + + } + + + + + +}; + +class Cat : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cat); + } + + Cat(const torch::lazy::Value& tensors, const int64_t& dim, std::vector&& shapes) + : TsNode( + Cat::ClassOpKind(), + OpList{tensors}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& tensors, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == tensors && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector cat_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cat_out.size(), 1); + + return cat_out; + + } + + + int64_t dim; + + +}; + +class Clamp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::clamp); + } + + Clamp(const torch::lazy::Value& self, const ::std::optional& min, const ::std::optional& max, std::vector&& shapes) + : TsNode( + Clamp::ClassOpKind(), + OpList{self, min.value_or(kNullValue), max.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + has_min = !!min; + has_max = !!max; + } + 
+ std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& min, const ::std::optional& max) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == min.value_or(kNullValue) && + nullable_operand(i++) == max.value_or(kNullValue)); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_min ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_max ? loctx->GetOutputOp(operand(i++)) : nullptr); + + torch::lazy::TSOpVector clamp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(clamp_out.size(), 1); + + return clamp_out; + + } + + + + bool has_min: 1; + bool has_max: 1; + +}; + +class ClampMin : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::clamp_min); + } + + ClampMin(const torch::lazy::Value& self, const torch::lazy::Value& min, std::vector&& shapes) + : TsNode( + ClampMin::ClassOpKind(), + OpList{self, min}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& min) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == min); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + 
size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector clamp_min_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(clamp_min_out.size(), 1); + + return clamp_min_out; + + } + + + + + +}; + +class ConstantPadNd : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::constant_pad_nd); + } + + ConstantPadNd(const torch::lazy::Value& self, const ::std::vector& pad, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + ConstantPadNd::ClassOpKind(), + OpList{self, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(pad)), + pad(pad) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", pad=" << pad; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& pad, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == value && + this->pad == pad); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("pad", pad); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector constant_pad_nd_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(constant_pad_nd_out.size(), 1); + + return constant_pad_nd_out; + + } + + + ::std::vector pad; + + +}; + +class Convolution : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::convolution); + } + + Convolution(const torch::lazy::Value& input, 
const torch::lazy::Value& weight, const ::std::optional& bias, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, std::vector&& shapes) + : TsNode( + Convolution::ClassOpKind(), + OpList{input, weight, bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(stride, padding, dilation, transposed, output_padding, groups)), + stride(stride), + padding(padding), + dilation(dilation), + transposed(transposed), + output_padding(output_padding), + groups(groups) + { + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", transposed=" << transposed; + ss << ", output_padding=" << output_padding; + ss << ", groups=" << groups; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional& bias, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups) const { + size_t i = 0; + return (operand(i++) == input && + operand(i++) == weight && + nullable_operand(i++) == bias.value_or(kNullValue) && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->transposed == transposed && + this->output_padding == output_padding && + this->groups == groups); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(9); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + 
arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("transposed", transposed); + arguments.emplace_back("output_padding", output_padding); + arguments.emplace_back("groups", groups); + + torch::lazy::TSOpVector convolution_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(convolution_out.size(), 1); + + return convolution_out; + + } + + + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool transposed; + ::std::vector output_padding; + int64_t groups; + bool has_bias: 1; + +}; + +class ConvolutionBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::convolution_backward); + } + + ConvolutionBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional<::std::vector>& bias_sizes, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + ConvolutionBackward::ClassOpKind(), + OpList{grad_output, input, weight}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(bias_sizes, stride, padding, dilation, transposed, output_padding, groups, output_mask)), + bias_sizes(bias_sizes), + stride(stride), + padding(padding), + dilation(dilation), + transposed(transposed), + output_padding(output_padding), + groups(groups), + output_mask(output_mask) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (bias_sizes.has_value()) { + ss << ", bias_sizes=" << bias_sizes.value(); + } else { + ss << ", bias_sizes=null"; + } + ss << ", 
stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", transposed=" << transposed; + ss << ", output_padding=" << output_padding; + ss << ", groups=" << groups; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& weight, const ::std::optional<::std::vector>& bias_sizes, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& transposed, const ::std::vector& output_padding, const int64_t& groups, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == input && + operand(i++) == weight && + ((!this->bias_sizes&&!bias_sizes) || (this->bias_sizes&&bias_sizes && *(this->bias_sizes) == *bias_sizes)) && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->transposed == transposed && + this->output_padding == output_padding && + this->groups == groups && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(11); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("bias_sizes", bias_sizes); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("transposed", transposed); + arguments.emplace_back("output_padding", output_padding); + arguments.emplace_back("groups", groups); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector 
convolution_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(convolution_backward_out.size(), 3); + + return convolution_backward_out; + + } + + + ::std::optional<::std::vector> bias_sizes; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool transposed; + ::std::vector output_padding; + int64_t groups; + ::std::vector output_mask; + + +}; + +class Cos : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cos); + } + + Cos(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Cos::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector cos_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cos_out.size(), 1); + + return cos_out; + + } + + + + + +}; + +class Cumsum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::cumsum); + } + + Cumsum(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Cumsum::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, dtype)), + dim(dim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); 
+ ss << ", dim=" << dim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector cumsum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(cumsum_out.size(), 1); + + return cumsum_out; + + } + + + int64_t dim; + ::std::optional dtype; + + +}; + +class DetachCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::detach_copy); + } + + DetachCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + DetachCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector detach_copy_out = 
torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(detach_copy_out.size(), 1); + + return detach_copy_out; + + } + + + + + +}; + +class DiagonalCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::diagonal_copy); + } + + DiagonalCopy(const torch::lazy::Value& self, const int64_t& offset, const int64_t& dim1, const int64_t& dim2, std::vector&& shapes) + : TsNode( + DiagonalCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(offset, dim1, dim2)), + offset(offset), + dim1(dim1), + dim2(dim2) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", offset=" << offset; + ss << ", dim1=" << dim1; + ss << ", dim2=" << dim2; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& offset, const int64_t& dim1, const int64_t& dim2) const { + size_t i = 0; + return (operand(i++) == self && + this->offset == offset && + this->dim1 == dim1 && + this->dim2 == dim2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("offset", offset); + arguments.emplace_back("dim1", dim1); + arguments.emplace_back("dim2", dim2); + + torch::lazy::TSOpVector diagonal_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(diagonal_copy_out.size(), 1); + + return diagonal_copy_out; + + } + + + int64_t offset; + int64_t dim1; + int64_t dim2; + + +}; + +class DiagonalScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::diagonal_scatter); + } + + DiagonalScatter(const 
torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& offset, const int64_t& dim1, const int64_t& dim2, std::vector&& shapes) + : TsNode( + DiagonalScatter::ClassOpKind(), + OpList{self, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(offset, dim1, dim2)), + offset(offset), + dim1(dim1), + dim2(dim2) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", offset=" << offset; + ss << ", dim1=" << dim1; + ss << ", dim2=" << dim2; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& offset, const int64_t& dim1, const int64_t& dim2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + this->offset == offset && + this->dim1 == dim1 && + this->dim2 == dim2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("offset", offset); + arguments.emplace_back("dim1", dim1); + arguments.emplace_back("dim2", dim2); + + torch::lazy::TSOpVector diagonal_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(diagonal_scatter_out.size(), 1); + + return diagonal_scatter_out; + + } + + + int64_t offset; + int64_t dim1; + int64_t dim2; + + +}; + +class DivTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::div); + } + + DivTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + DivTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + 
torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector div_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(div_out.size(), 1); + + return div_out; + + } + + + + + +}; + +class DivTensorMode : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::div); + } + + DivTensorMode(const torch::lazy::Value& self, const torch::lazy::Value& other, const ::std::optional& rounding_mode, std::vector&& shapes) + : TsNode( + DivTensorMode::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(rounding_mode)), + rounding_mode(rounding_mode.has_value() ? 
::std::make_optional(std::string(*rounding_mode)) : ::std::nullopt) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (rounding_mode.has_value()) { + ss << ", rounding_mode=" << rounding_mode.value(); + } else { + ss << ", rounding_mode=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const ::std::optional& rounding_mode) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + ((!this->rounding_mode&&!rounding_mode) || (this->rounding_mode&&rounding_mode && *(this->rounding_mode) == *rounding_mode))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("rounding_mode", rounding_mode); + torch::lazy::TSOpVector div_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(div_out.size(), 1); + + return div_out; + + } + + + ::std::optional rounding_mode; + + +}; + +class Elu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::elu); + } + + Elu(const torch::lazy::Value& self, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, std::vector&& shapes) + : TsNode( + Elu::ClassOpKind(), + OpList{self, alpha, scale, input_scale}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& alpha, const 
torch::lazy::Value& scale, const torch::lazy::Value& input_scale) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == alpha && + operand(i++) == scale && + operand(i++) == input_scale); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector elu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(elu_out.size(), 1); + + return elu_out; + + } + + + + + +}; + +class EluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::elu_backward); + } + + EluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, const bool& is_result, const torch::lazy::Value& self_or_result, std::vector&& shapes) + : TsNode( + EluBackward::ClassOpKind(), + OpList{grad_output, alpha, scale, input_scale, self_or_result}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(is_result)), + is_result(is_result) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", is_result=" << is_result; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& alpha, const torch::lazy::Value& scale, const torch::lazy::Value& input_scale, const bool& is_result, const torch::lazy::Value& self_or_result) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == alpha && + operand(i++) == 
scale && + operand(i++) == input_scale && + operand(i++) == self_or_result && + this->is_result == is_result); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("is_result", is_result); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector elu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(elu_backward_out.size(), 1); + + return elu_backward_out; + + } + + + bool is_result; + + +}; + +class Embedding : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::embedding); + } + + Embedding(const torch::lazy::Value& weight, const torch::lazy::Value& indices, const int64_t& padding_idx, const bool& scale_grad_by_freq, const bool& sparse, std::vector&& shapes) + : TsNode( + Embedding::ClassOpKind(), + OpList{weight, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(padding_idx, scale_grad_by_freq, sparse)), + padding_idx(padding_idx), + scale_grad_by_freq(scale_grad_by_freq), + sparse(sparse) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", padding_idx=" << padding_idx; + ss << ", scale_grad_by_freq=" << scale_grad_by_freq; + ss << ", sparse=" << sparse; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& weight, const torch::lazy::Value& indices, const int64_t& padding_idx, const bool& scale_grad_by_freq, const bool& sparse) const { + size_t i = 0; + return 
(operand(i++) == weight && + operand(i++) == indices && + this->padding_idx == padding_idx && + this->scale_grad_by_freq == scale_grad_by_freq && + this->sparse == sparse); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("padding_idx", padding_idx); + arguments.emplace_back("scale_grad_by_freq", scale_grad_by_freq); + arguments.emplace_back("sparse", sparse); + + torch::lazy::TSOpVector embedding_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(embedding_out.size(), 1); + + return embedding_out; + + } + + + int64_t padding_idx; + bool scale_grad_by_freq; + bool sparse; + + +}; + +class EmbeddingDenseBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::embedding_dense_backward); + } + + EmbeddingDenseBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& indices, const int64_t& num_weights, const int64_t& padding_idx, const bool& scale_grad_by_freq, std::vector&& shapes) + : TsNode( + EmbeddingDenseBackward::ClassOpKind(), + OpList{grad_output, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(num_weights, padding_idx, scale_grad_by_freq)), + num_weights(num_weights), + padding_idx(padding_idx), + scale_grad_by_freq(scale_grad_by_freq) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", num_weights=" << num_weights; + ss << ", padding_idx=" << padding_idx; + ss << ", scale_grad_by_freq=" << scale_grad_by_freq; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const 
torch::lazy::Value& indices, const int64_t& num_weights, const int64_t& padding_idx, const bool& scale_grad_by_freq) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == indices && + this->num_weights == num_weights && + this->padding_idx == padding_idx && + this->scale_grad_by_freq == scale_grad_by_freq); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("num_weights", num_weights); + arguments.emplace_back("padding_idx", padding_idx); + arguments.emplace_back("scale_grad_by_freq", scale_grad_by_freq); + + torch::lazy::TSOpVector embedding_dense_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(embedding_dense_backward_out.size(), 1); + + return embedding_dense_backward_out; + + } + + + int64_t num_weights; + int64_t padding_idx; + bool scale_grad_by_freq; + + +}; + +class EqScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::eq); + } + + EqScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + EqScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override 
{ + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector eq_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(eq_out.size(), 1); + + return eq_out; + + } + + + + + +}; + +class EqTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::eq); + } + + EqTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + EqTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector eq_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(eq_out.size(), 1); + + return eq_out; + + } + + + + + +}; + +class Exp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::exp); + } + + Exp(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Exp::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() 
const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector exp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(exp_out.size(), 1); + + return exp_out; + + } + + + + + +}; + +class ExpandCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::expand_copy); + } + + ExpandCopy(const torch::lazy::Value& self, const ::std::vector& size, const bool& implicit, std::vector&& shapes) + : TsNode( + ExpandCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size, implicit)), + size(size), + implicit(implicit) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", implicit=" << implicit; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size, const bool& implicit) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size && + this->implicit == implicit); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + kwarguments.emplace_back("implicit", implicit); + torch::lazy::TSOpVector expand_copy_out = 
torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(expand_copy_out.size(), 1); + + return expand_copy_out; + + } + + + ::std::vector size; + bool implicit; + + +}; + +class Flip : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::flip); + } + + Flip(const torch::lazy::Value& self, const ::std::vector& dims, std::vector&& shapes) + : TsNode( + Flip::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dims)), + dims(dims) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dims=" << dims; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dims) const { + size_t i = 0; + return (operand(i++) == self && + this->dims == dims); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dims", dims); + + torch::lazy::TSOpVector flip_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(flip_out.size(), 1); + + return flip_out; + + } + + + ::std::vector dims; + + +}; + +class Floor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::floor); + } + + Floor(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Floor::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) 
== self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector floor_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(floor_out.size(), 1); + + return floor_out; + + } + + + + + +}; + +class Frac : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::frac); + } + + Frac(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Frac::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector frac_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(frac_out.size(), 1); + + return frac_out; + + } + + + + + +}; + +class Gather : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gather); + } + + Gather(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const bool& sparse_grad, std::vector&& shapes) + : TsNode( + Gather::ClassOpKind(), + OpList{self, index}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, sparse_grad)), + 
dim(dim), + sparse_grad(sparse_grad) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", sparse_grad=" << sparse_grad; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const bool& sparse_grad) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + this->dim == dim && + this->sparse_grad == sparse_grad); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("sparse_grad", sparse_grad); + torch::lazy::TSOpVector gather_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gather_out.size(), 1); + + return gather_out; + + } + + + int64_t dim; + bool sparse_grad; + + +}; + +class GeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ge); + } + + GeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + 
std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ge_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ge_out.size(), 1); + + return ge_out; + + } + + + + + +}; + +class GeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ge); + } + + GeTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ge_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ge_out.size(), 1); + + return ge_out; + + } + + + + + +}; + +class Gelu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gelu); + } + + Gelu(const torch::lazy::Value& self, const c10::string_view& approximate, std::vector&& shapes) + : TsNode( + Gelu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + 
torch::lazy::MHash(approximate)), + approximate(approximate) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", approximate=" << approximate; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const c10::string_view& approximate) const { + size_t i = 0; + return (operand(i++) == self && + this->approximate == approximate); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("approximate", approximate); + torch::lazy::TSOpVector gelu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gelu_out.size(), 1); + + return gelu_out; + + } + + + std::string approximate; + + +}; + +class GeluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gelu_backward); + } + + GeluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const c10::string_view& approximate, std::vector&& shapes) + : TsNode( + GeluBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(approximate)), + approximate(approximate) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", approximate=" << approximate; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const c10::string_view& approximate) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->approximate == approximate); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + 
torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("approximate", approximate); + torch::lazy::TSOpVector gelu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gelu_backward_out.size(), 1); + + return gelu_backward_out; + + } + + + std::string approximate; + + +}; + +class Glu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu); + } + + Glu(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + Glu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_out.size(), 1); + + return glu_out; + + } + + + int64_t dim; + + +}; + +class GluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu_backward); + } + + GluBackward(const torch::lazy::Value& grad_output, 
const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + GluBackward::ClassOpKind(), + OpList{grad_output, self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_backward_out.size(), 1); + + return glu_backward_out; + + } + + + int64_t dim; + + +}; + +class GluJvp : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::glu_jvp); + } + + GluJvp(const torch::lazy::Value& glu, const torch::lazy::Value& x, const torch::lazy::Value& dx, const int64_t& dim, std::vector&& shapes) + : TsNode( + GluJvp::ClassOpKind(), + OpList{glu, x, dx}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& glu, const torch::lazy::Value& x, const torch::lazy::Value& dx, const int64_t& dim) const { + size_t i = 0; + return 
(operand(i++) == glu && + operand(i++) == x && + operand(i++) == dx && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector glu_jvp_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(glu_jvp_out.size(), 1); + + return glu_jvp_out; + + } + + + int64_t dim; + + +}; + +class GridSampler2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::grid_sampler_2d); + } + + GridSampler2d(const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, std::vector&& shapes) + : TsNode( + GridSampler2d::ClassOpKind(), + OpList{input, grid}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(interpolation_mode, padding_mode, align_corners)), + interpolation_mode(interpolation_mode), + padding_mode(padding_mode), + align_corners(align_corners) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", interpolation_mode=" << interpolation_mode; + ss << ", padding_mode=" << padding_mode; + ss << ", align_corners=" << align_corners; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners) const { + size_t i = 0; + return (operand(i++) == input && + operand(i++) == grid && + this->interpolation_mode == interpolation_mode && + 
this->padding_mode == padding_mode && + this->align_corners == align_corners); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("interpolation_mode", interpolation_mode); + arguments.emplace_back("padding_mode", padding_mode); + arguments.emplace_back("align_corners", align_corners); + + torch::lazy::TSOpVector grid_sampler_2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(grid_sampler_2d_out.size(), 1); + + return grid_sampler_2d_out; + + } + + + int64_t interpolation_mode; + int64_t padding_mode; + bool align_corners; + + +}; + +class GridSampler2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::grid_sampler_2d_backward); + } + + GridSampler2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + GridSampler2dBackward::ClassOpKind(), + OpList{grad_output, input, grid}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(interpolation_mode, padding_mode, align_corners, output_mask)), + interpolation_mode(interpolation_mode), + padding_mode(padding_mode), + align_corners(align_corners), + output_mask(output_mask) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", interpolation_mode=" << interpolation_mode; + ss << ", padding_mode=" << padding_mode; + ss << ", align_corners=" << align_corners; + ss << ", output_mask=" << 
output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& input, const torch::lazy::Value& grid, const int64_t& interpolation_mode, const int64_t& padding_mode, const bool& align_corners, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == input && + operand(i++) == grid && + this->interpolation_mode == interpolation_mode && + this->padding_mode == padding_mode && + this->align_corners == align_corners && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("interpolation_mode", interpolation_mode); + arguments.emplace_back("padding_mode", padding_mode); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector grid_sampler_2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(grid_sampler_2d_backward_out.size(), 2); + + return grid_sampler_2d_backward_out; + + } + + + int64_t interpolation_mode; + int64_t padding_mode; + bool align_corners; + ::std::vector output_mask; + + +}; + +class GtScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gt); + } + + GtScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GtScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() 
const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector gt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gt_out.size(), 1); + + return gt_out; + + } + + + + + +}; + +class GtTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::gt); + } + + GtTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + GtTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector gt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(gt_out.size(), 
1); + + return gt_out; + + } + + + + + +}; + +class Hardsigmoid : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::hardsigmoid); + } + + Hardsigmoid(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Hardsigmoid::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector hardsigmoid_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(hardsigmoid_out.size(), 1); + + return hardsigmoid_out; + + } + + + + + +}; + +class IndexSelect : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::index_select); + } + + IndexSelect(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, std::vector&& shapes) + : TsNode( + IndexSelect::ClassOpKind(), + OpList{self, index}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + 
std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector index_select_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(index_select_out.size(), 1); + + return index_select_out; + + } + + + int64_t dim; + + +}; + +class LeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::le); + } + + LeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector le_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(le_out.size(), 1); + + return le_out; + + } + + + + + +}; + +class LeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::le); + } + + LeTensor(const torch::lazy::Value& self, const 
torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector le_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(le_out.size(), 1); + + return le_out; + + } + + + + + +}; + +class LeakyRelu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::leaky_relu); + } + + LeakyRelu(const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, std::vector&& shapes) + : TsNode( + LeakyRelu::ClassOpKind(), + OpList{self, negative_slope}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& negative_slope) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == negative_slope); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; 
+ arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector leaky_relu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(leaky_relu_out.size(), 1); + + return leaky_relu_out; + + } + + + + + +}; + +class LeakyReluBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::leaky_relu_backward); + } + + LeakyReluBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, const bool& self_is_result, std::vector&& shapes) + : TsNode( + LeakyReluBackward::ClassOpKind(), + OpList{grad_output, self, negative_slope}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(self_is_result)), + self_is_result(self_is_result) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", self_is_result=" << self_is_result; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& negative_slope, const bool& self_is_result) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == negative_slope && + this->self_is_result == self_is_result); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("self_is_result", self_is_result); + + torch::lazy::TSOpVector leaky_relu_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, 
kwarguments); + TORCH_CHECK_EQ(leaky_relu_backward_out.size(), 1); + + return leaky_relu_backward_out; + + } + + + bool self_is_result; + + +}; + +class Log : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log); + } + + Log(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Log::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_out.size(), 1); + + return log_out; + + } + + + + + +}; + +class Log2 : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log2); + } + + Log2(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Log2::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i 
= 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log2_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log2_out.size(), 1); + + return log2_out; + + } + + + + + +}; + +class LogSigmoidBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log_sigmoid_backward); + } + + LogSigmoidBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& buffer, std::vector&& shapes) + : TsNode( + LogSigmoidBackward::ClassOpKind(), + OpList{grad_output, self, buffer}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& buffer) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == buffer); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_sigmoid_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_sigmoid_backward_out.size(), 1); + + return log_sigmoid_backward_out; + + } + + + + + +}; + +class LogSigmoidForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::log_sigmoid_forward); + } + + LogSigmoidForward(const torch::lazy::Value& self, 
std::vector&& shapes) + : TsNode( + LogSigmoidForward::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector log_sigmoid_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(log_sigmoid_forward_out.size(), 2); + + return log_sigmoid_forward_out; + + } + + + + + +}; + +class Logdet : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::logdet); + } + + Logdet(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Logdet::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector logdet_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(logdet_out.size(), 1); + + return logdet_out; + + } + 
+ + + + +}; + +class LtScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::lt); + } + + LtScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LtScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector lt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(lt_out.size(), 1); + + return lt_out; + + } + + + + + +}; + +class LtTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::lt); + } + + LtTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + LtTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + 
torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector lt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(lt_out.size(), 1); + + return lt_out; + + } + + + + + +}; + +class MaskedFillScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::masked_fill); + } + + MaskedFillScalar(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + MaskedFillScalar::ClassOpKind(), + OpList{self, mask, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mask && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector masked_fill_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(masked_fill_out.size(), 1); + + return masked_fill_out; + + } + + + + + +}; + +class MaskedFillTensor : public TsNode { + public: + static torch::lazy::OpKind 
ClassOpKind() { + return torch::lazy::OpKind(at::aten::masked_fill); + } + + MaskedFillTensor(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + MaskedFillTensor::ClassOpKind(), + OpList{self, mask, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mask, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mask && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector masked_fill_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(masked_fill_out.size(), 1); + + return masked_fill_out; + + } + + + + + +}; + +class MaxDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max); + } + + MaxDim(const torch::lazy::Value& self, const int64_t& dim, const bool& keepdim, std::vector&& shapes) + : TsNode( + MaxDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const 
torch::lazy::Value& self, const int64_t& dim, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector max_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_out.size(), 2); + + return max_out; + + } + + + int64_t dim; + bool keepdim; + + +}; + +class Max : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max); + } + + Max(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Max::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector max_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_out.size(), 1); + + return max_out; + + } + + + + + +}; + +class MaxPool2dWithIndices : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return 
torch::lazy::OpKind(at::aten::max_pool2d_with_indices); + } + + MaxPool2dWithIndices(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, std::vector&& shapes) + : TsNode( + MaxPool2dWithIndices::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(kernel_size, stride, padding, dilation, ceil_mode)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + ceil_mode(ceil_mode) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", ceil_mode=" << ceil_mode; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode) const { + size_t i = 0; + return (operand(i++) == self && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->ceil_mode == ceil_mode); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("ceil_mode", ceil_mode); + + torch::lazy::TSOpVector max_pool2d_with_indices_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + 
TORCH_CHECK_EQ(max_pool2d_with_indices_out.size(), 2); + + return max_pool2d_with_indices_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool ceil_mode; + + +}; + +class MaxPool2dWithIndicesBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::max_pool2d_with_indices_backward); + } + + MaxPool2dWithIndicesBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, const torch::lazy::Value& indices, std::vector&& shapes) + : TsNode( + MaxPool2dWithIndicesBackward::ClassOpKind(), + OpList{grad_output, self, indices}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(kernel_size, stride, padding, dilation, ceil_mode)), + kernel_size(kernel_size), + stride(stride), + padding(padding), + dilation(dilation), + ceil_mode(ceil_mode) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", kernel_size=" << kernel_size; + ss << ", stride=" << stride; + ss << ", padding=" << padding; + ss << ", dilation=" << dilation; + ss << ", ceil_mode=" << ceil_mode; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const ::std::vector& kernel_size, const ::std::vector& stride, const ::std::vector& padding, const ::std::vector& dilation, const bool& ceil_mode, const torch::lazy::Value& indices) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == indices && + this->kernel_size == kernel_size && + this->stride == stride && + this->padding == padding && + this->dilation == dilation && + this->ceil_mode == ceil_mode); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + 
torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("kernel_size", kernel_size); + arguments.emplace_back("stride", stride); + arguments.emplace_back("padding", padding); + arguments.emplace_back("dilation", dilation); + arguments.emplace_back("ceil_mode", ceil_mode); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector max_pool2d_with_indices_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(max_pool2d_with_indices_backward_out.size(), 1); + + return max_pool2d_with_indices_backward_out; + + } + + + ::std::vector kernel_size; + ::std::vector stride; + ::std::vector padding; + ::std::vector dilation; + bool ceil_mode; + + +}; + +class Maximum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::maximum); + } + + Maximum(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + Maximum::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector maximum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(maximum_out.size(), 1); + + return maximum_out; + + } + + + + + +}; + +class Mean : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mean); + } + + Mean(const torch::lazy::Value& self, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Mean::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector mean_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mean_out.size(), 1); + + return mean_out; + + } + + + ::std::optional dtype; + + +}; + +class MeanDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mean); + } + + MeanDim(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + MeanDim::ClassOpKind(), + OpList{self}, 
+ std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim, dtype)), + dim(dim), + keepdim(keepdim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", keepdim=" << keepdim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector mean_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mean_out.size(), 1); + + return mean_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + ::std::optional dtype; + + +}; + +class Min : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::min); + } + + Min(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Min::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << 
TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector min_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(min_out.size(), 1); + + return min_out; + + } + + + + + +}; + +class Minimum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::minimum); + } + + Minimum(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + Minimum::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector minimum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(minimum_out.size(), 1); + + return minimum_out; + + } + + + + + +}; + +class Mm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return 
torch::lazy::OpKind(at::aten::mm); + } + + Mm(const torch::lazy::Value& self, const torch::lazy::Value& mat2, std::vector&& shapes) + : TsNode( + Mm::ClassOpKind(), + OpList{self, mat2}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& mat2) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == mat2); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mm_out.size(), 1); + + return mm_out; + + } + + + + + +}; + +class MulTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mul); + } + + MulTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + MulTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + 
kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mul_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mul_out.size(), 1); + + return mul_out; + + } + + + + + +}; + +class Mv : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::mv); + } + + Mv(const torch::lazy::Value& self, const torch::lazy::Value& vec, std::vector&& shapes) + : TsNode( + Mv::ClassOpKind(), + OpList{self, vec}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& vec) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == vec); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector mv_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(mv_out.size(), 1); + + return mv_out; + + } + + + + + +}; + +class NativeBatchNorm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_batch_norm); + } + + NativeBatchNorm(const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& bias, const ::std::optional& running_mean, const ::std::optional& running_var, const bool& training, const double& momentum, const double& eps, std::vector&& shapes) + : TsNode( + 
NativeBatchNorm::ClassOpKind(), + OpList{input, weight.value_or(kNullValue), bias.value_or(kNullValue), running_mean.value_or(kNullValue), running_var.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(training, momentum, eps)), + training(training), + momentum(momentum), + eps(eps) + { + has_weight = !!weight; + has_bias = !!bias; + has_running_mean = !!running_mean; + has_running_var = !!running_var; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", training=" << training; + ss << ", momentum=" << momentum; + ss << ", eps=" << eps; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& bias, const ::std::optional& running_mean, const ::std::optional& running_var, const bool& training, const double& momentum, const double& eps) const { + size_t i = 0; + return (operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == bias.value_or(kNullValue) && + nullable_operand(i++) == running_mean.value_or(kNullValue) && + nullable_operand(i++) == running_var.value_or(kNullValue) && + this->training == training && + this->momentum == momentum && + this->eps == eps); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_var ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("training", training); + arguments.emplace_back("momentum", momentum); + arguments.emplace_back("eps", eps); + + torch::lazy::TSOpVector native_batch_norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_batch_norm_out.size(), 3); + + return native_batch_norm_out; + + } + + + bool training; + double momentum; + double eps; + bool has_weight: 1; + bool has_bias: 1; + bool has_running_mean: 1; + bool has_running_var: 1; + +}; + +class NativeBatchNormBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_batch_norm_backward); + } + + NativeBatchNormBackward(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::optional& weight, const ::std::optional& running_mean, const ::std::optional& running_var, const ::std::optional& save_mean, const ::std::optional& save_invstd, const bool& train, const double& eps, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + NativeBatchNormBackward::ClassOpKind(), + OpList{grad_out, input, weight.value_or(kNullValue), running_mean.value_or(kNullValue), running_var.value_or(kNullValue), save_mean.value_or(kNullValue), save_invstd.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(train, eps, output_mask)), + train(train), + eps(eps), + output_mask(output_mask) + { + has_weight = !!weight; + has_running_mean = !!running_mean; + has_running_var = !!running_var; + has_save_mean = !!save_mean; + has_save_invstd = !!save_invstd; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", train=" << train; + ss << ", eps=" << eps; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::optional& weight, 
const ::std::optional& running_mean, const ::std::optional& running_var, const ::std::optional& save_mean, const ::std::optional& save_invstd, const bool& train, const double& eps, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_out && + operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == running_mean.value_or(kNullValue) && + nullable_operand(i++) == running_var.value_or(kNullValue) && + nullable_operand(i++) == save_mean.value_or(kNullValue) && + nullable_operand(i++) == save_invstd.value_or(kNullValue) && + this->train == train && + this->eps == eps && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(10); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_running_var ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_save_mean ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_save_invstd ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("train", train); + arguments.emplace_back("eps", eps); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector native_batch_norm_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_batch_norm_backward_out.size(), 3); + + return native_batch_norm_backward_out; + + } + + + bool train; + double eps; + ::std::vector output_mask; + bool has_weight: 1; + bool has_running_mean: 1; + bool has_running_var: 1; + bool has_save_mean: 1; + bool has_save_invstd: 1; + +}; + +class NativeDropout : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_dropout); + } + + NativeDropout(const torch::lazy::Value& input, const double& p, const ::std::optional& train, std::vector&& shapes) + : TsNode( + NativeDropout::ClassOpKind(), + OpList{input}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(p, train)), + p(p), + train(train) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", p=" << p; + if (train.has_value()) { + ss << ", train=" << train.value(); + } else { + ss << ", train=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const double& p, const ::std::optional& train) const { + size_t i = 0; + return (operand(i++) == input && + this->p == p && + ((!this->train&&!train) || (this->train&&train && *(this->train) == *train))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("p", p); + arguments.emplace_back("train", train); + + torch::lazy::TSOpVector native_dropout_out = 
torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_dropout_out.size(), 2); + + return native_dropout_out; + + } + + + double p; + ::std::optional train; + + +}; + +class NativeDropoutBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_dropout_backward); + } + + NativeDropoutBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& mask, const double& scale, std::vector&& shapes) + : TsNode( + NativeDropoutBackward::ClassOpKind(), + OpList{grad_output, mask}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(scale)), + scale(scale) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", scale=" << scale; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& mask, const double& scale) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == mask && + this->scale == scale); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("scale", scale); + + torch::lazy::TSOpVector native_dropout_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_dropout_backward_out.size(), 1); + + return native_dropout_backward_out; + + } + + + double scale; + + +}; + +class NativeLayerNorm : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_layer_norm); + } + + NativeLayerNorm(const torch::lazy::Value& input, const ::std::vector& 
normalized_shape, const ::std::optional& weight, const ::std::optional& bias, const double& eps, std::vector&& shapes) + : TsNode( + NativeLayerNorm::ClassOpKind(), + OpList{input, weight.value_or(kNullValue), bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(normalized_shape, eps)), + normalized_shape(normalized_shape), + eps(eps) + { + has_weight = !!weight; + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", normalized_shape=" << normalized_shape; + ss << ", eps=" << eps; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::vector& normalized_shape, const ::std::optional& weight, const ::std::optional& bias, const double& eps) const { + size_t i = 0; + return (operand(i++) == input && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == bias.value_or(kNullValue) && + this->normalized_shape == normalized_shape && + this->eps == eps); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("normalized_shape", normalized_shape); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("eps", eps); + + torch::lazy::TSOpVector native_layer_norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_layer_norm_out.size(), 3); + + return native_layer_norm_out; + + } + + + ::std::vector normalized_shape; + double eps; + bool has_weight: 1; + bool has_bias: 1; + +}; + +class NativeLayerNormBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::native_layer_norm_backward); + } + + NativeLayerNormBackward(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::vector& normalized_shape, const torch::lazy::Value& mean, const torch::lazy::Value& rstd, const ::std::optional& weight, const ::std::optional& bias, const ::std::vector& output_mask, std::vector&& shapes) + : TsNode( + NativeLayerNormBackward::ClassOpKind(), + OpList{grad_out, input, mean, rstd, weight.value_or(kNullValue), bias.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 3, + torch::lazy::MHash(normalized_shape, output_mask)), + normalized_shape(normalized_shape), + output_mask(output_mask) + { + has_weight = !!weight; + has_bias = !!bias; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", normalized_shape=" << normalized_shape; + ss << ", output_mask=" << output_mask; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_out, const torch::lazy::Value& input, const ::std::vector& normalized_shape, const torch::lazy::Value& mean, const torch::lazy::Value& rstd, const ::std::optional& weight, const ::std::optional& bias, const ::std::vector& output_mask) const { + size_t i = 0; + return (operand(i++) == grad_out && + operand(i++) == input && + operand(i++) == mean && + operand(i++) == rstd && + nullable_operand(i++) == weight.value_or(kNullValue) && + nullable_operand(i++) == 
bias.value_or(kNullValue) && + this->normalized_shape == normalized_shape && + this->output_mask == output_mask); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(8); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("normalized_shape", normalized_shape); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_bias ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("output_mask", output_mask); + + torch::lazy::TSOpVector native_layer_norm_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(native_layer_norm_backward_out.size(), 3); + + return native_layer_norm_backward_out; + + } + + + ::std::vector normalized_shape; + ::std::vector output_mask; + bool has_weight: 1; + bool has_bias: 1; + +}; + +class NeScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ne); + } + + NeScalar(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + NeScalar::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + 
torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ne_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ne_out.size(), 1); + + return ne_out; + + } + + + + + +}; + +class NeTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::ne); + } + + NeTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + NeTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector ne_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(ne_out.size(), 1); + + return ne_out; + + } + + + + + +}; + +class Neg : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::neg); + } + + Neg(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Neg::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + 
torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector neg_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(neg_out.size(), 1); + + return neg_out; + + } + + + + + +}; + +class NllLoss2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss2d_backward); + } + + NllLoss2dBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight, std::vector&& shapes) + : TsNode( + NllLoss2dBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue), total_weight}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight) const { + size_t i = 0; + return 
(operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + operand(i++) == total_weight && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nll_loss2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss2d_backward_out.size(), 1); + + return nll_loss2d_backward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLoss2dForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss2d_forward); + } + + NllLoss2dForward(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, std::vector&& shapes) + : TsNode( + NllLoss2dForward::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << 
", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + + torch::lazy::TSOpVector nll_loss2d_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss2d_forward_out.size(), 2); + + return nll_loss2d_forward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLossBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss_backward); + } + + NllLossBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight, std::vector&& shapes) + : TsNode( + NllLossBackward::ClassOpKind(), + OpList{grad_output, self, target, weight.value_or(kNullValue), total_weight}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + 
ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, const torch::lazy::Value& total_weight) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + operand(i++) == total_weight && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(7); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nll_loss_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss_backward_out.size(), 1); + + return nll_loss_backward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class NllLossForward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nll_loss_forward); + } + + NllLossForward(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index, std::vector&& shapes) + : TsNode( + NllLossForward::ClassOpKind(), + OpList{self, target, weight.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(reduction, ignore_index)), + reduction(reduction), + ignore_index(ignore_index) + { + has_weight = !!weight; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", ignore_index=" << ignore_index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const ::std::optional& weight, const int64_t& reduction, const int64_t& ignore_index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + nullable_operand(i++) == weight.value_or(kNullValue) && + this->reduction == reduction && + this->ignore_index == ignore_index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_weight ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("ignore_index", ignore_index); + + torch::lazy::TSOpVector nll_loss_forward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nll_loss_forward_out.size(), 2); + + return nll_loss_forward_out; + + } + + + int64_t reduction; + int64_t ignore_index; + bool has_weight: 1; + +}; + +class Nonzero : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::nonzero); + } + + Nonzero(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Nonzero::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector nonzero_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(nonzero_out.size(), 1); + + return nonzero_out; + + } + + + + + +}; + +class NormScalaroptDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::norm); + } + + NormScalaroptDim(const torch::lazy::Value& self, const ::std::optional& p, const ::std::vector& dim, const bool& keepdim, std::vector&& shapes) + : TsNode( + 
NormScalaroptDim::ClassOpKind(), + OpList{self, p.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + has_p = !!p; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& p, const ::std::vector& dim, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == p.value_or(kNullValue) && + this->dim == dim && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(has_p ? 
loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector norm_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(norm_out.size(), 1); + + return norm_out; + + } + + + ::std::vector dim; + bool keepdim; + bool has_p: 1; + +}; + +class NormalFunctional : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::normal_functional); + } + + NormalFunctional(const torch::lazy::Value& self, const double& mean, const double& std, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + NormalFunctional::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(mean, std, generator)), + mean(mean), + std(std), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", mean=" << mean; + ss << ", std=" << std; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& mean, const double& std, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->mean == mean && + this->std == std && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("mean", mean); + arguments.emplace_back("std", std); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector 
normal_functional_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(normal_functional_out.size(), 1); + + return normal_functional_out; + + } + + + double mean; + double std; + ::std::optional generator; + + +}; + +class PermuteCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::permute_copy); + } + + PermuteCopy(const torch::lazy::Value& self, const ::std::vector& dims, std::vector&& shapes) + : TsNode( + PermuteCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dims)), + dims(dims) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dims=" << dims; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dims) const { + size_t i = 0; + return (operand(i++) == self && + this->dims == dims); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dims", dims); + + torch::lazy::TSOpVector permute_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(permute_copy_out.size(), 1); + + return permute_copy_out; + + } + + + ::std::vector dims; + + +}; + +class PowTensorTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::pow); + } + + PowTensorTensor(const torch::lazy::Value& self, const torch::lazy::Value& exponent, std::vector&& shapes) + : TsNode( + PowTensorTensor::ClassOpKind(), + OpList{self, exponent}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override 
{ + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& exponent) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == exponent); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector pow_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(pow_out.size(), 1); + + return pow_out; + + } + + + + + +}; + +class PowTensorScalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::pow); + } + + PowTensorScalar(const torch::lazy::Value& self, const torch::lazy::Value& exponent, std::vector&& shapes) + : TsNode( + PowTensorScalar::ClassOpKind(), + OpList{self, exponent}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& exponent) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == exponent); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector pow_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + 
TORCH_CHECK_EQ(pow_out.size(), 1); + + return pow_out; + + } + + + + + +}; + +class RandomFrom : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + RandomFrom(const torch::lazy::Value& self, const int64_t& from, const ::std::optional& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + RandomFrom::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(from, to, generator)), + from(from), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", from=" << from; + if (to.has_value()) { + ss << ", to=" << to.value(); + } else { + ss << ", to=null"; + } + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& from, const ::std::optional& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->from == from && + ((!this->to&&!to) || (this->to&&to && *(this->to) == *to)) && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("from", from); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + int64_t from; + ::std::optional to; + ::std::optional 
generator; + + +}; + +class RandomTo : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + RandomTo(const torch::lazy::Value& self, const int64_t& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + RandomTo::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(to, generator)), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", to=" << to; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->to == to && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + int64_t to; + ::std::optional generator; + + +}; + +class Random : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::random); + } + + Random(const torch::lazy::Value& self, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Random::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + 
torch::lazy::MHash(generator)), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector random_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(random_out.size(), 1); + + return random_out; + + } + + + ::std::optional generator; + + +}; + +class Reciprocal : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::reciprocal); + } + + Reciprocal(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Reciprocal::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector reciprocal_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(reciprocal_out.size(), 1); + + return reciprocal_out; + + } + + + + + +}; + +class Relu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::relu); + } + + Relu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Relu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector relu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(relu_out.size(), 1); + + return relu_out; + + } + + + + + +}; + +class RemainderTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::remainder); + } + + RemainderTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, std::vector&& shapes) + : TsNode( + RemainderTensor::ClassOpKind(), + OpList{self, other}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other) const { + size_t i = 0; + return (operand(i++) == self 
&& + operand(i++) == other); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector remainder_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(remainder_out.size(), 1); + + return remainder_out; + + } + + + + + +}; + +class Repeat : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::repeat); + } + + Repeat(const torch::lazy::Value& self, const ::std::vector& repeats, std::vector&& shapes) + : TsNode( + Repeat::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(repeats)), + repeats(repeats) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", repeats=" << repeats; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& repeats) const { + size_t i = 0; + return (operand(i++) == self && + this->repeats == repeats); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("repeats", repeats); + + torch::lazy::TSOpVector repeat_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(repeat_out.size(), 1); + + return repeat_out; + + } + + + ::std::vector repeats; + + +}; + +class Rsqrt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return 
torch::lazy::OpKind(at::aten::rsqrt); + } + + Rsqrt(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Rsqrt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector rsqrt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(rsqrt_out.size(), 1); + + return rsqrt_out; + + } + + + + + +}; + +class ScatterAdd : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::scatter_add); + } + + ScatterAdd(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const torch::lazy::Value& src, std::vector&& shapes) + : TsNode( + ScatterAdd::ClassOpKind(), + OpList{self, index, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const torch::lazy::Value& index, const torch::lazy::Value& src) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == index && + operand(i++) == src && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector 
arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector scatter_add_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(scatter_add_out.size(), 1); + + return scatter_add_out; + + } + + + int64_t dim; + + +}; + +class SelectCopyInt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::select_copy); + } + + SelectCopyInt(const torch::lazy::Value& self, const int64_t& dim, const int64_t& index, std::vector&& shapes) + : TsNode( + SelectCopyInt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, index)), + dim(dim), + index(index) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", index=" << index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const int64_t& index) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->index == index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("index", index); + + torch::lazy::TSOpVector select_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(select_copy_out.size(), 1); + + return select_copy_out; + + } + + + int64_t dim; + int64_t index; + + +}; + 
+class SelectScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::select_scatter); + } + + SelectScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const int64_t& index, std::vector&& shapes) + : TsNode( + SelectScatter::ClassOpKind(), + OpList{self, src}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, index)), + dim(dim), + index(index) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", index=" << index; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const int64_t& index) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + this->dim == dim && + this->index == index); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("index", index); + + torch::lazy::TSOpVector select_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(select_scatter_out.size(), 1); + + return select_scatter_out; + + } + + + int64_t dim; + int64_t index; + + +}; + +class Selu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::selu); + } + + Selu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Selu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + 
std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector selu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(selu_out.size(), 1); + + return selu_out; + + } + + + + + +}; + +class Sgn : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sgn); + } + + Sgn(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sgn::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sgn_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sgn_out.size(), 1); + + return sgn_out; + + } + + + + + +}; + +class Sigmoid : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sigmoid); + } + + Sigmoid(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sigmoid::ClassOpKind(), + OpList{self}, 
+ std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sigmoid_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sigmoid_out.size(), 1); + + return sigmoid_out; + + } + + + + + +}; + +class SigmoidBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(c10::Symbol::fromQualString("aten::sigmoid_backward")); + } + + SigmoidBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, std::vector&& shapes) + : TsNode( + SigmoidBackward::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& output) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sigmoid_backward_out 
= torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sigmoid_backward_out.size(), 1); + + return sigmoid_backward_out; + + } + + + + + +}; + +class Silu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::silu); + } + + Silu(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Silu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector silu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(silu_out.size(), 1); + + return silu_out; + + } + + + + + +}; + +class SliceCopyTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::slice_copy); + } + + SliceCopyTensor(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step, std::vector&& shapes) + : TsNode( + SliceCopyTensor::ClassOpKind(), + OpList{self, start.value_or(kNullValue), end.value_or(kNullValue), step}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + has_start = !!start; + has_end = !!end; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool 
CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == start.value_or(kNullValue) && + nullable_operand(i++) == end.value_or(kNullValue) && + operand(i++) == step && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(has_start ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_end ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector slice_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(slice_copy_out.size(), 1); + + return slice_copy_out; + + } + + + int64_t dim; + bool has_start: 1; + bool has_end: 1; + +}; + +class SliceScatter : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::slice_scatter); + } + + SliceScatter(const torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step, std::vector&& shapes) + : TsNode( + SliceScatter::ClassOpKind(), + OpList{self, src, start.value_or(kNullValue), end.value_or(kNullValue), step}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + has_start = !!start; + has_end = !!end; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const 
torch::lazy::Value& self, const torch::lazy::Value& src, const int64_t& dim, const ::std::optional& start, const ::std::optional& end, const torch::lazy::Value& step) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == src && + nullable_operand(i++) == start.value_or(kNullValue) && + nullable_operand(i++) == end.value_or(kNullValue) && + operand(i++) == step && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back(has_start ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(has_end ? loctx->GetOutputOp(operand(i++)) : nullptr); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector slice_scatter_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(slice_scatter_out.size(), 1); + + return slice_scatter_out; + + } + + + int64_t dim; + bool has_start: 1; + bool has_end: 1; + +}; + +class SmoothL1Loss : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::smooth_l1_loss); + } + + SmoothL1Loss(const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta, std::vector&& shapes) + : TsNode( + SmoothL1Loss::ClassOpKind(), + OpList{self, target}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, beta)), + reduction(reduction), + beta(beta) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", beta=" << beta; + return ss.str(); + } + + + + 
bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == target && + this->reduction == reduction && + this->beta == beta); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("beta", beta); + + torch::lazy::TSOpVector smooth_l1_loss_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(smooth_l1_loss_out.size(), 1); + + return smooth_l1_loss_out; + + } + + + int64_t reduction; + double beta; + + +}; + +class SmoothL1LossBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::smooth_l1_loss_backward); + } + + SmoothL1LossBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta, std::vector&& shapes) + : TsNode( + SmoothL1LossBackward::ClassOpKind(), + OpList{grad_output, self, target}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(reduction, beta)), + reduction(reduction), + beta(beta) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", reduction=" << reduction; + ss << ", beta=" << beta; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& target, const int64_t& reduction, const double& beta) const { + size_t i = 0; + return (operand(i++) == grad_output && + 
operand(i++) == self && + operand(i++) == target && + this->reduction == reduction && + this->beta == beta); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("reduction", reduction); + arguments.emplace_back("beta", beta); + + torch::lazy::TSOpVector smooth_l1_loss_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(smooth_l1_loss_backward_out.size(), 1); + + return smooth_l1_loss_backward_out; + + } + + + int64_t reduction; + double beta; + + +}; + +class Softplus : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::softplus); + } + + Softplus(const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + Softplus::ClassOpKind(), + OpList{self, beta, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == beta && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector softplus_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(softplus_out.size(), 1); + + return softplus_out; + + } + + + + + +}; + +class SoftplusBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::softplus_backward); + } + + SoftplusBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + SoftplusBackward::ClassOpKind(), + OpList{grad_output, self, beta, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& beta, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == beta && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector softplus_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(softplus_backward_out.size(), 1); + + return 
softplus_backward_out; + + } + + + + + +}; + +class Sort : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sort); + } + + Sort(const torch::lazy::Value& self, const int64_t& dim, const bool& descending, std::vector&& shapes) + : TsNode( + Sort::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(dim, descending)), + dim(dim), + descending(descending) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + ss << ", descending=" << descending; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim, const bool& descending) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim && + this->descending == descending); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("descending", descending); + + torch::lazy::TSOpVector sort_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sort_out.size(), 2); + + return sort_out; + + } + + + int64_t dim; + bool descending; + + +}; + +class Sqrt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sqrt); + } + + Sqrt(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Sqrt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { 
+ size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector sqrt_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sqrt_out.size(), 1); + + return sqrt_out; + + } + + + + + +}; + +class SqueezeCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + SqueezeCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + + + +}; + +class SqueezeCopyDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopyDim(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + SqueezeCopyDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* 
num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + int64_t dim; + + +}; + +class SqueezeCopyDims : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::squeeze_copy); + } + + SqueezeCopyDims(const torch::lazy::Value& self, const ::std::vector& dim, std::vector&& shapes) + : TsNode( + SqueezeCopyDims::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + 
+ torch::lazy::TSOpVector squeeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(squeeze_copy_out.size(), 1); + + return squeeze_copy_out; + + } + + + ::std::vector dim; + + +}; + +class Stack : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::stack); + } + + Stack(const torch::lazy::Value& tensors, const int64_t& dim, std::vector&& shapes) + : TsNode( + Stack::ClassOpKind(), + OpList{tensors}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& tensors, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == tensors && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector stack_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(stack_out.size(), 1); + + return stack_out; + + } + + + int64_t dim; + + +}; + +class Std : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + Std(const torch::lazy::Value& self, const bool& unbiased, std::vector&& shapes) + : TsNode( + Std::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(unbiased)), + unbiased(unbiased) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", unbiased=" << unbiased; + return ss.str(); + } + + + + 
bool CanBeReused(const torch::lazy::Value& self, const bool& unbiased) const { + size_t i = 0; + return (operand(i++) == self && + this->unbiased == unbiased); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("unbiased", unbiased); + + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + bool unbiased; + + +}; + +class StdDim : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + StdDim(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& unbiased, const bool& keepdim, std::vector&& shapes) + : TsNode( + StdDim::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, unbiased, keepdim)), + dim(dim), + unbiased(unbiased), + keepdim(keepdim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", unbiased=" << unbiased; + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& unbiased, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->unbiased == unbiased && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector 
kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("unbiased", unbiased); + arguments.emplace_back("keepdim", keepdim); + + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + ::std::optional<::std::vector> dim; + bool unbiased; + bool keepdim; + + +}; + +class StdCorrection : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::std); + } + + StdCorrection(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const ::std::optional& correction, const bool& keepdim, std::vector&& shapes) + : TsNode( + StdCorrection::ClassOpKind(), + OpList{self, correction.value_or(kNullValue)}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim)), + dim(dim), + keepdim(keepdim) + { + has_correction = !!correction; + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } + ss << ", keepdim=" << keepdim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const ::std::optional& correction, const bool& keepdim) const { + size_t i = 0; + return (operand(i++) == self && + nullable_operand(i++) == correction.value_or(kNullValue) && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(2); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + kwarguments.emplace_back("correction", has_correction ? loctx->GetOutputOp(operand(i++)) : nullptr); + kwarguments.emplace_back("keepdim", keepdim); + torch::lazy::TSOpVector std_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(std_out.size(), 1); + + return std_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + bool has_correction: 1; + +}; + +class SubTensor : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sub); + } + + SubTensor(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha, std::vector&& shapes) + : TsNode( + SubTensor::ClassOpKind(), + OpList{self, other, alpha}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& other, const torch::lazy::Value& alpha) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == other && + operand(i++) == alpha); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("alpha", loctx->GetOutputOp(operand(i++))); + torch::lazy::TSOpVector sub_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sub_out.size(), 1); + + return sub_out; + + } + + + + + +}; + +class Sum : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + 
return torch::lazy::OpKind(at::aten::sum); + } + + Sum(const torch::lazy::Value& self, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + Sum::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector sum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sum_out.size(), 1); + + return sum_out; + + } + + + ::std::optional dtype; + + +}; + +class SumDimIntlist : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::sum); + } + + SumDimIntlist(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype, std::vector&& shapes) + : TsNode( + SumDimIntlist::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim, keepdim, dtype)), + dim(dim), + keepdim(keepdim), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + if (dim.has_value()) { + ss << ", dim=" << dim.value(); + } else { + ss << ", dim=null"; + } 
+ ss << ", keepdim=" << keepdim; + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::optional<::std::vector>& dim, const bool& keepdim, const ::std::optional& dtype) const { + size_t i = 0; + return (operand(i++) == self && + ((!this->dim&&!dim) || (this->dim&&dim && *(this->dim) == *dim)) && + this->keepdim == keepdim && + ((!this->dtype&&!dtype) || (this->dtype&&dtype && *(this->dtype) == *dtype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + arguments.emplace_back("keepdim", keepdim); + kwarguments.emplace_back("dtype", dtype); + torch::lazy::TSOpVector sum_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(sum_out.size(), 1); + + return sum_out; + + } + + + ::std::optional<::std::vector> dim; + bool keepdim; + ::std::optional dtype; + + +}; + +class TCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::t_copy); + } + + TCopy(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + TCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + 
arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector t_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(t_copy_out.size(), 1); + + return t_copy_out; + + } + + + + + +}; + +class Tanh : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tanh); + } + + Tanh(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Tanh::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector tanh_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tanh_out.size(), 1); + + return tanh_out; + + } + + + + + +}; + +class TanhBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tanh_backward); + } + + TanhBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& output, std::vector&& shapes) + : TsNode( + TanhBackward::ClassOpKind(), + OpList{grad_output, output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const 
torch::lazy::Value& output) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == output); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector tanh_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tanh_backward_out.size(), 1); + + return tanh_backward_out; + + } + + + + + +}; + +class Threshold : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::threshold); + } + + Threshold(const torch::lazy::Value& self, const torch::lazy::Value& threshold, const torch::lazy::Value& value, std::vector&& shapes) + : TsNode( + Threshold::ClassOpKind(), + OpList{self, threshold, value}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const torch::lazy::Value& threshold, const torch::lazy::Value& value) const { + size_t i = 0; + return (operand(i++) == self && + operand(i++) == threshold && + operand(i++) == value); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector threshold_out = 
torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(threshold_out.size(), 1); + + return threshold_out; + + } + + + + + +}; + +class ThresholdBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::threshold_backward); + } + + ThresholdBackward(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& threshold, std::vector&& shapes) + : TsNode( + ThresholdBackward::ClassOpKind(), + OpList{grad_output, self, threshold}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const torch::lazy::Value& self, const torch::lazy::Value& threshold) const { + size_t i = 0; + return (operand(i++) == grad_output && + operand(i++) == self && + operand(i++) == threshold); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector threshold_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(threshold_backward_out.size(), 1); + + return threshold_backward_out; + + } + + + + + +}; + +class Topk : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::topk); + } + + Topk(const torch::lazy::Value& self, const int64_t& k, const int64_t& dim, const bool& largest, const bool& sorted, std::vector&& shapes) + : TsNode( + Topk::ClassOpKind(), + 
OpList{self}, + std::move(shapes), + /* num_outputs */ 2, + torch::lazy::MHash(k, dim, largest, sorted)), + k(k), + dim(dim), + largest(largest), + sorted(sorted) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", k=" << k; + ss << ", dim=" << dim; + ss << ", largest=" << largest; + ss << ", sorted=" << sorted; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& k, const int64_t& dim, const bool& largest, const bool& sorted) const { + size_t i = 0; + return (operand(i++) == self && + this->k == k && + this->dim == dim && + this->largest == largest && + this->sorted == sorted); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("k", k); + arguments.emplace_back("dim", dim); + arguments.emplace_back("largest", largest); + arguments.emplace_back("sorted", sorted); + + torch::lazy::TSOpVector topk_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(topk_out.size(), 2); + + return topk_out; + + } + + + int64_t k; + int64_t dim; + bool largest; + bool sorted; + + +}; + +class Trace : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::trace); + } + + Trace(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Trace::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + 
torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector trace_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(trace_out.size(), 1); + + return trace_out; + + } + + + + + +}; + +class TransposeCopyInt : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::transpose_copy); + } + + TransposeCopyInt(const torch::lazy::Value& self, const int64_t& dim0, const int64_t& dim1, std::vector&& shapes) + : TsNode( + TransposeCopyInt::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim0, dim1)), + dim0(dim0), + dim1(dim1) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim0=" << dim0; + ss << ", dim1=" << dim1; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim0, const int64_t& dim1) const { + size_t i = 0; + return (operand(i++) == self && + this->dim0 == dim0 && + this->dim1 == dim1); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim0", dim0); + arguments.emplace_back("dim1", dim1); + + torch::lazy::TSOpVector transpose_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(transpose_copy_out.size(), 1); + + return transpose_copy_out; + + } + + + int64_t dim0; + int64_t dim1; + + +}; + +class Tril : public TsNode { + public: + 
static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::tril); + } + + Tril(const torch::lazy::Value& self, const int64_t& diagonal, std::vector&& shapes) + : TsNode( + Tril::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(diagonal)), + diagonal(diagonal) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", diagonal=" << diagonal; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& diagonal) const { + size_t i = 0; + return (operand(i++) == self && + this->diagonal == diagonal); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("diagonal", diagonal); + + torch::lazy::TSOpVector tril_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(tril_out.size(), 1); + + return tril_out; + + } + + + int64_t diagonal; + + +}; + +class Triu : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::triu); + } + + Triu(const torch::lazy::Value& self, const int64_t& diagonal, std::vector&& shapes) + : TsNode( + Triu::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(diagonal)), + diagonal(diagonal) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", diagonal=" << diagonal; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& diagonal) const { + size_t i = 0; + return (operand(i++) == self && + this->diagonal == diagonal); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr 
function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("diagonal", diagonal); + + torch::lazy::TSOpVector triu_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(triu_out.size(), 1); + + return triu_out; + + } + + + int64_t diagonal; + + +}; + +class Trunc : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::trunc); + } + + Trunc(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Trunc::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector trunc_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(trunc_out.size(), 1); + + return trunc_out; + + } + + + + + +}; + +class UnfoldCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::unfold_copy); + } + + UnfoldCopy(const torch::lazy::Value& self, const int64_t& dimension, const int64_t& size, const int64_t& step, std::vector&& shapes) + : TsNode( + UnfoldCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dimension, size, 
step)), + dimension(dimension), + size(size), + step(step) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dimension=" << dimension; + ss << ", size=" << size; + ss << ", step=" << step; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dimension, const int64_t& size, const int64_t& step) const { + size_t i = 0; + return (operand(i++) == self && + this->dimension == dimension && + this->size == size && + this->step == step); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dimension", dimension); + arguments.emplace_back("size", size); + arguments.emplace_back("step", step); + + torch::lazy::TSOpVector unfold_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(unfold_copy_out.size(), 1); + + return unfold_copy_out; + + } + + + int64_t dimension; + int64_t size; + int64_t step; + + +}; + +class Uniform : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::uniform); + } + + Uniform(const torch::lazy::Value& self, const double& from, const double& to, const ::std::optional& generator, std::vector&& shapes) + : TsNode( + Uniform::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(from, to, generator)), + from(from), + to(to), + generator(generator) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", from=" << from; + ss << ", to=" << to; + if (generator.has_value()) { + ss << ", generator=" << "torch.Generator()"; + } else { + ss << ", generator=null"; + } + return 
ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const double& from, const double& to, const ::std::optional& generator) const { + size_t i = 0; + return (operand(i++) == self && + this->from == from && + this->to == to && + ((!this->generator&&!generator) || (this->generator&&generator && *(this->generator) == *generator))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(3); + kwarguments.reserve(1); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("from", from); + arguments.emplace_back("to", to); + kwarguments.emplace_back("generator", generator); + torch::lazy::TSOpVector uniform_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(uniform_out.size(), 1); + + return uniform_out; + + } + + + double from; + double to; + ::std::optional generator; + + +}; + +class UnsqueezeCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::unsqueeze_copy); + } + + UnsqueezeCopy(const torch::lazy::Value& self, const int64_t& dim, std::vector&& shapes) + : TsNode( + UnsqueezeCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dim)), + dim(dim) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dim=" << dim; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const int64_t& dim) const { + size_t i = 0; + return (operand(i++) == self && + this->dim == dim); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + 
arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dim", dim); + + torch::lazy::TSOpVector unsqueeze_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(unsqueeze_copy_out.size(), 1); + + return unsqueeze_copy_out; + + } + + + int64_t dim; + + +}; + +class UpsampleBilinear2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_bilinear2d); + } + + UpsampleBilinear2d(const torch::lazy::Value& self, const ::std::vector& output_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleBilinear2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, align_corners, scales_h, scales_w)), + output_size(output_size), + align_corners(align_corners), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", align_corners=" << align_corners; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size && + this->align_corners == align_corners && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + 
std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_bilinear2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_bilinear2d_out.size(), 1); + + return upsample_bilinear2d_out; + + } + + + ::std::vector output_size; + bool align_corners; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleBilinear2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_bilinear2d_backward); + } + + UpsampleBilinear2dBackward(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleBilinear2dBackward::ClassOpKind(), + OpList{grad_output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, input_size, align_corners, scales_h, scales_w)), + output_size(output_size), + input_size(input_size), + align_corners(align_corners), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", input_size=" << input_size; + ss << ", align_corners=" << align_corners; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); 
+ } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const bool& align_corners, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == grad_output && + this->output_size == output_size && + this->input_size == input_size && + this->align_corners == align_corners && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(6); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("input_size", input_size); + arguments.emplace_back("align_corners", align_corners); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_bilinear2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_bilinear2d_backward_out.size(), 1); + + return upsample_bilinear2d_backward_out; + + } + + + ::std::vector output_size; + ::std::vector input_size; + bool align_corners; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleNearest2d : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::upsample_nearest2d); + } + + UpsampleNearest2d(const torch::lazy::Value& self, const ::std::vector& output_size, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + 
UpsampleNearest2d::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, scales_h, scales_w)), + output_size(output_size), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& output_size, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == self && + this->output_size == output_size && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(4); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("scales_h", scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_nearest2d_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_nearest2d_out.size(), 1); + + return upsample_nearest2d_out; + + } + + + ::std::vector output_size; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class UpsampleNearest2dBackward : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return 
torch::lazy::OpKind(at::aten::upsample_nearest2d_backward); + } + + UpsampleNearest2dBackward(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const ::std::optional& scales_h, const ::std::optional& scales_w, std::vector&& shapes) + : TsNode( + UpsampleNearest2dBackward::ClassOpKind(), + OpList{grad_output}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(output_size, input_size, scales_h, scales_w)), + output_size(output_size), + input_size(input_size), + scales_h(scales_h), + scales_w(scales_w) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", output_size=" << output_size; + ss << ", input_size=" << input_size; + if (scales_h.has_value()) { + ss << ", scales_h=" << scales_h.value(); + } else { + ss << ", scales_h=null"; + } + if (scales_w.has_value()) { + ss << ", scales_w=" << scales_w.value(); + } else { + ss << ", scales_w=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& grad_output, const ::std::vector& output_size, const ::std::vector& input_size, const ::std::optional& scales_h, const ::std::optional& scales_w) const { + size_t i = 0; + return (operand(i++) == grad_output && + this->output_size == output_size && + this->input_size == input_size && + ((!this->scales_h&&!scales_h) || (this->scales_h&&scales_h && *(this->scales_h) == *scales_h)) && + ((!this->scales_w&&!scales_w) || (this->scales_w&&scales_w && *(this->scales_w) == *scales_w))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(5); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("output_size", output_size); + arguments.emplace_back("input_size", input_size); + arguments.emplace_back("scales_h", 
scales_h); + arguments.emplace_back("scales_w", scales_w); + + torch::lazy::TSOpVector upsample_nearest2d_backward_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(upsample_nearest2d_backward_out.size(), 1); + + return upsample_nearest2d_backward_out; + + } + + + ::std::vector output_size; + ::std::vector input_size; + ::std::optional scales_h; + ::std::optional scales_w; + + +}; + +class ViewCopy : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::view_copy); + } + + ViewCopy(const torch::lazy::Value& self, const ::std::vector& size, std::vector&& shapes) + : TsNode( + ViewCopy::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(size)), + size(size) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const ::std::vector& size) const { + size_t i = 0; + return (operand(i++) == self && + this->size == size); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("size", size); + + torch::lazy::TSOpVector view_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(view_copy_out.size(), 1); + + return view_copy_out; + + } + + + ::std::vector size; + + +}; + +class ViewCopyDtype : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::view_copy); + } + + ViewCopyDtype(const torch::lazy::Value& self, const at::ScalarType& dtype, std::vector&& shapes) + : TsNode( + ViewCopyDtype::ClassOpKind(), + 
OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash(dtype)), + dtype(dtype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dtype=" << dtype; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self, const at::ScalarType& dtype) const { + size_t i = 0; + return (operand(i++) == self && + this->dtype == dtype); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(2); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + arguments.emplace_back("dtype", dtype); + + torch::lazy::TSOpVector view_copy_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(view_copy_out.size(), 1); + + return view_copy_out; + + } + + + at::ScalarType dtype; + + +}; + +class Zero : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::zero); + } + + Zero(const torch::lazy::Value& self, std::vector&& shapes) + : TsNode( + Zero::ClassOpKind(), + OpList{self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash()) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& self) const { + size_t i = 0; + return (operand(i++) == self); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(0); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + + torch::lazy::TSOpVector zero_out = torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + 
TORCH_CHECK_EQ(zero_out.size(), 1); + + return zero_out; + + } + + + + + +}; + +} // namespace lazy +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h new file mode 100644 index 0000000000000000000000000000000000000000..82f63eb0acc9037329361360459e52f8eda6d55c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNativeFunctions.h @@ -0,0 +1,216 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +// an external backend might generate file within its code tree +// and check all the source files within the tree with clang-format. +// so, disable it since the backend might have a different config. +// clang-format off + +// Autogenerated file by gen_backend_stubs.py. Do not edit directly! 
+ +#include + +namespace torch { +namespace lazy { + +struct LazyNativeFunctions { + +static ::std::tuple convolution_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & weight, at::OptionalIntArrayRef bias_sizes, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups, ::std::array output_mask); +static ::std::tuple native_batch_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, const ::std::optional & running_mean, const ::std::optional & running_var, bool training, double momentum, double eps); +static ::std::tuple native_batch_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, const ::std::optional & weight, const ::std::optional & running_mean, const ::std::optional & running_var, const ::std::optional & save_mean, const ::std::optional & save_invstd, bool train, double eps, ::std::array output_mask); +static ::std::tuple native_layer_norm(const at::Tensor & input, at::IntArrayRef normalized_shape, const ::std::optional & weight, const ::std::optional & bias, double eps); +static ::std::tuple native_layer_norm_backward(const at::Tensor & grad_out, const at::Tensor & input, at::IntArrayRef normalized_shape, const at::Tensor & mean, const at::Tensor & rstd, const ::std::optional & weight, const ::std::optional & bias, ::std::array output_mask); +static ::std::tuple svd(const at::Tensor & self, bool some, bool compute_uv); +static ::std::tuple grid_sampler_2d_backward(const at::Tensor & grad_output, const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners, ::std::array output_mask); +static ::std::tuple log_sigmoid_forward(const at::Tensor & self); +static ::std::tuple max(const at::Tensor & self, int64_t dim, bool keepdim); +static ::std::tuple max_pool2d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, 
at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +static ::std::tuple max_pool3d_with_indices(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); +static ::std::tuple native_dropout(const at::Tensor & input, double p, ::std::optional train); +static ::std::tuple nll_loss2d_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +static ::std::tuple nll_loss_forward(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index); +static ::std::tuple sort(const at::Tensor & self, int64_t dim, bool descending); +static ::std::tuple topk(const at::Tensor & self, int64_t k, int64_t dim, bool largest, bool sorted); +static at::Tensor & arange_out(const at::Scalar & start, const at::Scalar & end, const at::Scalar & step, at::Tensor & out); +static at::Tensor & fill_(at::Tensor & self, const at::Scalar & value); +static at::Tensor & logsumexp_out(const at::Tensor & self, at::IntArrayRef dim, bool keepdim, at::Tensor & out); +static at::Tensor _adaptive_avg_pool2d(const at::Tensor & self, at::IntArrayRef output_size); +static at::Tensor _adaptive_avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self); +static at::Tensor _copy_from(const at::Tensor & self, const at::Tensor & dst, bool non_blocking); +static at::Tensor _copy_from_and_resize(const at::Tensor & self, const at::Tensor & dst); +static at::Tensor _log_softmax(const at::Tensor & self, int64_t dim, bool half_to_float); +static at::Tensor _log_softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +static at::Tensor _reshape_alias_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride); +static at::Tensor _softmax(const 
at::Tensor & self, int64_t dim, bool half_to_float); +static at::Tensor _softmax_backward_data(const at::Tensor & grad_output, const at::Tensor & output, int64_t dim, at::ScalarType input_dtype); +static at::Tensor _to_copy(const at::Tensor & self, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, bool non_blocking, ::std::optional memory_format); +static at::Tensor _trilinear(const at::Tensor & i1, const at::Tensor & i2, const at::Tensor & i3, at::IntArrayRef expand1, at::IntArrayRef expand2, at::IntArrayRef expand3, at::IntArrayRef sumdim, int64_t unroll_dim); +static at::Tensor _unsafe_view(const at::Tensor & self, at::IntArrayRef size); +static at::Tensor abs(const at::Tensor & self); +static at::Tensor add(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +static at::Tensor addcdiv(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value); +static at::Tensor addcmul(const at::Tensor & self, const at::Tensor & tensor1, const at::Tensor & tensor2, const at::Scalar & value); +static at::Tensor addmm(const at::Tensor & self, const at::Tensor & mat1, const at::Tensor & mat2, const at::Scalar & beta, const at::Scalar & alpha); +static at::Tensor alias_copy(const at::Tensor & self); +static at::Tensor all(const at::Tensor & self); +static at::Tensor any(const at::Tensor & self); +static at::Tensor as_strided_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +static at::Tensor as_strided_scatter_symint(const at::Tensor & self, const at::Tensor & src, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional storage_offset); +static at::Tensor avg_pool2d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); +static at::Tensor 
avg_pool2d_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, bool ceil_mode, bool count_include_pad, ::std::optional divisor_override); +static at::Tensor baddbmm(const at::Tensor & self, const at::Tensor & batch1, const at::Tensor & batch2, const at::Scalar & beta, const at::Scalar & alpha); +static at::Tensor bernoulli(const at::Tensor & self, ::std::optional generator); +static at::Tensor bernoulli(const at::Tensor & self, double p, ::std::optional generator); +static at::Tensor binary_cross_entropy(const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +static at::Tensor binary_cross_entropy_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction); +static at::Tensor bitwise_and(const at::Tensor & self, const at::Tensor & other); +static at::Tensor bitwise_or(const at::Tensor & self, const at::Tensor & other); +static at::Tensor block_diag(at::TensorList tensors); +static at::Tensor bmm(const at::Tensor & self, const at::Tensor & mat2); +static at::Tensor cat(const at::ITensorListRef & tensors, int64_t dim); +static at::Tensor clamp(const at::Tensor & self, const ::std::optional & min, const ::std::optional & max); +static at::Tensor clamp_min(const at::Tensor & self, const at::Scalar & min); +static at::Tensor clone(const at::Tensor & self, ::std::optional memory_format); +static at::Tensor constant_pad_nd(const at::Tensor & self, at::IntArrayRef pad, const at::Scalar & value); +static at::Tensor convolution(const at::Tensor & input, const at::Tensor & weight, const ::std::optional & bias, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool transposed, at::IntArrayRef output_padding, int64_t groups); +static at::Tensor cos(const at::Tensor & self); +static at::Tensor cumsum(const at::Tensor & self, int64_t 
dim, ::std::optional dtype); +static at::Tensor detach_copy(const at::Tensor & self); +static at::Tensor diag_embed(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_copy(const at::Tensor & self, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor diagonal_scatter(const at::Tensor & self, const at::Tensor & src, int64_t offset, int64_t dim1, int64_t dim2); +static at::Tensor div(const at::Tensor & self, const at::Tensor & other); +static at::Tensor div(const at::Tensor & self, const at::Tensor & other, ::std::optional rounding_mode); +static at::Tensor elu(const at::Tensor & self, const at::Scalar & alpha, const at::Scalar & scale, const at::Scalar & input_scale); +static at::Tensor elu_backward(const at::Tensor & grad_output, const at::Scalar & alpha, const at::Scalar & scale, const at::Scalar & input_scale, bool is_result, const at::Tensor & self_or_result); +static at::Tensor embedding(const at::Tensor & weight, const at::Tensor & indices, int64_t padding_idx, bool scale_grad_by_freq, bool sparse); +static at::Tensor embedding_dense_backward(const at::Tensor & grad_output, const at::Tensor & indices, int64_t num_weights, int64_t padding_idx, bool scale_grad_by_freq); +static at::Tensor empty_strided_symint(c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +static at::Tensor empty_symint(c10::SymIntArrayRef size, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory, ::std::optional memory_format); +static at::Tensor eq(const at::Tensor & self, const at::Scalar & other); +static at::Tensor eq(const at::Tensor & self, const at::Tensor & other); +static at::Tensor exp(const at::Tensor & self); +static 
at::Tensor expand_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size, bool implicit); +static at::Tensor flip(const at::Tensor & self, at::IntArrayRef dims); +static at::Tensor floor(const at::Tensor & self); +static at::Tensor frac(const at::Tensor & self); +static at::Tensor gather(const at::Tensor & self, int64_t dim, const at::Tensor & index, bool sparse_grad); +static at::Tensor ge(const at::Tensor & self, const at::Scalar & other); +static at::Tensor ge(const at::Tensor & self, const at::Tensor & other); +static at::Tensor gelu(const at::Tensor & self, c10::string_view approximate); +static at::Tensor gelu_backward(const at::Tensor & grad_output, const at::Tensor & self, c10::string_view approximate); +static at::Tensor glu(const at::Tensor & self, int64_t dim); +static at::Tensor glu_backward(const at::Tensor & grad_output, const at::Tensor & self, int64_t dim); +static at::Tensor glu_jvp(const at::Tensor & glu, const at::Tensor & x, const at::Tensor & dx, int64_t dim); +static at::Tensor grid_sampler_2d(const at::Tensor & input, const at::Tensor & grid, int64_t interpolation_mode, int64_t padding_mode, bool align_corners); +static at::Tensor gt(const at::Tensor & self, const at::Scalar & other); +static at::Tensor gt(const at::Tensor & self, const at::Tensor & other); +static at::Tensor hardsigmoid(const at::Tensor & self); +static at::Tensor index_select(const at::Tensor & self, int64_t dim, const at::Tensor & index); +static at::Tensor le(const at::Tensor & self, const at::Scalar & other); +static at::Tensor le(const at::Tensor & self, const at::Tensor & other); +static at::Tensor leaky_relu(const at::Tensor & self, const at::Scalar & negative_slope); +static at::Tensor leaky_relu_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & negative_slope, bool self_is_result); +static at::Tensor lift(const at::Tensor & self); +static at::Tensor lift_fresh(const at::Tensor & self); +static at::Tensor linalg_pinv(const 
at::Tensor & self, const ::std::optional & atol, const ::std::optional & rtol, bool hermitian); +static at::Tensor log(const at::Tensor & self); +static at::Tensor log2(const at::Tensor & self); +static at::Tensor log_sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & buffer); +static at::Tensor logdet(const at::Tensor & self); +static at::Tensor lt(const at::Tensor & self, const at::Scalar & other); +static at::Tensor lt(const at::Tensor & self, const at::Tensor & other); +static at::Tensor masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Scalar & value); +static at::Tensor masked_fill(const at::Tensor & self, const at::Tensor & mask, const at::Tensor & value); +static at::Tensor max(const at::Tensor & self); +static at::Tensor max_pool2d_with_indices_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +static at::Tensor max_pool3d_with_indices_backward(const at::Tensor & grad_output, const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode, const at::Tensor & indices); +static at::Tensor maximum(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mean(const at::Tensor & self, ::std::optional dtype); +static at::Tensor mean(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +static at::Tensor min(const at::Tensor & self); +static at::Tensor minimum(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mm(const at::Tensor & self, const at::Tensor & mat2); +static at::Tensor mul(const at::Tensor & self, const at::Tensor & other); +static at::Tensor mv(const at::Tensor & self, const at::Tensor & vec); +static at::Tensor narrow_copy_symint(const at::Tensor & self, int64_t dim, 
c10::SymInt start, c10::SymInt length); +static at::Tensor native_dropout_backward(const at::Tensor & grad_output, const at::Tensor & mask, double scale); +static at::Tensor ne(const at::Tensor & self, const at::Scalar & other); +static at::Tensor ne(const at::Tensor & self, const at::Tensor & other); +static at::Tensor neg(const at::Tensor & self); +static at::Tensor new_empty_strided_symint(const at::Tensor & self, c10::SymIntArrayRef size, c10::SymIntArrayRef stride, ::std::optional dtype, ::std::optional layout, ::std::optional device, ::std::optional pin_memory); +static at::Tensor nll_loss2d_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +static at::Tensor nll_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, const ::std::optional & weight, int64_t reduction, int64_t ignore_index, const at::Tensor & total_weight); +static at::Tensor nonzero(const at::Tensor & self); +static at::Tensor norm(const at::Tensor & self, const ::std::optional & p, at::IntArrayRef dim, bool keepdim); +static at::Tensor normal_functional(const at::Tensor & self, double mean, double std, ::std::optional generator); +static at::Tensor permute_copy(const at::Tensor & self, at::IntArrayRef dims); +static at::Tensor pixel_shuffle(const at::Tensor & self, int64_t upscale_factor); +static at::Tensor pixel_unshuffle(const at::Tensor & self, int64_t downscale_factor); +static at::Tensor pow(const at::Tensor & self, const at::Scalar & exponent); +static at::Tensor pow(const at::Tensor & self, const at::Tensor & exponent); +static at::Tensor random(const at::Tensor & self, ::std::optional generator); +static at::Tensor random(const at::Tensor & self, int64_t from, ::std::optional to, ::std::optional generator); +static at::Tensor random(const at::Tensor & self, int64_t to, ::std::optional 
generator); +static at::Tensor reciprocal(const at::Tensor & self); +static at::Tensor relu(const at::Tensor & self); +static at::Tensor remainder(const at::Tensor & self, const at::Tensor & other); +static at::Tensor repeat(const at::Tensor & self, at::IntArrayRef repeats); +static at::Tensor rsqrt(const at::Tensor & self); +static at::Tensor scatter_add(const at::Tensor & self, int64_t dim, const at::Tensor & index, const at::Tensor & src); +static at::Tensor select_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt index); +static at::Tensor select_copy(const at::Tensor & self, int64_t dim, int64_t index); +static at::Tensor select_scatter(const at::Tensor & self, const at::Tensor & src, int64_t dim, int64_t index); +static at::Tensor sgn(const at::Tensor & self); +static at::Tensor sigmoid(const at::Tensor & self); +static at::Tensor sigmoid_backward(const at::Tensor & grad_output, const at::Tensor & output); +static at::Tensor silu(const at::Tensor & self); +static at::Tensor slice_backward_symint(const at::Tensor & grad_output, c10::SymIntArrayRef input_sizes, int64_t dim, c10::SymInt start, c10::SymInt end, c10::SymInt step); +static at::Tensor slice_copy_symint(const at::Tensor & self, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +static at::Tensor slice_scatter_symint(const at::Tensor & self, const at::Tensor & src, int64_t dim, ::std::optional start, ::std::optional end, c10::SymInt step); +static at::Tensor smooth_l1_loss(const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +static at::Tensor smooth_l1_loss_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Tensor & target, int64_t reduction, double beta); +static at::Tensor softplus(const at::Tensor & self, const at::Scalar & beta, const at::Scalar & threshold); +static at::Tensor softplus_backward(const at::Tensor & grad_output, const at::Tensor & self, const 
at::Scalar & beta, const at::Scalar & threshold); +static at::Tensor sqrt(const at::Tensor & self); +static at::Tensor squeeze_copy(const at::Tensor & self); +static at::Tensor squeeze_copy(const at::Tensor & self, at::IntArrayRef dim); +static at::Tensor squeeze_copy(const at::Tensor & self, int64_t dim); +static at::Tensor stack(at::TensorList tensors, int64_t dim); +static at::Tensor std(const at::Tensor & self, at::OptionalIntArrayRef dim, bool unbiased, bool keepdim); +static at::Tensor std(const at::Tensor & self, at::OptionalIntArrayRef dim, const ::std::optional & correction, bool keepdim); +static at::Tensor std(const at::Tensor & self, bool unbiased); +static at::Tensor sub(const at::Tensor & self, const at::Tensor & other, const at::Scalar & alpha); +static at::Tensor sum(const at::Tensor & self, ::std::optional dtype); +static at::Tensor sum(const at::Tensor & self, at::OptionalIntArrayRef dim, bool keepdim, ::std::optional dtype); +static at::Tensor t_copy(const at::Tensor & self); +static at::Tensor tanh(const at::Tensor & self); +static at::Tensor tanh_backward(const at::Tensor & grad_output, const at::Tensor & output); +static at::Tensor threshold(const at::Tensor & self, const at::Scalar & threshold, const at::Scalar & value); +static at::Tensor threshold_backward(const at::Tensor & grad_output, const at::Tensor & self, const at::Scalar & threshold); +static at::Tensor trace(const at::Tensor & self); +static at::Tensor transpose_copy(const at::Tensor & self, int64_t dim0, int64_t dim1); +static at::Tensor tril(const at::Tensor & self, int64_t diagonal); +static at::Tensor triu(const at::Tensor & self, int64_t diagonal); +static at::Tensor trunc(const at::Tensor & self); +static at::Tensor unfold_copy(const at::Tensor & self, int64_t dimension, int64_t size, int64_t step); +static at::Tensor uniform(const at::Tensor & self, double from, double to, ::std::optional generator); +static at::Tensor unsqueeze_copy(const at::Tensor & self, int64_t dim); 
+static at::Tensor upsample_bilinear2d(const at::Tensor & self, at::IntArrayRef output_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_bilinear2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, bool align_corners, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_nearest2d(const at::Tensor & self, at::IntArrayRef output_size, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor upsample_nearest2d_backward(const at::Tensor & grad_output, at::IntArrayRef output_size, at::IntArrayRef input_size, ::std::optional scales_h, ::std::optional scales_w); +static at::Tensor view_copy(const at::Tensor & self, at::ScalarType dtype); +static at::Tensor view_copy_symint(const at::Tensor & self, c10::SymIntArrayRef size); +static at::Tensor zero(const at::Tensor & self); +static ::std::tuple native_group_norm(const at::Tensor & input, const ::std::optional & weight, const ::std::optional & bias, int64_t N, int64_t C, int64_t HxW, int64_t group, double eps); +static at::Tensor max_pool3d(const at::Tensor & self, at::IntArrayRef kernel_size, at::IntArrayRef stride, at::IntArrayRef padding, at::IntArrayRef dilation, bool ceil_mode); + +}; +} // namespace lazy +} // namespace torch + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h new file mode 100644 index 0000000000000000000000000000000000000000..4cea78933ef5fb0fe8fb8dea61f56825200d92bc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/generated/LazyNonNativeIr.h @@ -0,0 +1,160 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include + +// This file contains autogenerated LazyTensor Non Native IR nodes + +namespace torch { +namespace lazy { + +class Scalar : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::prim::Constant); + } + + Scalar(const at::Scalar& value, const at::ScalarType& type) + : TsNode( + Scalar::ClassOpKind(), + OpList{}, + compute_shape_scalar(value, type), + /* num_outputs */ 1, + torch::lazy::MHash(value, type)), + value(value), + type(type) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", value=" << value; + ss << ", type=" << type; + return ss.str(); + } + + + + bool CanBeReused(const at::Scalar& value, const at::ScalarType& type) const; + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + at::Scalar value; + at::ScalarType type; + + +}; + +class Expand : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(at::aten::expand); + } + + Expand(const torch::lazy::Value& input, const ::std::vector& size, const bool& is_scalar_expand) + : TsNode( + Expand::ClassOpKind(), + OpList{input}, + [&](){ return compute_shape_expand(operand(0), 
size, is_scalar_expand)[0]; }, + /* num_outputs */ 1, + torch::lazy::MHash(size, is_scalar_expand)), + size(size), + is_scalar_expand(is_scalar_expand) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", size=" << size; + ss << ", is_scalar_expand=" << is_scalar_expand; + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const ::std::vector& size, const bool& is_scalar_expand) const { + size_t i = 0; + return (operand(i++) == input && + this->size == size && + this->is_scalar_expand == is_scalar_expand); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + ::std::vector size; + bool is_scalar_expand; + + +}; + +class Cast : public TsNode { + public: + static torch::lazy::OpKind ClassOpKind() { + return torch::lazy::OpKind(ltc_cast); + } + + Cast(const torch::lazy::Value& input, const at::ScalarType& dtype, const ::std::optional& stype) + : TsNode( + Cast::ClassOpKind(), + OpList{input}, + compute_shape_cast(input, dtype, stype), + /* num_outputs */ 1, + torch::lazy::MHash(dtype, stype)), + dtype(dtype), + stype(stype) + { + + } + + std::string ToString() const override { + std::stringstream ss; + ss << TsNode::ToString(); + ss << ", dtype=" << dtype; + if (stype.has_value()) { + ss << ", stype=" << stype.value(); + } else { + ss << ", stype=null"; + } + return ss.str(); + } + + + + bool CanBeReused(const torch::lazy::Value& input, const at::ScalarType& dtype, const ::std::optional& stype) const { + size_t i = 0; + return (operand(i++) == input && + this->dtype == dtype && + ((!this->stype&&!stype) || (this->stype&&stype && *(this->stype) == *stype))); + } + + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override; + + at::ScalarType dtype; + ::std::optional stype; + + +}; + +} // namespace lazy +} // namespace torch + +#else +#error 
"This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h new file mode 100644 index 0000000000000000000000000000000000000000..56e5f624fcff53187e799126003008a0d2874429 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/init.h @@ -0,0 +1,15 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include + +namespace torch::lazy { + +TORCH_PYTHON_API void initLazyBindings(PyObject* module); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h new file mode 100644 index 0000000000000000000000000000000000000000..dc5777bb0f45af1f31d11fc08adb7f873f7bfe46 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/python/python_util.h @@ -0,0 +1,18 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include +#include +#include +#include + +namespace torch::lazy { + +std::optional TORCH_PYTHON_API GetPythonFrameTop(); + +std::vector TORCH_PYTHON_API GetPythonFrames(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h new file mode 100644 index 0000000000000000000000000000000000000000..0157b30fc7817ed9a74cca3ceb769db3a31b320b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/config.h @@ -0,0 +1,12 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +// TODO(whc) unclear if this is useful, has only been tested as true +TORCH_DECLARE_bool(torch_lazy_ts_tensor_update_sync); + +TORCH_DECLARE_bool(torch_lazy_ts_cuda); + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h new file mode 100644 index 0000000000000000000000000000000000000000..4c42b0831100df2c145d7d5ddc0933f7c2085460 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/dynamic_ir.h @@ -0,0 +1,82 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include + +#include +#include +#include +#include +#include +#include +#include + +TORCH_DECLARE_bool(ltc_enable_dynamic_shapes); + +namespace torch::lazy { + +/** + * The goal of "dynamic" Nodes is to patch a hole in our tracing. + * Previously, if a user called `sizes` on a Tensor, it would leak out + * of our tracing system, as `sizes` returns a torch.Size or an int. 
To + * prevent this from happening, we introduce DimensionNode, a new type + * of Node that abstracts the operation of getting the dimensions of a + * Tensor. + * + * Consider the following example: + * ``` + * numel = x.shape()[0] * x.shape()[1] + * ``` + * + * Here, `x.shape()[i]` will be a SizeNode (subclass of DimensionNode), + * and the multiplication of the two SizeNodes will be represented by + * a SizeMul (also a subclass of DimensionNode). Through this, we can + * prevent `numel` from being represented as a Python int and thus + * burned into the Graph. + */ + +// Represents the result of calling `size` on a Tensor +class TORCH_API SizeNode : public TsNode, public DimensionNode { + public: + SizeNode(Value input, size_t dim); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; + size_t dim_ = 0; + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; +}; + +class TORCH_API SizeAdd : public TsNode, public DimensionNode { + public: + SizeAdd(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +class TORCH_API SizeMul : public TsNode, public DimensionNode { + public: + SizeMul(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +class TORCH_API SizeDiv : public TsNode, public DimensionNode { + public: + SizeDiv(Value a, Value b); + int64_t getStaticValue() const override; + bool isSymbolic() const override; + std::string ToString() const override; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h new file mode 100644 index 0000000000000000000000000000000000000000..7d8dd8c804cc68910334fc737043c58e90cc4ea0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ir_builder.h @@ -0,0 +1,74 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +struct TorchScriptIrBuilder : IrBuilder { + NodePtr MakeDeviceData( + const std::shared_ptr& data) const override { + return DeviceData::Create(data); + } + // TODO: Scalar node is not currently used by ts_backend. Enable reusing + // Scalar node later if needed. 
+ NodePtr MakeScalar(const at::Scalar& value, const at::ScalarType& type) + const override { + return MakeNode(value, type); + } + NodePtr MakeExpand( + const Value& input0, + const std::vector& size, + const bool& is_scalar_expand) const override { + return ReuseOrMakeNode(input0, size, is_scalar_expand); + } + NodePtr MakeCast( + const Value& input0, + const at::ScalarType& dtype, + const std::optional& stype = + std::nullopt) const override { + return ReuseOrMakeNode(input0, dtype, stype); + } + NodePtr MakeTensorList(const OpList& inputs) const override { + return ReuseOrMakeNode(inputs); + } + // Generic needs cleanup + NodePtr MakeGeneric( + const OpKind& op, + const OpList& operands, + const Shape& shape, + const size_t& num_outputs = 1, + const hash_t& hash_seed = + static_cast(0x5a2d296e9)) const override { + return MakeNode(op, operands, shape, num_outputs, hash_seed); + } + + // dynamic ir nodes + // TODO: verify if IR node reusing works for Dynamic shape ops + NodePtr MakeSizeNode(const Value& input, size_t dim) const override { + return MakeNode(input, dim); + } + NodePtr MakeSizeAdd(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } + NodePtr MakeSizeMul(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } + NodePtr MakeSizeDiv(const Value& a, const Value& b) const override { + return MakeNode(a, b); + } +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h new file mode 100644 index 0000000000000000000000000000000000000000..f258cf99c580b129f640070fd14839230a6f881f --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/device_data.h @@ -0,0 +1,55 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include + +#include + +namespace torch::lazy { + +class TORCH_API DeviceData : public TsNode { + public: + static OpKind ClassOpKind() { + return ltc_device_data; + } + + explicit DeviceData(std::shared_ptr data); + + // A DeviceData node can be reused if the shape matches, + // but we will substitute the actual data_ pointer under + // the hood. + bool CanBeReused(const std::shared_ptr& data) const { + return data_->shape() == data->shape(); + } + + std::string ToString() const override; + + const std::shared_ptr& data() const { + return data_; + } + + void SetData(std::shared_ptr data) { + data_ = std::move(data); + } + + static const DeviceData* Cast(const Node* node); + + // To reuse IR nodes, use this method to create DeviceData nodes + // instead of calling the constructor directconst ly. + static NodePtr Create(const std::shared_ptr& data); + + TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; + + private: + std::shared_ptr data_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h new file mode 100644 index 0000000000000000000000000000000000000000..8334391a593aa80e966f057b94eb47b43834c03e --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/generic.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +// Generic IR Node implementation for nodes which can simply be described by a +// specific OpKind and a lowering function. IR nodes carrying +// metadata should not be using this class TORCH_API (and have the metadata +// captured by the LowerFn), but they should instead create a dedicated IR node. +// Doing the former would limit IR introspection. 
+class TORCH_API Generic : public TsNode { + public: + Generic( + OpKind op, + OpList operands, + Shape shape, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic( + OpKind op, + OpList operands, + const std::function& shape_fn, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic( + OpKind op, + OpList operands, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)); + + Generic(OpKind op, Shape shape, size_t num_outputs, hash_t hash_seed); + + private: + hash_t hash_seed_; +}; + +inline NodePtr GenericOp( + OpKind op, + OpList operands, + Shape shape, + size_t num_outputs = 1, + hash_t hash_seed = static_cast(0x5a2d296e9)) { + return MakeNode( + op, operands, std::move(shape), num_outputs, hash_seed); +} + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h new file mode 100644 index 0000000000000000000000000000000000000000..121cd0ffcbc99e5680c16e312013059d6dd5e74c --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ops/to_copy.h @@ -0,0 +1,130 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +// This IR was copied from code-generated output, but the entire _to_copy +// operator cannot be trivially code generated since it is only desirable to +// capture IR for certain permutations of _to_copy (e.g. 
dtype), and for the +// others it is difficult to even invoke the aten/eager fallback necessitating +// directly implementing the right to(device) behavior +class ToCopy : public torch::lazy::TsNode { + public: + static OpKind ClassOpKind() { + return OpKind(at::aten::_to_copy); + } + + ToCopy( + const torch::lazy::Value& self, + const std::optional& dtype, + const std::optional& layout, + const std::optional& device, + const std::optional& pin_memory, + const bool& non_blocking, + const std::optional& memory_format, + std::vector&& shapes) + : torch::lazy::TsNode( + ClassOpKind(), + {self}, + std::move(shapes), + /* num_outputs */ 1, + torch::lazy::MHash( + dtype, + layout, + device, + pin_memory, + non_blocking, + memory_format)), + + dtype(dtype), + layout(layout), + device(device), + pin_memory(pin_memory), + non_blocking(non_blocking), + memory_format(memory_format) {} + + bool CanBeReused( + const torch::lazy::Value& self, + const std::optional& dtype, + const std::optional& layout, + const std::optional& device, + const std::optional& pin_memory, + const bool& non_blocking, + const std::optional& memory_format) const { + size_t i = 0; + return ( + operand(i++) == self && this->dtype == dtype && + this->layout == layout && this->device == device && + this->pin_memory == pin_memory && this->non_blocking == non_blocking && + this->memory_format == memory_format); + } + + std::string ToString() const override { + std::stringstream ss; + ss << torch::lazy::TsNode::ToString(); + if (dtype.has_value()) { + ss << ", dtype=" << dtype.value(); + } else { + ss << ", dtype=null"; + } + if (layout.has_value()) { + ss << ", layout=" << layout.value(); + } else { + ss << ", layout=null"; + } + if (device.has_value()) { + ss << ", device=" << device.value(); + } else { + ss << ", device=null"; + } + if (pin_memory.has_value()) { + ss << ", pin_memory=" << pin_memory.value(); + } else { + ss << ", pin_memory=null"; + } + ss << ", non_blocking=" << non_blocking; + if 
(memory_format.has_value()) { + ss << ", memory_format=" << memory_format.value(); + } else { + ss << ", memory_format=null"; + } + return ss.str(); + } + + torch::lazy::TSOpVector Lower( + std::shared_ptr function, + torch::lazy::TSLoweringContext* loctx) const override { + std::vector arguments; + std::vector kwarguments; + arguments.reserve(1); + kwarguments.reserve(6); + size_t i = 0; + arguments.emplace_back(loctx->GetOutputOp(operand(i++))); + kwarguments.emplace_back("dtype", dtype); + kwarguments.emplace_back("layout", layout); + kwarguments.emplace_back("device", device); + kwarguments.emplace_back("pin_memory", pin_memory); + kwarguments.emplace_back("non_blocking", non_blocking); + kwarguments.emplace_back("memory_format", memory_format); + torch::lazy::TSOpVector _to_copy_out = + torch::lazy::LowerTSBuiltin(function, op().op, arguments, kwarguments); + TORCH_CHECK_EQ(_to_copy_out.size(), 1); + + return _to_copy_out; + } + + std::optional dtype; + std::optional layout; + std::optional device; + std::optional pin_memory; + bool non_blocking; + std::optional memory_format; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h new file mode 100644 index 0000000000000000000000000000000000000000..b6d42fb70a08f6942e1a9c89ea387b0221878cb2 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/tensor_aten_ops.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +////////////////////////////////////////////////////////////////////////////// +// ATEN operators follows here, listed in alphabetical order. +////////////////////////////////////////////////////////////////////////////// + +void copy_(torch::lazy::LazyTensorPtr& input, torch::lazy::LazyTensorPtr& src); +// Fills the input with the given value. +void fill_(torch::lazy::LazyTensorPtr& input, const at::Scalar& value); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h new file mode 100644 index 0000000000000000000000000000000000000000..3d7ba8436e4923dac4fe138a1a90cb62b084b546 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_autograd_functions.h @@ -0,0 +1,27 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +namespace torch::lazy { + +struct MaxPool3dAutogradFunctionTS + : public torch::autograd::Function { + static at::Tensor forward( + torch::autograd::AutogradContext* ctx, + const at::Tensor& self, + at::IntArrayRef kernel_size, + at::IntArrayRef stride, + at::IntArrayRef padding, + at::IntArrayRef dilation, + bool ceil_mode); + static torch::autograd::variable_list backward( + torch::autograd::AutogradContext* ctx, + torch::autograd::variable_list grad_output); +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h new file mode 100644 index 0000000000000000000000000000000000000000..d00d8f1812545994e00b2137df9cdc11c1cf20e8 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_backend_impl.h @@ -0,0 +1,57 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include + +namespace torch::lazy { + +class TORCH_API TSData : public torch::lazy::BackendData { + public: + TSData(const at::Scalar& scalar, const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, torch::lazy::Shape(scalar.type(), {})), + scalar(scalar) {} + + TSData( + at::Tensor data, + const torch::lazy::Shape& shape, + const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, shape), data_(std::move(data)) {} + + TSData( + const torch::lazy::Shape& shape, + const torch::lazy::BackendDevice& device) + : torch::lazy::BackendData(device, shape) {} + + Handle GetHandle() override { + return reinterpret_cast(this); + } + + void Assign(const torch::lazy::BackendData& data) override { + data_ = static_cast(data).data_; + } + + bool HasValue() const override { + return data_.defined(); + } + + at::Tensor data() { + return data_; + } + + std::optional scalar; + + private: + at::Tensor data_; +}; + +TORCH_API torch::lazy::BackendImplInterface* GetTSBackendImpl(); + +TORCH_PYTHON_API void InitTorchScriptBackend(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h new file mode 100644 index 0000000000000000000000000000000000000000..3cbf6f8a37d864acfe1f2be569a1b7cc436801fc --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_eager_fallback.h @@ -0,0 +1,30 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include + +namespace torch::lazy { + +bool force_eager_fallback(c10::Symbol op); +void ltc_eager_fallback( + const c10::OperatorHandle& op, + torch::jit::Stack* stack); + +void ts_eager_fallback( + const c10::OperatorHandle& op, + torch::jit::Stack* stack, + c10::DeviceType device_type); + +// The TorchScript backend does not register itself with pytorch dispatcher +// until it is explicitly initialized. This function should only be called +// by the main Torchscript backend init function. +void register_ts_ltc_eager_fallback(); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h new file mode 100644 index 0000000000000000000000000000000000000000..3ab1b3191135cd0ef213962515cc264459f9f28b --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_lowering_context.h @@ -0,0 +1,156 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include + +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +using TSOpVector = std::vector; + +class TORCH_API TSComputation : public Computation { + public: + TSComputation(const std::shared_ptr& graph) + : graph_(graph), graph_executor_(graph, "") { + for (torch::jit::Value* input : graph_->inputs()) { + parameter_names_.push_back(input->debugName()); + } + } + + int parameters_size() const override { + return static_cast(parameter_names_.size()); + } + + const std::vector& parameter_shapes() const override { + TORCH_CHECK( + false, "TODO(whc) implement TS computation shapes or change interface"); + return parameter_shapes_; + } + + const std::vector& parameter_names() const override { + return parameter_names_; + } + + const Shape& result_shape() const override { + TORCH_CHECK( + false, "TODO(whc) implement TS computation shapes or change interface"); + return result_shape_; + } + + const std::string to_string() const override { + std::ostringstream oss; + oss << *graph_; + return oss.str(); + } + + std::shared_ptr graph() const { + return graph_; + } + + torch::jit::GraphExecutor& graph_executor() { + return graph_executor_; + } + + private: + std::shared_ptr graph_; + torch::jit::GraphExecutor graph_executor_; + std::vector parameter_names_; + 
std::vector parameter_shapes_; + Shape result_shape_; +}; + +class TORCH_API TSLoweringContext : public LoweringContext { + public: + TSLoweringContext(const std::string& name, const BackendDevice device); + + TSLoweringContext( + const std::string& name, + BackendDevice device, + c10::ArrayRef post_order, + Util::EmissionMap emit_status); + + size_t AddResult(const Output& output) override { + return AddResult(GetOutputOp(output)); + } + + void AddParameter( + const torch::lazy::Output& output, + size_t index, + const Shape& shape, + const std::string& name) override { + TORCH_INTERNAL_ASSERT(false, "not implemented"); + } + + void Lower(const Node* node); + + ComputationPtr Build() override { + for (torch::jit::Value* output : root_tuple_) { + graph_->block()->registerOutput(output); + } + return std::make_shared(graph_); + } + + // Retrieves the lowered operation for an output. If the requested output is + // not available yet, the graph behind the output's Node is lowered, and the + // corresponding TS operation returned. + torch::jit::Value* GetOutputOp(const Output& output) { + auto it = emitted_outputs_.find(output); + if (it == emitted_outputs_.end()) { + auto post_order = Util::ComputePostOrder(output.node, &emit_status_); + for (auto node : post_order) { + Lower(node); + } + // At this point the output better be present, otherwise there is an issue + // with the lowering code. + it = emitted_outputs_.find(output); + TORCH_CHECK( + it != emitted_outputs_.end(), + "No TS operation emitted for output: ", + output.ToString()); + } + return it->second; + } + + // Assigns the given TS operation to the specified output. As outputs are + // lowered in a post-order fashion, later nodes should always find their + // operands among the emitted outputs. + void AssignOutputOp(const Output& output, torch::jit::Value* op); + + // If a parameter associated with data has already been declared, it will be + // returned. 
Otherwise a new one will be created, associated with the tensor + // held in data. + torch::jit::Value* GetParameter(const BackendDataPtr& data); + + std::shared_ptr graph() const { + return graph_; + } + + private: + struct Parameter { + torch::jit::Value* param{nullptr}; + size_t index = 0; + }; + + size_t AddResult(torch::jit::Value* op) { + root_tuple_.push_back(op); + return root_tuple_.size() - 1; + } + + std::shared_ptr graph_; + std::shared_ptr function_; + std::unordered_map parameters_map_; + std::vector root_tuple_; + OutputMap emitted_outputs_; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h new file mode 100644 index 0000000000000000000000000000000000000000..5efd7eed90acd7f260b9f9a64fd86819cc9fb6c3 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node.h @@ -0,0 +1,109 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include +#include +#include +#include +#include +#include + +namespace torch::lazy { + +using TSOpVector = std::vector; + +class TORCH_API TsNode : public lazy::Node { + public: + TsNode( + OpKind op, + OpList operands, + std::vector&& shapes, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + OpList operands, + const std::function& shape_fn, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + OpList operands, + size_t num_outputs, + hash_t hash_seed = kHashSeed); + + TsNode( + OpKind op, + Shape shape, + size_t num_outputs, + hash_t hash_seed = 
kHashSeed); + + ~TsNode() override = default; + + hash_t hash() const override; + + hash_t shapeHash() const override; + + const std::string getPythonStacktrace() const; + + // Lower is a backend-specific method since it returns a backend specific + // type. hence, it is convenient to define it differently per-backend rather + // than at Node API + virtual TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const; + + private: + // The hash of the dag WITH size info. Used for shape caching + hash_t shape_hash_; + // The hash of the dag used to look up the compiled graph by a hash + // in this case, we will use the dag hash WITHOUT size info if dynamic shape + // is enabled and use the dag hash WITH size info otherwise. + hash_t dag_hash_; +}; + +// Note: this OpKind is separate from ltc_ops.h since it would be a circular +// import otherwise, I like leaving TensorList in this file, and I think most of +// ltc_ops special cases will be deleted anyway +const OpKind tensor_list_opkind = OpKind::Get("lazy_tensors::tensor_list"); + +// TensorList represents an at::TensorList which is a vector[Tensor] but is also +// a first-class IValue and can be fed as a single input to a TS program. It is +// much easier to handle TensorLists in Lazy Tensor code if they are represented +// as a single Node so there can be more than one TensorList and more than one +// Tensor side-by-side as operands to an op. +// +// Note: shape is undefined for TensorList. We assert in some places that +// #shapes matches #outputs and this stems from +// the fact that currently all IR nodes represent tensors (there is no +// type system for this IR). Because of this, TensorList is a bit of a +// hack. +// +// TODO(whc) once Shape() API is moved to Node base, also make it virtual, and +// then implement it as NotImplemented for TensorList, also fixing the assertion +// that would fail. 
+struct TORCH_API TensorList : public TsNode { + static OpKind ClassOpKind() { + return tensor_list_opkind; + } + + TensorList() = delete; + TensorList(OpList values); + + bool CanBeReused(OpList values) const { + return operands() == std::vector(values.begin(), values.end()); + } + + TSOpVector Lower( + std::shared_ptr function, + TSLoweringContext* loctx) const override; +}; + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h new file mode 100644 index 0000000000000000000000000000000000000000..37a11e964bb5e8e4eeaff374b8a5b7792c3502b0 --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/lazy/ts_backend/ts_node_lowering.h @@ -0,0 +1,20 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once + +#include +#include + +namespace torch::lazy { +using TSOpVector = std::vector; + +TORCH_API TSOpVector LowerTSBuiltin( + const std::shared_ptr& function, + c10::Symbol sym, + const std::vector& arguments, + const std::vector& kwarguments = {}); + +} // namespace torch::lazy + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." 
+#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h new file mode 100644 index 0000000000000000000000000000000000000000..b50d495618218335dd644f3da308eb59f28a9eac --- /dev/null +++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/torch/include/torch/csrc/mtia/profiler/MTIAMemoryProfiler.h @@ -0,0 +1,25 @@ +#if !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION) +#pragma once +#include + +namespace torch::mtia { +using namespace torch::profiler::impl::python_tracer; + +void initMemoryProfiler(); + +std::unique_ptr getMemoryTracer(); + +class MTIAMemoryProfiler final : public PythonMemoryTracerBase { + public: + explicit MTIAMemoryProfiler() = default; + ~MTIAMemoryProfiler() override = default; + void start() override; + void stop() override; + void export_memory_history(const std::string& path) override; +}; + +} // namespace torch::mtia + +#else +#error "This file should not be included when either TORCH_STABLE_ONLY or TORCH_TARGET_VERSION is defined." +#endif // !defined(TORCH_STABLE_ONLY) && !defined(TORCH_TARGET_VERSION)