diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..869bbaa0d88d1bf4eee77ce5ce5fe679ac8d2ee9
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..747ef42a0e46b6d2d3b2dc99ab0f98041f58014f
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/big_modeling.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..71983c40b90c35ca4ca3cd6ed09d5d6853dc1a8a
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/checkpointing.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..00be399e9b0c01cd32772fae980fca18f47d97ef
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/launchers.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..adf7a1abe7b65541e873cd7c0aee0de416f042eb
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/accelerate/__pycache__/optimizer.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py
new file mode 100644
index 0000000000000000000000000000000000000000..c35365d0510605c08588775d4e5598586c8f8311
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/ElementSoup.py
@@ -0,0 +1,10 @@
+__doc__ = """Legacy interface to the BeautifulSoup HTML parser.
+"""
+
+__all__ = ["parse", "convert_tree"]
+
+from .soupparser import convert_tree, parse as _parse
+
def parse(file, beautifulsoup=None, makeelement=None):
    """Parse *file* through the soupparser backend and return its root.

    The three arguments are forwarded unchanged to ``soupparser.parse``.
    ``getroot()`` is then called on whatever that returns, so users of this
    legacy entry point receive the result of ``getroot()`` rather than the
    object produced by the parser itself.
    """
    tree = _parse(file, beautifulsoup=beautifulsoup, makeelement=makeelement)
    return tree.getroot()
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/_difflib.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/_difflib.py
new file mode 100644
index 0000000000000000000000000000000000000000..dfd0ebd888880bbc8ad235436422dcdc12c33043
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/_difflib.py
@@ -0,0 +1,2106 @@
+# Copied from CPython 3.14b2+.
+# cython: infer_types=True
+
+"""
+Module difflib -- helpers for computing deltas between objects.
+
+Function get_close_matches(word, possibilities, n=3, cutoff=0.6):
+ Use SequenceMatcher to return list of the best "good enough" matches.
+
+Function context_diff(a, b):
+ For two lists of strings, return a delta in context diff format.
+
+Function ndiff(a, b):
+ Return a delta: the difference between `a` and `b` (lists of strings).
+
+Function restore(delta, which):
+ Return one of the two sequences that generated an ndiff delta.
+
+Function unified_diff(a, b):
+ For two lists of strings, return a delta in unified diff format.
+
+Class SequenceMatcher:
+ A flexible class for comparing pairs of sequences of any type.
+
+Class Differ:
+ For producing human-readable deltas from sequences of lines of text.
+
+Class HtmlDiff:
+ For producing HTML side by side comparison with change highlights.
+"""
+
try:
    import cython
except ImportError:
    # Cython is optional.  When it cannot be imported, bind the name to a
    # small stand-in so later ``cython.<something>`` references still work.
    class fake_cython:
        """Pure-Python placeholder used when the ``cython`` module is missing."""

        # The placeholder never represents compiled code.
        compiled = False

        def cfunc(self, func):
            """Decorator stand-in: hand *func* back untouched."""
            return func

        def declare(self, _, value):
            """Ignore the requested type and return *value* as given."""
            return value

        def __getattr__(self, type_name):
            """Resolve every other attribute lookup to the string ``"object"``."""
            return "object"

    cython = fake_cython()
+
+
+__all__ = ['get_close_matches', 'ndiff', 'restore', 'SequenceMatcher',
+ 'Differ','IS_CHARACTER_JUNK', 'IS_LINE_JUNK', 'context_diff',
+ 'unified_diff', 'diff_bytes', 'HtmlDiff', 'Match']
+
+from heapq import nlargest as _nlargest
+from collections import namedtuple as _namedtuple
+
+try:
+ from types import GenericAlias
+except ImportError:
+ GenericAlias = None
+
+Match = _namedtuple('Match', 'a b size')
+
+def _calculate_ratio(matches, length):
+ if length:
+ return 2.0 * matches / length
+ return 1.0
+
class SequenceMatcher:

    """
    SequenceMatcher is a flexible class for comparing pairs of sequences of
    any type, so long as the sequence elements are hashable.  The basic
    algorithm predates, and is a little fancier than, an algorithm
    published in the late 1980's by Ratcliff and Obershelp under the
    hyperbolic name "gestalt pattern matching".  The basic idea is to find
    the longest contiguous matching subsequence that contains no "junk"
    elements (R-O doesn't address junk).  The same idea is then applied
    recursively to the pieces of the sequences to the left and to the right
    of the matching subsequence.  This does not yield minimal edit
    sequences, but does tend to yield matches that "look right" to people.

    SequenceMatcher tries to compute a "human-friendly diff" between two
    sequences.  Unlike e.g. UNIX(tm) diff, the fundamental notion is the
    longest *contiguous* & junk-free matching subsequence.  That's what
    catches peoples' eyes.  The Windows(tm) windiff has another interesting
    notion, pairing up elements that appear uniquely in each sequence.
    That, and the method here, appear to yield more intuitive difference
    reports than does diff.  This method appears to be the least vulnerable
    to syncing up on blocks of "junk lines", though (like blank lines in
    ordinary text files, or maybe "<P>" lines in HTML files).  That may be
    because this is the only method of the 3 that has a *concept* of
    "junk" <wink>.

    Example, comparing two strings, and considering blanks to be "junk":

    >>> s = SequenceMatcher(lambda x: x == " ",
    ...                     "private Thread currentThread;",
    ...                     "private volatile Thread currentThread;")
    >>>

    .ratio() returns a float in [0, 1], measuring the "similarity" of the
    sequences.  As a rule of thumb, a .ratio() value over 0.6 means the
    sequences are close matches:

    >>> print(round(s.ratio(), 3))
    0.866
    >>>

    If you're only interested in where the sequences match,
    .get_matching_blocks() is handy:

    >>> for block in s.get_matching_blocks():
    ...     print("a[%d] and b[%d] match for %d elements" % block)
    a[0] and b[0] match for 8 elements
    a[8] and b[17] match for 21 elements
    a[29] and b[38] match for 0 elements

    Note that the last tuple returned by .get_matching_blocks() is always a
    dummy, (len(a), len(b), 0), and this is the only case in which the last
    tuple element (number of elements matched) is 0.

    If you want to know how to change the first sequence into the second,
    use .get_opcodes():

    >>> for opcode in s.get_opcodes():
    ...     print("%6s a[%d:%d] b[%d:%d]" % opcode)
     equal a[0:8] b[0:8]
    insert a[8:8] b[8:17]
     equal a[8:29] b[17:38]

    See the Differ class for a fancy human-friendly file differencer, which
    uses SequenceMatcher both to compare sequences of lines, and to compare
    sequences of characters within similar (near-matching) lines.

    See also function get_close_matches() in this module, which shows how
    simple code building on SequenceMatcher can be used to do useful work.

    Timing:  Basic R-O is cubic time worst case and quadratic time expected
    case.  SequenceMatcher is quadratic time for the worst case and has
    expected-case behavior dependent in a complicated way on how many
    elements the sequences have in common; best case time is linear.
    """

    def __init__(self, isjunk=None, a='', b='', autojunk=True):
        """Construct a SequenceMatcher.

        Optional arg isjunk is None (the default), or a one-argument
        function that takes a sequence element and returns true iff the
        element is junk.  None is equivalent to passing "lambda x: 0", i.e.
        no elements are considered to be junk.  For example, pass
            lambda x: x in " \\t"
        if you're comparing lines as sequences of characters, and don't
        want to synch up on blanks or hard tabs.

        Optional arg a is the first of two sequences to be compared.  By
        default, an empty string.  The elements of a must be hashable.  See
        also .set_seqs() and .set_seq1().

        Optional arg b is the second of two sequences to be compared.  By
        default, an empty string.  The elements of b must be hashable.  See
        also .set_seqs() and .set_seq2().

        Optional arg autojunk should be set to False to disable the
        "automatic junk heuristic" that treats popular elements as junk
        (see module documentation for more information).
        """

        # Members:
        # a
        #      first sequence
        # b
        #      second sequence; differences are computed as "what do
        #      we need to do to 'a' to change it into 'b'?"
        # b2j
        #      for x in b, b2j[x] is a list of the indices (into b)
        #      at which x appears; junk and popular elements do not appear
        # fullbcount
        #      for x in b, fullbcount[x] == the number of times x
        #      appears in b; only materialized if really needed (used
        #      only for computing quick_ratio())
        # matching_blocks
        #      a list of (i, j, k) triples, where a[i:i+k] == b[j:j+k];
        #      ascending & non-overlapping in i and in j; terminated by
        #      a dummy (len(a), len(b), 0) sentinel
        # opcodes
        #      a list of (tag, i1, i2, j1, j2) tuples, where tag is
        #      one of
        #          'replace'   a[i1:i2] should be replaced by b[j1:j2]
        #          'delete'    a[i1:i2] should be deleted
        #          'insert'    b[j1:j2] should be inserted
        #          'equal'     a[i1:i2] == b[j1:j2]
        # isjunk
        #      a user-supplied function taking a sequence element and
        #      returning true iff the element is "junk" -- this has
        #      subtle but helpful effects on the algorithm, which I'll
        #      get around to writing up someday <0.9 wink>.
        #      DON'T USE!  Only __chain_b uses this.  Use "in self.bjunk".
        # bjunk
        #      the items in b for which isjunk is True.
        # bpopular
        #      nonjunk items in b treated as junk by the heuristic (if used).

        self.isjunk = isjunk
        self.a = self.b = None
        self.autojunk = autojunk
        self.set_seqs(a, b)

    def set_seqs(self, a, b):
        """Set the two sequences to be compared.

        >>> s = SequenceMatcher()
        >>> s.set_seqs("abcd", "bcde")
        >>> s.ratio()
        0.75
        """

        self.set_seq1(a)
        self.set_seq2(b)

    def set_seq1(self, a):
        """Set the first sequence to be compared.

        The second sequence to be compared is not changed.

        >>> s = SequenceMatcher(None, "abcd", "bcde")
        >>> s.ratio()
        0.75
        >>> s.set_seq1("bcde")
        >>> s.ratio()
        1.0
        >>>

        SequenceMatcher computes and caches detailed information about the
        second sequence, so if you want to compare one sequence S against
        many sequences, use .set_seq2(S) once and call .set_seq1(x)
        repeatedly for each of the other sequences.

        See also set_seqs() and set_seq2().
        """

        # Identity (not equality) check: passing the very same object again
        # keeps the cached matching blocks and opcodes.
        if a is self.a:
            return
        self.a = a
        self.matching_blocks = self.opcodes = None

    def set_seq2(self, b):
        """Set the second sequence to be compared.

        The first sequence to be compared is not changed.

        >>> s = SequenceMatcher(None, "abcd", "bcde")
        >>> s.ratio()
        0.75
        >>> s.set_seq2("abcd")
        >>> s.ratio()
        1.0
        >>>

        SequenceMatcher computes and caches detailed information about the
        second sequence, so if you want to compare one sequence S against
        many sequences, use .set_seq2(S) once and call .set_seq1(x)
        repeatedly for each of the other sequences.

        See also set_seqs() and set_seq1().
        """

        if b is self.b:
            return
        self.b = b
        self.matching_blocks = self.opcodes = None
        self.fullbcount = None
        self.__chain_b()

    # For each element x in b, set b2j[x] to a list of the indices in
    # b where x appears; the indices are in increasing order; note that
    # the number of times x appears in b is len(b2j[x]) ...
    # when self.isjunk is defined, junk elements don't show up in this
    # map at all, which stops the central find_longest_match method
    # from starting any matching block at a junk element ...
    # b2j also does not contain entries for "popular" elements, meaning
    # elements that account for more than 1 + 1% of the total elements, and
    # when the sequence is reasonably large (>= 200 elements); this can
    # be viewed as an adaptive notion of semi-junk, and yields an enormous
    # speedup when, e.g., comparing program files with hundreds of
    # instances of "return NULL;" ...
    # note that this is only called when b changes; so for cross-product
    # kinds of matches, it's best to call set_seq2 once, then set_seq1
    # repeatedly

    def __chain_b(self):
        # Because isjunk is a user-defined (not C) function, and we test
        # for junk a LOT, it's important to minimize the number of calls.
        # Before the tricks described here, __chain_b was by far the most
        # time-consuming routine in the whole module!  If anyone sees
        # Jim Roskind, thank him again for profile.py -- I never would
        # have guessed that.
        # The first trick is to build b2j ignoring the possibility
        # of junk.  I.e., we don't call isjunk at all yet.  Throwing
        # out the junk later is much cheaper than building b2j "right"
        # from the start.
        b = self.b
        self.b2j = b2j = {}

        for i, elt in enumerate(b):
            indices = b2j.setdefault(elt, [])
            indices.append(i)

        # Purge junk elements
        self.bjunk = junk = set()
        isjunk = self.isjunk
        if isjunk:
            for elt in b2j:
                if isjunk(elt):
                    junk.add(elt)
            for elt in junk:  # separate loop avoids separate list of keys
                del b2j[elt]

        # Purge popular elements that are not junk
        self.bpopular = popular = set()
        n = len(b)
        if self.autojunk and n >= 200:
            ntest = n // 100 + 1
            for elt, idxs in b2j.items():
                if len(idxs) > ntest:
                    popular.add(elt)
            for elt in popular:  # ditto; as fast for 1% deletion
                del b2j[elt]

    def find_longest_match(self, alo=0, ahi_=None, blo=0, bhi_=None):
        """Find longest matching block in a[alo:ahi] and b[blo:bhi].

        By default it will find the longest match in the entirety of a and b.
        The upper bounds are passed as ``ahi_`` / ``bhi_``; None means
        len(a) / len(b) respectively.

        If isjunk is not defined:

        Return (i,j,k) such that a[i:i+k] is equal to b[j:j+k], where
            alo <= i <= i+k <= ahi
            blo <= j <= j+k <= bhi
        and for all (i',j',k') meeting those conditions,
            k >= k'
            i <= i'
            and if i == i', j <= j'

        In other words, of all maximal matching blocks, return one that
        starts earliest in a, and of all those maximal matching blocks that
        start earliest in a, return the one that starts earliest in b.

        >>> s = SequenceMatcher(None, " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        Match(a=0, b=4, size=5)

        If isjunk is defined, first the longest matching block is
        determined as above, but with the additional restriction that no
        junk element appears in the block.  Then that block is extended as
        far as possible by matching (only) junk elements on both sides.  So
        the resulting block never matches on junk except as identical junk
        happens to be adjacent to an "interesting" match.

        Here's the same example as before, but considering blanks to be
        junk.  That prevents " abcd" from matching the " abcd" at the tail
        end of the second sequence directly.  Instead only the "abcd" can
        match, and matches the leftmost "abcd" in the second sequence:

        >>> s = SequenceMatcher(lambda x: x==" ", " abcd", "abcd abcd")
        >>> s.find_longest_match(0, 5, 0, 9)
        Match(a=1, b=0, size=4)

        If no blocks match, return (alo, blo, 0).

        >>> s = SequenceMatcher(None, "ab", "c")
        >>> s.find_longest_match(0, 2, 0, 1)
        Match(a=0, b=0, size=0)
        """

        # CAUTION:  stripping common prefix or suffix would be incorrect.
        # E.g.,
        #    ab
        #    acab
        # Longest matching block is "ab", but if common prefix is
        # stripped, it's "a" (tied with "b").  UNIX(tm) diff does so
        # strip, so ends up claiming that ab is changed to acab by
        # inserting "ca" in the middle.  That's minimal but unintuitive:
        # "it's obvious" that someone inserted "ac" at the front.
        # Windiff ends up at the same place as diff, but by pairing up
        # the unique 'b's and then matching the first two 'a's.

        bjunk: set = self.bjunk
        a, b, b2j = self.a, self.b, self.b2j
        ahi = len(a) if ahi_ is None else ahi_
        bhi = len(b) if bhi_ is None else bhi_
        besti, bestj, bestsize = alo, blo, 0
        # find longest junk-free match
        # during an iteration of the loop, j2len[j] = length of longest
        # junk-free match ending with a[i-1] and b[j]
        j2len = {}
        nothing = []
        for i in range(alo, ahi):
            # look at all instances of a[i] in b; note that because
            # b2j has no junk keys, the loop is skipped if a[i] is junk
            newj2len = {}
            for j in b2j.get(a[i], nothing):
                # a[i] matches b[j]
                if j < blo:
                    continue
                if j >= bhi:
                    # indices are ascending, so nothing further can qualify
                    break
                k = newj2len[j] = j2len.get(j-1, 0) + 1
                if k > bestsize:
                    besti, bestj, bestsize = i-k+1, j-k+1, k
            j2len = newj2len

        # Extend the best by non-junk elements on each end.  In particular,
        # "popular" non-junk elements aren't in b2j, which greatly speeds
        # the inner loop above, but also means "the best" match so far
        # doesn't contain any junk *or* popular non-junk elements.
        while besti > alo and bestj > blo and \
              b[bestj-1] not in bjunk and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              b[bestj+bestsize] not in bjunk and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize += 1

        # Now that we have a wholly interesting match (albeit possibly
        # empty!), we may as well suck up the matching junk on each
        # side of it too.  Can't think of a good reason not to, and it
        # saves post-processing the (possibly considerable) expense of
        # figuring out what to do with it.  In the case of an empty
        # interesting match, this is clearly the right thing to do,
        # because no other kind of match is possible in the regions.
        while besti > alo and bestj > blo and \
              b[bestj-1] in bjunk and \
              a[besti-1] == b[bestj-1]:
            besti, bestj, bestsize = besti-1, bestj-1, bestsize+1
        while besti+bestsize < ahi and bestj+bestsize < bhi and \
              b[bestj+bestsize] in bjunk and \
              a[besti+bestsize] == b[bestj+bestsize]:
            bestsize = bestsize + 1

        return Match(besti, bestj, bestsize)

    def get_matching_blocks(self):
        """Return list of triples describing matching subsequences.

        Each triple is of the form (i, j, n), and means that
        a[i:i+n] == b[j:j+n].  The triples are monotonically increasing in
        i and in j.  New in Python 2.5, it's also guaranteed that if
        (i, j, n) and (i', j', n') are adjacent triples in the list, and
        the second is not the last triple in the list, then i+n != i' or
        j+n != j'.  IOW, adjacent triples never describe adjacent equal
        blocks.

        The last triple is a dummy, (len(a), len(b), 0), and is the only
        triple with n==0.

        The result is cached on the instance until either sequence is
        replaced, and the cached list itself is returned.

        >>> s = SequenceMatcher(None, "abxcd", "abcd")
        >>> list(s.get_matching_blocks())
        [Match(a=0, b=0, size=2), Match(a=3, b=2, size=2), Match(a=5, b=4, size=0)]
        """

        if self.matching_blocks is not None:
            return self.matching_blocks
        la, lb = len(self.a), len(self.b)

        # This is most naturally expressed as a recursive algorithm, but
        # at least one user bumped into extreme use cases that exceeded
        # the recursion limit on their box.  So, now we maintain a list
        # (`queue`) of blocks we still need to look at, and append partial
        # results to `matching_blocks` in a loop; the matches are sorted
        # at the end.
        queue = [(0, la, 0, lb)]
        matching_blocks = []
        while queue:
            alo, ahi, blo, bhi = queue.pop()
            i, j, k = x = self.find_longest_match(alo, ahi, blo, bhi)
            # a[alo:i] vs b[blo:j] unknown
            # a[i:i+k] same as b[j:j+k]
            # a[i+k:ahi] vs b[j+k:bhi] unknown
            if k:   # if k is 0, there was no matching block
                matching_blocks.append(x)
                if alo < i and blo < j:
                    queue.append((alo, i, blo, j))
                if i+k < ahi and j+k < bhi:
                    queue.append((i+k, ahi, j+k, bhi))
        matching_blocks.sort()

        # It's possible that we have adjacent equal blocks in the
        # matching_blocks list now.  Starting with 2.5, this code was added
        # to collapse them.
        i1 = j1 = k1 = 0
        non_adjacent = []
        for i2, j2, k2 in matching_blocks:
            # Is this block adjacent to i1, j1, k1?
            if i1 + k1 == i2 and j1 + k1 == j2:
                # Yes, so collapse them -- this just increases the length of
                # the first block by the length of the second, and the first
                # block so lengthened remains the block to compare against.
                k1 += k2
            else:
                # Not adjacent.  Remember the first block (k1==0 means it's
                # the dummy we started with), and make the second block the
                # new block to compare against.
                if k1:
                    non_adjacent.append((i1, j1, k1))
                i1, j1, k1 = i2, j2, k2
        if k1:
            non_adjacent.append((i1, j1, k1))

        non_adjacent.append((la, lb, 0))
        self.matching_blocks = list(map(Match._make, non_adjacent))
        return self.matching_blocks

    def get_opcodes(self):
        """Return list of 5-tuples describing how to turn a into b.

        Each tuple is of the form (tag, i1, i2, j1, j2).  The first tuple
        has i1 == j1 == 0, and remaining tuples have i1 == the i2 from the
        tuple preceding it, and likewise for j1 == the previous j2.

        The tags are strings, with these meanings:

        'replace':  a[i1:i2] should be replaced by b[j1:j2]
        'delete':   a[i1:i2] should be deleted.
                    Note that j1==j2 in this case.
        'insert':   b[j1:j2] should be inserted at a[i1:i1].
                    Note that i1==i2 in this case.
        'equal':    a[i1:i2] == b[j1:j2]

        The result is cached on the instance until either sequence is
        replaced, and the cached list itself is returned.

        >>> a = "qabxcd"
        >>> b = "abycdf"
        >>> s = SequenceMatcher(None, a, b)
        >>> for tag, i1, i2, j1, j2 in s.get_opcodes():
        ...    print(("%7s a[%d:%d] (%s) b[%d:%d] (%s)" %
        ...           (tag, i1, i2, a[i1:i2], j1, j2, b[j1:j2])))
         delete a[0:1] (q) b[0:0] ()
          equal a[1:3] (ab) b[0:2] (ab)
        replace a[3:4] (x) b[2:3] (y)
          equal a[4:6] (cd) b[3:5] (cd)
         insert a[6:6] () b[5:6] (f)
        """

        if self.opcodes is not None:
            return self.opcodes
        i = j = 0
        answer = []
        for ai, bj, size in self.get_matching_blocks():
            # invariant:  we've pumped out correct diffs to change
            # a[:i] into b[:j], and the next matching block is
            # a[ai:ai+size] == b[bj:bj+size].  So we need to pump
            # out a diff to change a[i:ai] into b[j:bj], pump out
            # the matching block, and move (i,j) beyond the match
            tag = ''
            if i < ai and j < bj:
                tag = 'replace'
            elif i < ai:
                tag = 'delete'
            elif j < bj:
                tag = 'insert'
            if tag:
                answer.append((tag, i, ai, j, bj))
            i, j = ai+size, bj+size
            # the list of matching blocks is terminated by a
            # sentinel with size 0
            if size:
                answer.append(('equal', ai, i, bj, j))
        # Publish the cache only once it is complete, so an interrupted
        # computation cannot leave a truncated opcode list behind.
        self.opcodes = answer
        return answer

    def get_grouped_opcodes(self, n=3):
        """ Isolate change clusters by eliminating ranges with no changes.

        Return a generator of groups with up to n lines of context.
        Each group is in the same format as returned by get_opcodes().

        The grouping works on a private copy of the opcode list, so the
        list cached by get_opcodes() is left untouched.

        >>> from pprint import pprint
        >>> a = list(map(str, range(1,40)))
        >>> b = a[:]
        >>> b[8:8] = ['i']     # Make an insertion
        >>> b[20] += 'x'       # Make a replacement
        >>> b[23:28] = []      # Make a deletion
        >>> b[30] += 'y'       # Make another replacement
        >>> pprint(list(SequenceMatcher(None,a,b).get_grouped_opcodes()))
        [[('equal', 5, 8, 5, 8), ('insert', 8, 8, 8, 9), ('equal', 8, 11, 9, 12)],
         [('equal', 16, 19, 17, 20),
          ('replace', 19, 20, 20, 21),
          ('equal', 20, 22, 21, 23),
          ('delete', 22, 27, 23, 23),
          ('equal', 27, 30, 23, 26)],
         [('equal', 31, 34, 27, 30),
          ('replace', 34, 35, 30, 31),
          ('equal', 35, 38, 31, 34)]]
        """

        # get_opcodes() returns the cached list itself.  The fix-ups below
        # overwrite the first and last entries, so operate on a copy;
        # otherwise later get_opcodes() callers would see the leading and
        # trailing 'equal' ranges trimmed to n elements.
        codes = list(self.get_opcodes())
        if not codes:
            codes = [("equal", 0, 1, 0, 1)]
        # Fixup leading and trailing groups if they show no changes.
        if codes[0][0] == 'equal':
            tag, i1, i2, j1, j2 = codes[0]
            codes[0] = tag, max(i1, i2-n), i2, max(j1, j2-n), j2
        if codes[-1][0] == 'equal':
            tag, i1, i2, j1, j2 = codes[-1]
            codes[-1] = tag, i1, min(i2, i1+n), j1, min(j2, j1+n)

        nn = n + n
        group = []
        for tag, i1, i2, j1, j2 in codes:
            # End the current group and start a new one whenever
            # there is a large range with no changes.
            if tag == 'equal' and i2-i1 > nn:
                group.append((tag, i1, min(i2, i1+n), j1, min(j2, j1+n)))
                yield group
                group = []
                i1, j1 = max(i1, i2-n), max(j1, j2-n)
            group.append((tag, i1, i2, j1, j2))
        if group and not (len(group) == 1 and group[0][0] == 'equal'):
            yield group

    def ratio(self):
        """Return a measure of the sequences' similarity (float in [0,1]).

        Where T is the total number of elements in both sequences, and
        M is the number of matches, this is 2.0*M / T.
        Note that this is 1 if the sequences are identical, and 0 if
        they have nothing in common.

        .ratio() is expensive to compute if you haven't already computed
        .get_matching_blocks() or .get_opcodes(), in which case you may
        want to try .quick_ratio() or .real_quick_ratio() first to get an
        upper bound.

        >>> s = SequenceMatcher(None, "abcd", "bcde")
        >>> s.ratio()
        0.75
        >>> s.quick_ratio()
        0.75
        >>> s.real_quick_ratio()
        1.0
        """

        matches: cython.Py_ssize_t
        matches = sum(triple[-1] for triple in self.get_matching_blocks())
        return _calculate_ratio(matches, len(self.a) + len(self.b))

    def quick_ratio(self):
        """Return an upper bound on ratio() relatively quickly.

        This isn't defined beyond that it is an upper bound on .ratio(), and
        is faster to compute.
        """

        # viewing a and b as multisets, set matches to the cardinality
        # of their intersection; this counts the number of matches
        # without regard to order, so is clearly an upper bound
        if self.fullbcount is None:
            self.fullbcount = fullbcount = {}
            for elt in self.b:
                fullbcount[elt] = fullbcount.get(elt, 0) + 1
        fullbcount = self.fullbcount
        # avail[x] is the number of times x appears in 'b' less the
        # number of times we've seen it in 'a' so far ... kinda
        avail = {}
        matches: cython.Py_ssize_t
        matches = 0
        for elt in self.a:
            if elt in avail:
                numb = avail[elt]
            else:
                numb = fullbcount.get(elt, 0)
            avail[elt] = numb - 1
            if numb > 0:
                matches = matches + 1
        return _calculate_ratio(matches, len(self.a) + len(self.b))

    def real_quick_ratio(self):
        """Return an upper bound on ratio() very quickly.

        This isn't defined beyond that it is an upper bound on .ratio(), and
        is faster to compute than either .ratio() or .quick_ratio().
        """

        la, lb = len(self.a), len(self.b)
        # can't have more matches than the number of elements in the
        # shorter sequence
        return _calculate_ratio(min(la, lb), la + lb)

    # Only available when types.GenericAlias could be imported.
    if GenericAlias is not None:
        __class_getitem__ = classmethod(GenericAlias)
+
+
def get_close_matches(word, possibilities, n=3, cutoff=0.6):
    """Use SequenceMatcher to return list of the best "good enough" matches.

    word is a sequence for which close matches are desired (typically a
    string).

    possibilities is a list of sequences against which to match word
    (typically a list of strings).

    Optional arg n (default 3) is the maximum number of close matches to
    return.  n must be > 0.

    Optional arg cutoff (default 0.6) is a float in [0, 1].  Possibilities
    that don't score at least that similar to word are ignored.

    The best (no more than n) matches among the possibilities are returned
    in a list, sorted by similarity score, most similar first.

    Raises ValueError when n is not positive or cutoff lies outside [0, 1].

    >>> get_close_matches("appel", ["ape", "apple", "peach", "puppy"])
    ['apple', 'ape']
    >>> import keyword as _keyword
    >>> get_close_matches("wheel", _keyword.kwlist)
    ['while']
    >>> get_close_matches("Apple", _keyword.kwlist)
    []
    >>> get_close_matches("accept", _keyword.kwlist)
    ['except']
    """

    if not n > 0:
        raise ValueError("n must be > 0: %r" % (n,))
    if not 0.0 <= cutoff <= 1.0:
        raise ValueError("cutoff must be in [0.0, 1.0]: %r" % (cutoff,))

    # word goes in as the second sequence because that is the side whose
    # analysis is cached by set_seq2(); only the candidate changes per turn.
    matcher = SequenceMatcher()
    matcher.set_seq2(word)

    scored = []
    for candidate in possibilities:
        matcher.set_seq1(candidate)
        # Cheapest upper bound first; the full ratio is computed only for
        # candidates that survive both quick filters.
        if matcher.real_quick_ratio() < cutoff:
            continue
        if matcher.quick_ratio() < cutoff:
            continue
        if matcher.ratio() < cutoff:
            continue
        scored.append((matcher.ratio(), candidate))

    # Keep the n best (score, candidate) pairs, then drop the scores.
    best = _nlargest(n, scored)
    return [candidate for _score, candidate in best]
+
+
+def _keep_original_ws(s, tag_s):
+ """Replace whitespace with the original whitespace characters in `s`"""
+ return ''.join(
+ c if tag_c == " " and c.isspace() else tag_c
+ for c, tag_c in zip(s, tag_s)
+ )
+
+
+
+class Differ:
+ r"""
+ Differ is a class for comparing sequences of lines of text, and
+ producing human-readable differences or deltas. Differ uses
+ SequenceMatcher both to compare sequences of lines, and to compare
+ sequences of characters within similar (near-matching) lines.
+
+ Each line of a Differ delta begins with a two-letter code:
+
+ '- ' line unique to sequence 1
+ '+ ' line unique to sequence 2
+ ' ' line common to both sequences
+ '? ' line not present in either input sequence
+
+ Lines beginning with '? ' attempt to guide the eye to intraline
+ differences, and were not present in either input sequence. These lines
+ can be confusing if the sequences contain tab characters.
+
+ Note that Differ makes no claim to produce a *minimal* diff. To the
+ contrary, minimal diffs are often counter-intuitive, because they synch
+ up anywhere possible, sometimes accidental matches 100 pages apart.
+ Restricting synch points to contiguous matches preserves some notion of
+ locality, at the occasional cost of producing a longer diff.
+
+ Example: Comparing two texts.
+
+ First we set up the texts, sequences of individual single-line strings
+ ending with newlines (such sequences can also be obtained from the
+ `readlines()` method of file-like objects):
+
+ >>> text1 = ''' 1. Beautiful is better than ugly.
+ ... 2. Explicit is better than implicit.
+ ... 3. Simple is better than complex.
+ ... 4. Complex is better than complicated.
+ ... '''.splitlines(keepends=True)
+ >>> len(text1)
+ 4
+ >>> text1[0][-1]
+ '\n'
+ >>> text2 = ''' 1. Beautiful is better than ugly.
+ ... 3. Simple is better than complex.
+ ... 4. Complicated is better than complex.
+ ... 5. Flat is better than nested.
+ ... '''.splitlines(keepends=True)
+
+ Next we instantiate a Differ object:
+
+ >>> d = Differ()
+
+ Note that when instantiating a Differ object we may pass functions to
+ filter out line and character 'junk'. See Differ.__init__ for details.
+
+ Finally, we compare the two:
+
+ >>> result = list(d.compare(text1, text2))
+
+ 'result' is a list of strings, so let's pretty-print it:
+
+ >>> from pprint import pprint as _pprint
+ >>> _pprint(result)
+ [' 1. Beautiful is better than ugly.\n',
+ '- 2. Explicit is better than implicit.\n',
+ '- 3. Simple is better than complex.\n',
+ '+ 3. Simple is better than complex.\n',
+ '? ++\n',
+ '- 4. Complex is better than complicated.\n',
+ '? ^ ---- ^\n',
+ '+ 4. Complicated is better than complex.\n',
+ '? ++++ ^ ^\n',
+ '+ 5. Flat is better than nested.\n']
+
+ As a single multi-line string it looks like this:
+
+ >>> print(''.join(result), end="")
+ 1. Beautiful is better than ugly.
+ - 2. Explicit is better than implicit.
+ - 3. Simple is better than complex.
+ + 3. Simple is better than complex.
+ ? ++
+ - 4. Complex is better than complicated.
+ ? ^ ---- ^
+ + 4. Complicated is better than complex.
+ ? ++++ ^ ^
+ + 5. Flat is better than nested.
+ """
+
+ def __init__(self, linejunk=None, charjunk=None):
+ """
+ Construct a text differencer, with optional filters.
+
+ The two optional keyword parameters are for filter functions:
+
+ - `linejunk`: A function that should accept a single string argument,
+ and return true iff the string is junk. The module-level function
+ `IS_LINE_JUNK` may be used to filter out lines without visible
+ characters, except for at most one splat ('#'). It is recommended
+ to leave linejunk None; the underlying SequenceMatcher class has
+ an adaptive notion of "noise" lines that's better than any static
+ definition the author has ever been able to craft.
+
+ - `charjunk`: A function that should accept a string of length 1. The
+ module-level function `IS_CHARACTER_JUNK` may be used to filter out
+ whitespace characters (a blank or tab; **note**: bad idea to include
+ newline in this!). Use of IS_CHARACTER_JUNK is recommended.
+ """
+
+ # Nothing is validated or computed here; the filters are only stored.
+ # compare() hands `linejunk` to the line-level SequenceMatcher and
+ # _fancy_replace() hands `charjunk` to the character-level one.
+ self.linejunk = linejunk
+ self.charjunk = charjunk
+
+ def compare(self, a, b):
+ r"""
+ Compare two sequences of lines; generate the resulting delta.
+
+ Each sequence must contain individual single-line strings ending with
+ newlines. Such sequences can be obtained from the `readlines()` method
+ of file-like objects. The delta generated also consists of newline-
+ terminated strings, ready to be printed as-is via the writelines()
+ method of a file-like object.
+
+ Example:
+
+ >>> print(''.join(Differ().compare('one\ntwo\nthree\n'.splitlines(True),
+ ... 'ore\ntree\nemu\n'.splitlines(True))),
+ ... end="")
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+
+ # Line-level matcher: the elements being compared are whole lines.
+ cruncher = SequenceMatcher(self.linejunk, a, b)
+ # Each opcode selects one helper generator: 'replace' gets the
+ # intraline treatment, the other tags are dumped with a fixed prefix.
+ for tag, alo, ahi, blo, bhi in cruncher.get_opcodes():
+ if tag == 'replace':
+ g = self._fancy_replace(a, alo, ahi, b, blo, bhi)
+ elif tag == 'delete':
+ g = self._dump('-', a, alo, ahi)
+ elif tag == 'insert':
+ g = self._dump('+', b, blo, bhi)
+ elif tag == 'equal':
+ g = self._dump(' ', a, alo, ahi)
+ else:
+ # Guard against an opcode tag this method does not know about.
+ raise ValueError('unknown tag %r' % (tag,))
+
+ yield from g
+
+ def _dump(self, tag, x, lo, hi):
+ """Generate comparison results for a same-tagged range."""
+ # Each of x[lo:hi] is emitted unchanged behind a "<tag> " prefix.
+ for i in range(lo, hi):
+ yield '%s %s' % (tag, x[i])
+
+ def _plain_replace(self, a, alo, ahi, b, blo, bhi):
+ """Generate a plain '-' block for a[alo:ahi] and '+' block for b[blo:bhi].
+
+ Both ranges must be non-empty (asserted below). No intraline marking
+ is attempted; the two blocks are simply dumped one after the other.
+ """
+ assert alo < ahi and blo < bhi
+ # dump the shorter block first -- reduces the burden on short-term
+ # memory if the blocks are of very different sizes
+ if bhi - blo < ahi - alo:
+ first = self._dump('+', b, blo, bhi)
+ second = self._dump('-', a, alo, ahi)
+ else:
+ # equal sizes land here, so deletions come first on a tie
+ first = self._dump('-', a, alo, ahi)
+ second = self._dump('+', b, blo, bhi)
+
+ for g in first, second:
+ yield from g
+
+ def _fancy_replace(self, a, alo, ahi, b, blo, bhi):
+ r"""
+ When replacing one block of lines with another, search the blocks
+ for *similar* lines; the best-matching pair (if any) is used as a
+ synch point, and intraline difference marking is done on the
+ similar pair. Lots of work, but often worth it.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._fancy_replace(['abcDefghiJkl\n'], 0, 1,
+ ... ['abcdefGhijkl\n'], 0, 1)
+ >>> print(''.join(results), end="")
+ - abcDefghiJkl
+ ? ^ ^ ^
+ + abcdefGhijkl
+ ? ^ ^ ^
+ """
+ # Don't synch up unless the lines have a similarity score above
+ # cutoff. Previously only the smallest pair was handled here,
+ # and if there are many pairs with the best ratio, recursion
+ # could grow very deep, and runtime cubic. See:
+ # https://github.com/python/cpython/issues/119105
+ #
+ # Later, more pathological cases prompted removing recursion
+ # entirely.
+ cutoff = 0.74999
+ # Character-level matcher; its two sequences are swapped in per line
+ # pair below via set_seq1()/set_seq2().
+ cruncher = SequenceMatcher(self.charjunk)
+ # Bound methods cached in locals for the inner loop.
+ crqr = cruncher.real_quick_ratio
+ cqr = cruncher.quick_ratio
+ cr = cruncher.ratio
+
+ # Only a[i] within WINDOW positions of the offset corresponding to
+ # b[j] are considered as synch candidates.
+ WINDOW = 10
+ best_i = best_j = None
+ dump_i, dump_j = alo, blo # smallest indices not yet resolved
+ for j in range(blo, bhi):
+ cruncher.set_seq2(b[j])
+ # Search the corresponding i's within WINDOW for the highest
+ # ratio greater than `cutoff`.
+ aequiv = alo + (j - blo)
+ arange = range(max(aequiv - WINDOW, dump_i),
+ min(aequiv + WINDOW + 1, ahi))
+ if not arange: # likely exit if `a` is shorter than `b`
+ break
+ best_ratio = cutoff
+ for i in arange:
+ cruncher.set_seq1(a[i])
+ # Ordering by cheapest to most expensive ratio is very
+ # valuable, most often getting out early.
+ if (crqr() > best_ratio
+ and cqr() > best_ratio
+ and cr() > best_ratio):
+ # cr() is evaluated once more here to record the new best
+ best_i, best_j, best_ratio = i, j, cr()
+
+ if best_i is None:
+ # found nothing to synch on yet - move to next j
+ continue
+
+ # pump out straight replace from before this synch pair
+ yield from self._fancy_helper(a, dump_i, best_i,
+ b, dump_j, best_j)
+ # do intraline marking on the synch pair
+ aelt, belt = a[best_i], b[best_j]
+ if aelt != belt:
+ # pump out a '-', '?', '+', '?' quad for the synched lines
+ atags = btags = ""
+ cruncher.set_seqs(aelt, belt)
+ # Build one marker character per source character:
+ # '^' replaced, '-' deleted, '+' inserted, ' ' unchanged.
+ for tag, ai1, ai2, bj1, bj2 in cruncher.get_opcodes():
+ la, lb = ai2 - ai1, bj2 - bj1
+ if tag == 'replace':
+ atags += '^' * la
+ btags += '^' * lb
+ elif tag == 'delete':
+ atags += '-' * la
+ elif tag == 'insert':
+ btags += '+' * lb
+ elif tag == 'equal':
+ atags += ' ' * la
+ btags += ' ' * lb
+ else:
+ raise ValueError('unknown tag %r' % (tag,))
+ yield from self._qformat(aelt, belt, atags, btags)
+ else:
+ # the synch pair is identical
+ yield ' ' + aelt
+ # everything up to and including the synch pair is now resolved
+ dump_i, dump_j = best_i + 1, best_j + 1
+ best_i = best_j = None
+
+ # pump out straight replace from after the last synch pair
+ yield from self._fancy_helper(a, dump_i, ahi,
+ b, dump_j, bhi)
+
+ def _fancy_helper(self, a, alo, ahi, b, blo, bhi):
+ """Generate output for the unsynched ranges a[alo:ahi] and b[blo:bhi].
+
+ Either range may be empty: both non-empty gives a plain replace, one
+ non-empty gives a pure '-' or '+' dump, both empty yields nothing.
+ """
+ g = [] # default when both ranges are empty
+ if alo < ahi:
+ if blo < bhi:
+ g = self._plain_replace(a, alo, ahi, b, blo, bhi)
+ else:
+ g = self._dump('-', a, alo, ahi)
+ elif blo < bhi:
+ g = self._dump('+', b, blo, bhi)
+
+ yield from g
+
+ def _qformat(self, aline, bline, atags, btags):
+ r"""
+ Format "?" output and deal with tabs.
+
+ Example:
+
+ >>> d = Differ()
+ >>> results = d._qformat('\tabcDefghiJkl\n', '\tabcdefGhijkl\n',
+ ... ' ^ ^ ^ ', ' ^ ^ ^ ')
+ >>> for line in results: print(repr(line))
+ ...
+ '- \tabcDefghiJkl\n'
+ '? \t ^ ^ ^\n'
+ '+ \tabcdefGhijkl\n'
+ '? \t ^ ^ ^\n'
+ """
+ # NOTE(review): _keep_original_ws is defined elsewhere in this module;
+ # judging by the doctest above it carries the line's own whitespace
+ # (e.g. a leading tab) into the tag string -- confirm at its definition.
+ # rstrip() then drops trailing blanks, so a tag string with no markers
+ # becomes empty and its "?" line is skipped below.
+ atags = _keep_original_ws(aline, atags).rstrip()
+ btags = _keep_original_ws(bline, btags).rstrip()
+
+ yield "- " + aline
+ if atags:
+ yield f"? {atags}\n"
+
+ yield "+ " + bline
+ if btags:
+ yield f"? {btags}\n"
+
+# With respect to junk, an earlier version of ndiff simply refused to
+# *start* a match with a junk element. The result was cases like this:
+# before: private Thread currentThread;
+# after: private volatile Thread currentThread;
+# If you consider whitespace to be junk, the longest contiguous match
+# not starting with junk is "e Thread currentThread". So ndiff reported
+# that "e volatil" was inserted between the 't' and the 'e' in "private".
+# While an accurate view, to people that's absurd. The current version
+# looks for matching blocks that are entirely junk-free, then extends the
+# longest one of those as far as possible but only with matching junk.
+# So now "currentThread" is matched, then extended to suck up the
+# preceding blank; then "private" is matched, and extended to suck up the
+# following blank; then "Thread" is matched; and finally ndiff reports
+# that "volatile " was inserted before "Thread". The only quibble
+# remaining is that perhaps it was really the case that " volatile"
+# was inserted after "private". I can live with that <wink>.
+
+def IS_LINE_JUNK(line, pat=None):
+ r"""
+ Return True for ignorable line: if `line` is blank or contains a single '#'.
+
+ Examples:
+
+ >>> IS_LINE_JUNK('\n')
+ True
+ >>> IS_LINE_JUNK(' # \n')
+ True
+ >>> IS_LINE_JUNK('hello\n')
+ False
+ """
+
+ if pat is None:
+ # Default: match '#' or the empty string
+ # (substring test: only '' and '#' are contained in '#')
+ return line.strip() in '#'
+ # Previous versions used the undocumented parameter 'pat' as a
+ # match function. Retain this behaviour for compatibility.
+ # `pat` is called with the raw, unstripped line; any non-None result
+ # counts as junk.
+ return pat(line) is not None
+
+def IS_CHARACTER_JUNK(ch, ws=" \t"):
+ r"""
+ Return True for ignorable character: iff `ch` is a space or tab.
+
+ Examples:
+
+ >>> IS_CHARACTER_JUNK(' ')
+ True
+ >>> IS_CHARACTER_JUNK('\t')
+ True
+ >>> IS_CHARACTER_JUNK('\n')
+ False
+ >>> IS_CHARACTER_JUNK('x')
+ False
+ """
+
+ # `ws` is the string of characters treated as junk; the default holds
+ # exactly a blank and a tab.
+ return ch in ws
+
+
+########################################################################
+### Unified Diff
+########################################################################
+
+def _format_range_unified(start, stop):
+ 'Convert range to the "ed" format'
+ # `start`/`stop` are a zero-based half-open range; the result is the
+ # "beginning,length" text used in a unified-diff "@@" line.
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if length == 1:
+ # a one-line range is written as the line number alone
+ return '{}'.format(beginning)
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ return '{},{}'.format(beginning, length)
+
+def unified_diff(a, b, fromfile='', tofile='', fromfiledate='',
+ tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a unified diff.
+
+ Unified diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with ---, +++, or @@) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The unidiff format normally has a header for filenames and modification
+ times. Any or all of these may be specified using strings for
+ 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+
+ Example:
+
+ >>> for line in unified_diff('one two three four'.split(),
+ ... 'zero one tree four'.split(), 'Original', 'Current',
+ ... '2005-01-26 23:30:50', '2010-04-02 10:20:52',
+ ... lineterm=''):
+ ... print(line) # doctest: +NORMALIZE_WHITESPACE
+ --- Original 2005-01-26 23:30:50
+ +++ Current 2010-04-02 10:20:52
+ @@ -1,4 +1,4 @@
+ +zero
+ one
+ -two
+ -three
+ +tree
+ four
+ """
+
+ # Raises TypeError for non-str lines or header arguments.
+ _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
+ # The ---/+++ header is emitted once, when the first group is seen.
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ # dates, when given, are separated from the file name by a tab
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '--- {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '+++ {}{}{}'.format(tofile, todate, lineterm)
+
+ # The hunk range runs from the first opcode's start to the last
+ # opcode's end, on each side.
+ first, last = group[0], group[-1]
+ file1_range = _format_range_unified(first[1], last[2])
+ file2_range = _format_range_unified(first[3], last[4])
+ yield '@@ -{} +{} @@{}'.format(file1_range, file2_range, lineterm)
+
+ for tag, i1, i2, j1, j2 in group:
+ if tag == 'equal':
+ for line in a[i1:i2]:
+ yield ' ' + line
+ continue
+ # a 'replace' emits its '-' lines first, then its '+' lines
+ if tag in {'replace', 'delete'}:
+ for line in a[i1:i2]:
+ yield '-' + line
+ if tag in {'replace', 'insert'}:
+ for line in b[j1:j2]:
+ yield '+' + line
+
+
+########################################################################
+### Context Diff
+########################################################################
+
+def _format_range_context(start, stop):
+ 'Convert range to the "ed" format'
+ # `start`/`stop` are a zero-based half-open range; the result is the
+ # "first,last" text (both inclusive, one-based) of a context-diff header.
+ # Per the diff spec at http://www.unix.org/single_unix_specification/
+ beginning = start + 1 # lines start numbering with one
+ length = stop - start
+ if not length:
+ beginning -= 1 # empty ranges begin at line just before the range
+ if length <= 1:
+ # empty and one-line ranges are written as a single number
+ return '{}'.format(beginning)
+ return '{},{}'.format(beginning, beginning + length - 1)
+
+# See http://www.unix.org/single_unix_specification/
+def context_diff(a, b, fromfile='', tofile='',
+ fromfiledate='', tofiledate='', n=3, lineterm='\n'):
+ r"""
+ Compare two sequences of lines; generate the delta as a context diff.
+
+ Context diffs are a compact way of showing line changes and a few
+ lines of context. The number of context lines is set by 'n' which
+ defaults to three.
+
+ By default, the diff control lines (those with *** or ---) are
+ created with a trailing newline. This is helpful so that inputs
+ created from file.readlines() result in diffs that are suitable for
+ file.writelines() since both the inputs and outputs have trailing
+ newlines.
+
+ For inputs that do not have trailing newlines, set the lineterm
+ argument to "" so that the output will be uniformly newline free.
+
+ The context diff format normally has a header for filenames and
+ modification times. Any or all of these may be specified using
+ strings for 'fromfile', 'tofile', 'fromfiledate', and 'tofiledate'.
+ The modification times are normally expressed in the ISO 8601 format.
+ If not specified, the strings default to blanks.
+
+ Example:
+
+ >>> print(''.join(context_diff('one\ntwo\nthree\nfour\n'.splitlines(True),
+ ... 'zero\none\ntree\nfour\n'.splitlines(True), 'Original', 'Current')),
+ ... end="")
+ *** Original
+ --- Current
+ ***************
+ *** 1,4 ****
+ one
+ ! two
+ ! three
+ four
+ --- 1,4 ----
+ + zero
+ one
+ ! tree
+ four
+ """
+
+ # Raises TypeError for non-str lines or header arguments.
+ _check_types(a, b, fromfile, tofile, fromfiledate, tofiledate, lineterm)
+ # line prefix to emit for each opcode tag
+ prefix = dict(insert='+ ', delete='- ', replace='! ', equal=' ')
+ # The ***/--- file header is emitted once, when the first group is seen.
+ started = False
+ for group in SequenceMatcher(None,a,b).get_grouped_opcodes(n):
+ if not started:
+ started = True
+ fromdate = '\t{}'.format(fromfiledate) if fromfiledate else ''
+ todate = '\t{}'.format(tofiledate) if tofiledate else ''
+ yield '*** {}{}{}'.format(fromfile, fromdate, lineterm)
+ yield '--- {}{}{}'.format(tofile, todate, lineterm)
+
+ first, last = group[0], group[-1]
+ yield '***************' + lineterm
+
+ file1_range = _format_range_context(first[1], last[2])
+ yield '*** {} ****{}'.format(file1_range, lineterm)
+
+ # The "from" lines are listed only if this group replaces or deletes
+ # something; pure insertions are skipped on this side.
+ if any(tag in {'replace', 'delete'} for tag, _, _, _, _ in group):
+ for tag, i1, i2, _, _ in group:
+ if tag != 'insert':
+ for line in a[i1:i2]:
+ yield prefix[tag] + line
+
+ file2_range = _format_range_context(first[3], last[4])
+ yield '--- {} ----{}'.format(file2_range, lineterm)
+
+ # Likewise the "to" lines are listed only if this group replaces or
+ # inserts something; pure deletions are skipped on this side.
+ if any(tag in {'replace', 'insert'} for tag, _, _, _, _ in group):
+ for tag, _, _, j1, j2 in group:
+ if tag != 'delete':
+ for line in b[j1:j2]:
+ yield prefix[tag] + line
+
+def _check_types(a, b, *args):
+ # Validate the inputs of {unified,context}_diff(); raises TypeError on
+ # the first problem found and returns None otherwise.
+ # Checking types is weird, but the alternative is garbled output when
+ # someone passes mixed bytes and str to {unified,context}_diff(). E.g.
+ # without this check, passing filenames as bytes results in output like
+ # --- b'oldfile.txt'
+ # +++ b'newfile.txt'
+ # because of how str.format() incorporates bytes objects.
+ # Only the first element of each non-empty sequence is inspected.
+ if a and not isinstance(a[0], str):
+ raise TypeError('lines to compare must be str, not %s (%r)' %
+ (type(a[0]).__name__, a[0]))
+ if b and not isinstance(b[0], str):
+ raise TypeError('lines to compare must be str, not %s (%r)' %
+ (type(b[0]).__name__, b[0]))
+ # A bare str passes the element checks above (its items are str), so it
+ # is rejected explicitly here.
+ if isinstance(a, str):
+ raise TypeError('input must be a sequence of strings, not %s' %
+ type(a).__name__)
+ if isinstance(b, str):
+ raise TypeError('input must be a sequence of strings, not %s' %
+ type(b).__name__)
+ # the remaining arguments are the header strings and lineterm
+ for arg in args:
+ if not isinstance(arg, str):
+ raise TypeError('all arguments must be str, not: %r' % (arg,))
+
+def diff_bytes(dfunc, a, b, fromfile=b'', tofile=b'',
+ fromfiledate=b'', tofiledate=b'', n=3, lineterm=b'\n'):
+ r"""
+ Compare `a` and `b`, two sequences of lines represented as bytes rather
+ than str. This is a wrapper for `dfunc`, which is typically either
+ unified_diff() or context_diff(). Inputs are losslessly converted to
+ strings so that `dfunc` only has to worry about strings, and encoded
+ back to bytes on return. This is necessary to compare files with
+ unknown or inconsistent encoding. All other inputs (except `n`) must be
+ bytes rather than str.
+ """
+ def decode(s):
+ # The same codec and error handler are used by encode() at the end
+ # of this function, which is what makes the round trip lossless.
+ try:
+ return s.decode('ascii', 'surrogateescape')
+ except AttributeError as err:
+ # no .decode attribute (e.g. a str was passed): report a TypeError
+ msg = ('all arguments must be bytes, not %s (%r)' %
+ (type(s).__name__, s))
+ raise TypeError(msg) from err
+ # Both sequences are fully materialised as lists of str.
+ a = list(map(decode, a))
+ b = list(map(decode, b))
+ fromfile = decode(fromfile)
+ tofile = decode(tofile)
+ fromfiledate = decode(fromfiledate)
+ tofiledate = decode(tofiledate)
+ lineterm = decode(lineterm)
+
+ lines = dfunc(a, b, fromfile, tofile, fromfiledate, tofiledate, n, lineterm)
+ for line in lines:
+ yield line.encode('ascii', 'surrogateescape')
+
+def ndiff(a, b, linejunk=None, charjunk=IS_CHARACTER_JUNK):
+ r"""
+ Compare `a` and `b` (lists of strings); return a `Differ`-style delta.
+
+ Optional keyword parameters `linejunk` and `charjunk` are for filter
+ functions, or can be None:
+
+ - linejunk: A function that should accept a single string argument and
+ return true iff the string is junk. The default is None, and is
+ recommended; the underlying SequenceMatcher class has an adaptive
+ notion of "noise" lines.
+
+ - charjunk: A function that accepts a character (string of length
+ 1), and returns true iff the character is junk. The default is
+ the module-level function IS_CHARACTER_JUNK, which filters out
+ whitespace characters (a blank or tab; note: it's a bad idea to
+ include newline in this!).
+
+ Tools/scripts/ndiff.py is a command-line front-end to this function.
+
+ Example:
+
+ >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+ ... 'ore\ntree\nemu\n'.splitlines(keepends=True))
+ >>> print(''.join(diff), end="")
+ - one
+ ? ^
+ + ore
+ ? ^
+ - two
+ - three
+ ? -
+ + tree
+ + emu
+ """
+ # Differ.compare() is a generator, so the delta is produced lazily as
+ # the returned object is iterated.
+ return Differ(linejunk, charjunk).compare(a, b)
+
+def _mdiff(fromlines, tolines, context=None, linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ r"""Returns generator yielding marked up from/to side by side differences.
+
+ Arguments:
+ fromlines -- list of text lines to be compared to tolines
+ tolines -- list of text lines to be compared to fromlines
+ context -- number of context lines to display on each side of difference,
+ if None, all from/to text lines will be generated.
+ linejunk -- passed on to ndiff (see ndiff documentation)
+ charjunk -- passed on to ndiff (see ndiff documentation)
+
+ This function returns an iterator which returns a tuple:
+ (from line tuple, to line tuple, boolean flag)
+
+ from/to line tuple -- (line num, line text)
+ line num -- integer or None (to indicate a context separation)
+ line text -- original line text with following markers inserted:
+ '\0+' -- marks start of added text
+ '\0-' -- marks start of deleted text
+ '\0^' -- marks start of changed text
+ '\1' -- marks end of added/deleted/changed text
+
+ boolean flag -- None indicates context separation, True indicates
+ either "from" or "to" line contains a change, otherwise False.
+
+ This function/iterator was originally developed to generate side by side
+ file difference for making HTML pages (see HtmlDiff class for example
+ usage).
+
+ Note, this function utilizes the ndiff function to generate the side by
+ side difference markup. Optional ndiff arguments may be passed to this
+ function and they in turn will be passed to ndiff.
+ """
+ import re
+
+ # regular expression for finding intraline change indices
+ change_re = re.compile(r'(\++|\-+|\^+)')
+
+ # create the difference iterator to generate the differences
+ diff_lines_iterator = ndiff(fromlines,tolines,linejunk,charjunk)
+
+ # The mutable default `num_lines` is deliberate: it is the running
+ # from/to line counter shared by every call (see the docstring).
+ def _make_line(lines, format_key, side, num_lines=[0,0]):
+ """Returns line of text with user's change markup and line formatting.
+
+ lines -- list of lines from the ndiff generator to produce a line of
+ text from. When producing the line of text to return, the
+ lines used are removed from this list.
+ format_key -- '+' return first line in list with "add" markup around
+ the entire line.
+ '-' return first line in list with "delete" markup around
+ the entire line.
+ '?' return first line in list with add/delete/change
+ intraline markup (indices obtained from second line)
+ None return first line in list with no markup
+ side -- index into the num_lines list (0=from,1=to)
+ num_lines -- from/to current line number. This is NOT intended to be a
+ passed parameter. It is present as a keyword argument to
+ maintain memory of the current line numbers between calls
+ of this function.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ num_lines[side] += 1
+ # Handle case where no user markup is to be added, just return line of
+ # text with user's line format to allow for usage of the line number.
+ if format_key is None:
+ # [2:] strips the two-character ndiff prefix
+ return (num_lines[side],lines.pop(0)[2:])
+ # Handle case of intraline changes
+ if format_key == '?':
+ text, markers = lines.pop(0), lines.pop(0)
+ # find intraline changes (store change type and indices in tuples)
+ sub_info = []
+ def record_sub_info(match_object,sub_info=sub_info):
+ sub_info.append([match_object.group(1)[0],match_object.span()])
+ return match_object.group(1)
+ # sub() is used only for its callback; its result is discarded
+ change_re.sub(record_sub_info,markers)
+ # process each tuple inserting our special marks that won't be
+ # noticed by an xml/html escaper.
+ # (right to left, so earlier spans keep their indices)
+ for key,(begin,end) in reversed(sub_info):
+ text = text[0:begin]+'\0'+key+text[begin:end]+'\1'+text[end:]
+ text = text[2:]
+ # Handle case of add/delete entire line
+ else:
+ text = lines.pop(0)[2:]
+ # if line of text is just a newline, insert a space so there is
+ # something for the user to highlight and see.
+ if not text:
+ text = ' '
+ # insert marks that won't be noticed by an xml/html escaper.
+ text = '\0' + format_key + text + '\1'
+ # Return line of text, first allow user's line formatter to do its
+ # thing (such as adding the line number) then replace the special
+ # marks with the user's change markup.
+ return (num_lines[side],text)
+
+ def _line_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from a
+ differencing iterator, processes them and yields them. When it can
+ it yields both a "from" and a "to" line, otherwise it will yield one
+ or the other. In addition to yielding the lines of from/to text, a
+ boolean flag is yielded to indicate if the text line(s) have
+ differences in them.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ lines = []
+ num_blanks_pending, num_blanks_to_yield = 0, 0
+ while True:
+ # Load up next 4 lines so we can look ahead, create strings which
+ # are a concatenation of the first character of each of the 4 lines
+ # so we can do some very readable comparisons.
+ while len(lines) < 4:
+ # 'X' is the sentinel appended once the ndiff output is exhausted
+ lines.append(next(diff_lines_iterator, 'X'))
+ s = ''.join([line[0] for line in lines])
+ if s.startswith('X'):
+ # When no more lines, pump out any remaining blank lines so the
+ # corresponding add/delete lines get a matching blank line so
+ # all line pairs get yielded at the next level.
+ num_blanks_to_yield = num_blanks_pending
+ elif s.startswith('-?+?'):
+ # simple intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('--++'):
+ # in delete block, add block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the delete line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith(('--?+', '--+', '- ')):
+ # in delete block and see an intraline change or unchanged line
+ # coming: yield the delete line and then blanks
+ from_line,to_line = _make_line(lines,'-',0), None
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending-1,0
+ elif s.startswith('-+?'):
+ # intraline change
+ yield _make_line(lines,None,0), _make_line(lines,'?',1), True
+ continue
+ elif s.startswith('-?+'):
+ # intraline change
+ yield _make_line(lines,'?',0), _make_line(lines,None,1), True
+ continue
+ elif s.startswith('-'):
+ # delete FROM line
+ num_blanks_pending -= 1
+ yield _make_line(lines,'-',0), None, True
+ continue
+ elif s.startswith('+--'):
+ # in add block, delete block coming: we do NOT want to get
+ # caught up on blank lines yet, just process the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(('+ ', '+-')):
+ # will be leaving an add block: yield blanks then add line
+ from_line, to_line = None, _make_line(lines,'+',1)
+ num_blanks_to_yield,num_blanks_pending = num_blanks_pending+1,0
+ elif s.startswith('+'):
+ # inside an add block, yield the add line
+ num_blanks_pending += 1
+ yield None, _make_line(lines,'+',1), True
+ continue
+ elif s.startswith(' '):
+ # unchanged text, yield it to both sides
+ # (the first call gets a copy so that only the second call
+ # removes the line from `lines`)
+ yield _make_line(lines[:],None,0),_make_line(lines,None,1),False
+ continue
+ # Catch up on the blank lines so when we yield the next from/to
+ # pair, they are lined up.
+ while(num_blanks_to_yield < 0):
+ num_blanks_to_yield += 1
+ yield None,('','\n'),True
+ while(num_blanks_to_yield > 0):
+ num_blanks_to_yield -= 1
+ yield ('','\n'),None,True
+ if s.startswith('X'):
+ return
+ else:
+ yield from_line,to_line,True
+
+ def _line_pair_iterator():
+ """Yields from/to lines of text with a change indication.
+
+ This function is an iterator. It itself pulls lines from the line
+ iterator. Its difference from that iterator is that this function
+ always yields a pair of from/to text lines (with the change
+ indication). If necessary it will collect single from/to lines
+ until it has a matching pair from/to pair to yield.
+
+ Note, this function is purposefully not defined at the module scope so
+ that data it needs from its parent function (within whose context it
+ is defined) does not need to be of module scope.
+ """
+ line_iterator = _line_iterator()
+ fromlines,tolines=[],[]
+ while True:
+ # Collecting lines of text until we have a from/to pair
+ while (len(fromlines)==0 or len(tolines)==0):
+ try:
+ from_line, to_line, found_diff = next(line_iterator)
+ except StopIteration:
+ return
+ if from_line is not None:
+ fromlines.append((from_line,found_diff))
+ if to_line is not None:
+ tolines.append((to_line,found_diff))
+ # Once we have a pair, remove them from the collection and yield it
+ from_line, fromDiff = fromlines.pop(0)
+ to_line, to_diff = tolines.pop(0)
+ yield (from_line,to_line,fromDiff or to_diff)
+
+ # Handle case where user does not want context differencing, just yield
+ # them up without doing anything else with them.
+ line_pair_iterator = _line_pair_iterator()
+ if context is None:
+ yield from line_pair_iterator
+ # Handle case where user wants context differencing. We must do some
+ # storage of lines until we know for sure that they are to be yielded.
+ else:
+ # The buffer below also stores the changed line that ends each scan,
+ # hence one slot more than the requested number of context lines.
+ context += 1
+ lines_to_write = 0
+ while True:
+ # Store lines up until we find a difference, note use of a
+ # circular queue because we only need to keep around what
+ # we need for context.
+ index, contextLines = 0, [None]*(context)
+ found_diff = False
+ while(found_diff is False):
+ try:
+ from_line, to_line, found_diff = next(line_pair_iterator)
+ except StopIteration:
+ return
+ i = index % context
+ contextLines[i] = (from_line, to_line, found_diff)
+ index += 1
+ # Yield lines that we have collected so far, but first yield
+ # the user's separator.
+ if index > context:
+ # more lines were scanned than the buffer holds: some were skipped
+ yield None, None, None
+ lines_to_write = context
+ else:
+ lines_to_write = index
+ index = 0
+ while(lines_to_write):
+ i = index % context
+ index += 1
+ yield contextLines[i]
+ lines_to_write -= 1
+ # Now yield the context lines after the change
+ lines_to_write = context-1
+ try:
+ while(lines_to_write):
+ from_line, to_line, found_diff = next(line_pair_iterator)
+ # If another change within the context, extend the context
+ if found_diff:
+ lines_to_write = context-1
+ else:
+ lines_to_write -= 1
+ yield from_line, to_line, found_diff
+ except StopIteration:
+ # Catch exception from next() and return normally
+ return
+
+
+# NOTE(review): in this copy the HTML markup of _file_template,
+# _table_template and _legend appears to have been stripped: no tags remain,
+# and _file_template has no %(styles)s / %(charset)s placeholders although
+# HtmlDiff.make_file() supplies both keys. Verify these strings against the
+# upstream module before relying on them.
+_file_template = """
+
+
+
+
+
+ Diff comparison
+
+
+
+
+ %(table)s%(legend)s
+
+
+"""
+
+_styles = """
+ :root {color-scheme: light dark}
+ table.diff {
+ font-family: Menlo, Consolas, Monaco, Liberation Mono, Lucida Console, monospace;
+ border: medium;
+ }
+ .diff_header {
+ background-color: #e0e0e0;
+ font-weight: bold;
+ }
+ td.diff_header {
+ text-align: right;
+ padding: 0 8px;
+ }
+ .diff_next {
+ background-color: #c0c0c0;
+ padding: 4px 0;
+ }
+ .diff_add {background-color:palegreen}
+ .diff_chg {background-color:#ffff77}
+ .diff_sub {background-color:#ffaaaa}
+ table.diff[summary="Legends"] {
+ margin-top: 20px;
+ border: 1px solid #ccc;
+ }
+ table.diff[summary="Legends"] th {
+ background-color: #e0e0e0;
+ padding: 4px 8px;
+ }
+ table.diff[summary="Legends"] td {
+ padding: 4px 8px;
+ }
+
+ @media (prefers-color-scheme: dark) {
+ .diff_header {background-color:#666}
+ .diff_next {background-color:#393939}
+ .diff_add {background-color:darkgreen}
+ .diff_chg {background-color:#847415}
+ .diff_sub {background-color:darkred}
+ table.diff[summary="Legends"] {border-color:#555}
+ table.diff[summary="Legends"] th{background-color:#666}
+ }"""
+
+_table_template = """
+
+
+
+ %(header_row)s
+
+%(data_rows)s
+
 """
+
+_legend = """
+
+ | Legends |
+
+ | Colors |
+ | Added |
+ | Changed |
+ | Deleted |
+ |
+
+ | Links |
+ | (f)irst change |
+ | (n)ext change |
+ | (t)op |
+ |
+
 """
+
+class HtmlDiff(object):
+ """For producing HTML side by side comparison with change highlights.
+
+ This class can be used to create an HTML table (or a complete HTML file
+ containing the table) showing a side by side, line by line comparison
+ of text with inter-line and intra-line change highlights. The table can
+ be generated in either full or contextual difference mode.
+
+ The following methods are provided for HTML generation:
+
+ make_table -- generates HTML for a single side by side table
+ make_file -- generates complete HTML file with a single side by side table
+
+ See Doc/includes/diff.py for an example usage of this class.
+ """
+
+ _file_template = _file_template
+ _styles = _styles
+ _table_template = _table_template
+ _legend = _legend
+ _default_prefix = 0
+
+ def __init__(self,tabsize=8,wrapcolumn=None,linejunk=None,
+ charjunk=IS_CHARACTER_JUNK):
+ """HtmlDiff instance initializer
+
+ Arguments:
+ tabsize -- tab stop spacing, defaults to 8.
+ wrapcolumn -- column number where lines are broken and wrapped,
+ defaults to None where lines are not wrapped.
+ linejunk,charjunk -- keyword arguments passed into ndiff() (used by
+ HtmlDiff() to generate the side by side HTML differences). See
+ ndiff() documentation for argument default values and descriptions.
+ """
+ # The settings are only stored here; nothing is validated or computed.
+ self._tabsize = tabsize
+ self._wrapcolumn = wrapcolumn
+ self._linejunk = linejunk
+ self._charjunk = charjunk
+
+ def make_file(self, fromlines, tolines, fromdesc='', todesc='',
+ context=False, numlines=5, *, charset='utf-8'):
+ """Returns HTML file of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ charset -- charset of the HTML document
+ """
+
+ # Fill the file template, then round-trip the text through `charset`:
+ # the 'xmlcharrefreplace' handler turns characters the charset cannot
+ # encode into XML character references, and the result is returned as
+ # str again.
+ return (self._file_template % dict(
+ styles=self._styles,
+ legend=self._legend,
+ table=self.make_table(fromlines, tolines, fromdesc, todesc,
+ context=context, numlines=numlines),
+ charset=charset
+ )).encode(charset, 'xmlcharrefreplace').decode(charset)
+
+ def _tab_newline_replace(self,fromlines,tolines):
+     """Return the from/to line lists with tabs expanded and newlines removed.
+
+     Each tab is expanded to the next tab stop, but the fill is made of
+     tab characters rather than spaces, so the differ can still tell
+     tabs and spaces apart.  The tab characters are turned into markup
+     at the end of HTML generation.
+     """
+     def expand_tabs(line):
+         # protect genuine spaces so only expandtabs() output is retabbed
+         hidden = line.replace(' ','\0')
+         expanded = hidden.expandtabs(self._tabsize)
+         # spaces that exist now all came from tab expansion
+         retabbed = expanded.replace(' ','\t')
+         restored = retabbed.replace('\0',' ')
+         return restored.rstrip('\n')
+     new_from = list(map(expand_tabs, fromlines))
+     new_to = list(map(expand_tabs, tolines))
+     return new_from,new_to
+
+
+ def _split_line(self,data_list,line_num,text):
+     """Append `text` to `data_list`, split at the wrap column.
+
+     data_list -- output list; (line_num, text) tuples are appended
+     line_num -- line number for the first piece; a falsy value marks a
+         blank line or context separator, which is appended unwrapped
+     text -- line text; NUL + flag character opens a change marker and
+         SOH closes it, and marker characters do not count as width
+
+     Continuation pieces are appended with '>' in place of the line
+     number.  The pieces are produced in a loop rather than by
+     recursion, so a very long line combined with a small wrap column
+     cannot exhaust the interpreter's recursion limit.
+     """
+     wrap_at = self._wrapcolumn
+     while True:
+         # blank line or context separator: pass through untouched
+         if not line_num:
+             data_list.append((line_num,text))
+             return
+
+         # nothing (more) to wrap; each marker accounts for 3 characters
+         # that are not displayed
+         size = len(text)
+         if (size <= wrap_at) or ((size -(text.count('\0')*3)) <= wrap_at):
+             data_list.append((line_num,text))
+             return
+
+         # scan for the wrap point, remembering whether it falls inside
+         # a marked region
+         i = 0
+         n = 0
+         mark = ''
+         while n < wrap_at and i < size:
+             if text[i] == '\0':
+                 i += 1
+                 mark = text[i]
+                 i += 1
+             elif text[i] == '\1':
+                 i += 1
+                 mark = ''
+             else:
+                 i += 1
+                 n += 1
+
+         if i == 0:
+             # no character can be consumed (non-positive wrap column);
+             # emit the text unwrapped instead of looping forever
+             data_list.append((line_num,text))
+             return
+
+         line1 = text[:i]
+         line2 = text[i:]
+
+         # a marker open at the wrap point is closed on the first piece
+         # and reopened on the next, since each piece is marked up alone
+         if mark:
+             line1 = line1 + '\1'
+             line2 = '\0' + mark + line2
+
+         data_list.append((line_num,line1))
+
+         # continue with the remainder as a continuation line
+         line_num,text = '>',line2
+
+
+ def _line_wrapper(self,diffs):
+     """Yield mdiff-style tuples with over-long text lines wrapped.
+
+     Tuples whose flag is None (context separators) pass through
+     unchanged.  Every other from/to pair is split with _split_line();
+     when one side wraps into more pieces than the other, the shorter
+     side is padded with blank entries.
+     """
+     for fromdata,todata,flag in diffs:
+         if flag is None:
+             # context separator
+             yield fromdata,todata,flag
+             continue
+         (fromline,fromtext),(toline,totext) = fromdata,todata
+         left,right = [],[]
+         self._split_line(left,fromline,fromtext)
+         self._split_line(right,toline,totext)
+         # pair the pieces up row by row, padding the shorter side
+         for row in range(max(len(left),len(right))):
+             if row < len(left):
+                 fromdata = left[row]
+             else:
+                 fromdata = ('',' ')
+             if row < len(right):
+                 todata = right[row]
+             else:
+                 todata = ('',' ')
+             yield fromdata,todata,flag
+
+
+ def _collect_lines(self,diffs):
+     """Drain an mdiff-style iterator into three parallel lists.
+
+     Returns (fromlist, tolist, flaglist).  The from/to entries are the
+     lines already converted to HTML by _format_line(); entries for
+     which formatting raises TypeError (the context separators) are
+     stored as None.
+     """
+     left_rows,right_rows,flags = [],[],[]
+     for fromdata,todata,flag in diffs:
+         try:
+             left_rows.append(self._format_line(0,flag,*fromdata))
+             right_rows.append(self._format_line(1,flag,*todata))
+         except TypeError:
+             # separator rows carry no (linenum, text) pair to unpack
+             left_rows.append(None)
+             right_rows.append(None)
+         flags.append(flag)
+     return left_rows,right_rows,flags
+
+
+ def _format_line(self,side,flag,linenum,text):
+ """Returns HTML markup of "from" / "to" text lines
+
+ side -- 0 or 1 indicating "from" or "to" text
+ flag -- indicates if difference on line
+ linenum -- line number (used for line number column)
+ text -- line text to be marked up
+ """
+ # NOTE(review): the replacement strings and the returned format string
+ # below look as if their HTML entities/tags were stripped in this copy
+ # (each replace() maps a character to itself, and the format string has
+ # one placeholder for three values) -- verify against upstream difflib.
+ try:
+ linenum = '%d' % linenum
+ # anchor id is built from the per-side prefix set by _make_prefix()
+ id = ' id="%s%s"' % (self._prefix[side],linenum)
+ except TypeError:
+ # handle blank lines where linenum is '>' or ''
+ id = ''
+ # replace those things that would get confused with HTML symbols
+ text=text.replace("&","&").replace(">",">").replace("<","<")
+
+ # make space non-breakable so they don't get compressed or line wrapped
+ text = text.replace(' ',' ').rstrip()
+
+ return '%s | ' \
+ % (id,linenum,text)
+
+ def _make_prefix(self):
+     """Create the anchor prefixes for the next table.
+
+     The class-wide counter is baked into both prefixes and then
+     advanced, so several tables can share one HTML page without their
+     anchors colliding.  The pair is stored on the instance as
+     [from-prefix, to-prefix] for the line formatting code.
+     """
+     serial = HtmlDiff._default_prefix
+     prefixes = ["from%d_" % serial, "to%d_" % serial]
+     HtmlDiff._default_prefix += 1
+     self._prefix = prefixes
+
+
+ def _convert_flags(self,fromlist,tolist,flaglist,context,numlines):
+ """Makes list of "next" links
+
+ Returns (fromlist, tolist, flaglist, next_href, next_id).  next_id and
+ next_href are lists parallel to flaglist holding the anchor and link
+ markup for the middle column.  When flaglist is empty, one-row
+ placeholder lists are substituted so callers can index row 0.
+ """
+ # NOTE(review): several string literals in this copy ('n', 'f', 't' and
+ # the placeholder rows) are %-formatted with arguments but contain no
+ # placeholders; their HTML markup appears to have been stripped --
+ # verify against upstream difflib before relying on this text.
+
+ # all anchor names will be generated using the unique "to" prefix
+ toprefix = self._prefix[1]
+
+ # process change flags, generating middle column of next anchors/links
+ next_id = ['']*len(flaglist)
+ next_href = ['']*len(flaglist)
+ num_chg, in_change = 0, False
+ last = 0
+ for i,flag in enumerate(flaglist):
+ if flag:
+ if not in_change:
+ in_change = True
+ last = i
+ # at the beginning of a change, drop an anchor a few lines
+ # (the context lines) before the change for the previous
+ # link
+ i = max([0,i-numlines])
+ next_id[i] = ' id="difflib_chg_%s_%d"' % (toprefix,num_chg)
+ # at the beginning of a change, drop a link to the next
+ # change
+ num_chg += 1
+ next_href[last] = 'n' % (
+ toprefix,num_chg)
+ else:
+ in_change = False
+ # check for cases where there is no content to avoid exceptions
+ if not flaglist:
+ flaglist = [False]
+ next_id = ['']
+ next_href = ['']
+ last = 0
+ if context:
+ fromlist = [' | No Differences Found | ']
+ tolist = fromlist
+ else:
+ fromlist = tolist = [' | Empty File | ']
+ # if not a change on first line, drop a link
+ if not flaglist[0]:
+ next_href[0] = 'f' % toprefix
+ # redo the last link to link to the top
+ next_href[last] = 't' % (toprefix)
+
+ return fromlist,tolist,flaglist,next_href,next_id
+
+ def make_table(self,fromlines,tolines,fromdesc='',todesc='',context=False,
+ numlines=5):
+ """Returns HTML table of side by side comparison with change highlights
+
+ Arguments:
+ fromlines -- list of "from" lines
+ tolines -- list of "to" lines
+ fromdesc -- "from" file column header string
+ todesc -- "to" file column header string
+ context -- set to True for contextual differences (defaults to False
+ which shows full differences).
+ numlines -- number of context lines. When context is set True,
+ controls number of lines displayed before and after the change.
+ When context is False, controls the number of lines to place
+ the "next" link anchors before the next change (so click of
+ "next" link jumps to just before the change).
+ """
+ # NOTE(review): the row/header format strings and the final replace()
+ # targets in this copy appear to have lost their HTML tags (string
+ # literals are split across lines and placeholders are missing) --
+ # verify against upstream difflib before relying on this text.
+
+ # make unique anchor prefixes so that multiple tables may exist
+ # on the same page without conflict.
+ self._make_prefix()
+
+ # change tabs to spaces before it gets more difficult after we insert
+ # markup
+ fromlines,tolines = self._tab_newline_replace(fromlines,tolines)
+
+ # create diffs iterator which generates side by side from/to data
+ if context:
+ context_lines = numlines
+ else:
+ context_lines = None
+ diffs = _mdiff(fromlines,tolines,context_lines,linejunk=self._linejunk,
+ charjunk=self._charjunk)
+
+ # set up iterator to wrap lines that exceed desired width
+ if self._wrapcolumn:
+ diffs = self._line_wrapper(diffs)
+
+ # collect up from/to lines and flags into lists (also format the lines)
+ fromlist,tolist,flaglist = self._collect_lines(diffs)
+
+ # process change flags, generating middle column of next anchors/links
+ fromlist,tolist,flaglist,next_href,next_id = self._convert_flags(
+ fromlist,tolist,flaglist,context,numlines)
+
+ s = []
+ fmt = ' | %s | %s' + \
+ '%s | %s
\n'
+ for i in range(len(flaglist)):
+ if flaglist[i] is None:
+ # mdiff yields None on separator lines skip the bogus ones
+ # generated for the first line
+ if i > 0:
+ s.append(' \n \n')
+ else:
+ s.append( fmt % (next_id[i],next_href[i],fromlist[i],
+ next_href[i],tolist[i]))
+ if fromdesc or todesc:
+ header_row = '%s%s%s%s
' % (
+ '
| ',
+ '' % fromdesc,
+ '
| ',
+ '' % todesc)
+ else:
+ header_row = ''
+
+ table = self._table_template % dict(
+ data_rows=''.join(s),
+ header_row=header_row,
+ prefix=self._prefix[1])
+
+ # turn the NUL/SOH change markers and the tab placeholders left by
+ # _tab_newline_replace() into their final text
+ return table.replace('\0+',''). \
+ replace('\0-',''). \
+ replace('\0^',''). \
+ replace('\1',''). \
+ replace('\t',' ')
+
+
+def restore(delta, which):
+    r"""
+    Generate one of the two sequences that generated a delta.
+
+    Given a `delta` produced by `Differ.compare()` or `ndiff()`, extract
+    lines originating from file 1 or 2 (parameter `which`), stripping off
+    the two-character line prefixes.  Lines common to both files (prefix
+    of two spaces) are part of either sequence; "? " hint lines and the
+    other file's lines are dropped.
+
+    Raises ValueError if `which` is an integer other than 1 or 2.
+
+    Examples:
+
+    >>> diff = ndiff('one\ntwo\nthree\n'.splitlines(keepends=True),
+    ...              'ore\ntree\nemu\n'.splitlines(keepends=True))
+    >>> diff = list(diff)
+    >>> print(''.join(restore(diff, 1)), end="")
+    one
+    two
+    three
+    >>> print(''.join(restore(diff, 2)), end="")
+    ore
+    tree
+    emu
+    """
+    try:
+        tag = {1: "- ", 2: "+ "}[int(which)]
+    except KeyError:
+        raise ValueError('unknown delta choice (must be 1 or 2): %r'
+                         % which) from None
+    # Every delta line starts with a two-character code, so the prefix for
+    # unchanged lines must be two spaces to ever equal line[:2].
+    prefixes = ("  ", tag)
+    for line in delta:
+        if line[:2] in prefixes:
+            yield line[2:]
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/formfill.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/formfill.py
new file mode 100644
index 0000000000000000000000000000000000000000..9741c28b747c8f84114d415ac6b4f76169b406aa
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/formfill.py
@@ -0,0 +1,299 @@
+from lxml.etree import XPath, ElementBase
+from lxml.html import fromstring, XHTML_NAMESPACE
+from lxml.html import _forms_xpath, _options_xpath, _nons, _transform_result
+from lxml.html import defs
+import copy
+
+try:
+ basestring
+except NameError:
+ # Python 3
+ basestring = str
+
+__all__ = ['FormNotFound', 'fill_form', 'fill_form_html',
+ 'insert_errors', 'insert_errors_html',
+ 'DefaultErrorCreator']
+
+class FormNotFound(LookupError):
+    """Signals that the requested form could not be located."""
+
+# Forms (plain or XHTML-namespaced) whose ``name`` attribute equals $name.
+# The predicate must test ``@name``; a bare ``name`` would compare a child
+# element called <name> instead of the attribute.
+_form_name_xpath = XPath('descendant-or-self::form[@name=$name]|descendant-or-self::x:form[@name=$name]', namespaces={'x':XHTML_NAMESPACE})
+# Every input, select and textarea element, plain or XHTML-namespaced.
+_input_xpath = XPath('|'.join(['descendant-or-self::'+_tag for _tag in ('input','select','textarea','x:input','x:select','x:textarea')]),
+                     namespaces={'x':XHTML_NAMESPACE})
+# Labels anywhere in the document whose ``for`` attribute equals $for_id.
+_label_for_xpath = XPath('//label[@for=$for_id]|//x:label[@for=$for_id]',
+                         namespaces={'x':XHTML_NAMESPACE})
+# Any element whose ``name`` attribute equals $name.
+_name_xpath = XPath('descendant-or-self::*[@name=$name]')
+
+def fill_form(
+    el,
+    values,
+    form_id=None,
+    form_index=None,
+    ):
+    """Fill one form found under `el`, in place, from `values`.
+
+    The form is chosen by `form_id` (id or name) or `form_index`; with
+    neither given, the first form is used.
+    """
+    form = _find_form(el, form_id=form_id, form_index=form_index)
+    _fill_form(form, values)
+
+def fill_form_html(html, values, form_id=None, form_index=None):
+    """Return a filled-in copy of `html`, in the same type it was given.
+
+    Strings are parsed; anything else is deep-copied, so the caller's
+    object is left untouched.
+    """
+    result_type = type(html)
+    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
+    fill_form(doc, values, form_id=form_id, form_index=form_index)
+    return _transform_result(result_type, doc)
+
+def _fill_form(el, values):
+    """Set every named field under `el` from the mapping `values`.
+
+    Checkboxes, radios and multi-selects receive the full list of values
+    for their name.  Other fields sharing a name consume successive items
+    of a list/tuple value; a scalar value fills only the first of them.
+    """
+    seen = {}
+    if hasattr(values, 'mixed'):
+        # For Paste request parameters
+        values = values.mixed()
+    for field in _input_xpath(el):
+        name = field.get('name')
+        if not name:
+            continue
+        if _takes_multiple(field):
+            chosen = values.get(name, [])
+            if not isinstance(chosen, (list, tuple)):
+                chosen = [chosen]
+            _fill_multiple(field, chosen)
+            continue
+        if name not in values:
+            continue
+        # position of this field among the same-named single-value fields
+        position = seen.get(name, 0)
+        seen[name] = position + 1
+        value = values[name]
+        if isinstance(value, (list, tuple)):
+            try:
+                value = value[position]
+            except IndexError:
+                continue
+        elif position > 0:
+            continue
+        _fill_single(field, value)
+
+def _takes_multiple(input):
+    """Tell whether the field is filled from a list of values.
+
+    True for a <select> with a truthy ``multiple`` attribute and for
+    radio and checkbox inputs.
+    """
+    if _nons(input.tag) == 'select' and input.get('multiple'):
+        # FIXME: multiple="0"?
+        return True
+    return input.get('type', '').lower() in ('radio', 'checkbox')
+
+def _fill_multiple(input, value):
+    """Check/select a multi-valued field according to the list `value`.
+
+    input -- a checkbox, a radio button or a <select> element
+    value -- list or tuple of the submitted values for the field's name
+
+    A checkbox without a ``value`` attribute is driven by the first item:
+    a string checks the box only when it equals 'on', any other object is
+    used for its truth value.  Checkboxes and radios with a ``value`` are
+    checked when that value is in `value`; each option of a select is
+    selected when its value (or, lacking one, its text) is in `value`.
+    """
+    type = input.get('type', '').lower()
+    if type == 'checkbox':
+        v = input.get('value')
+        if v is None:
+            if not value:
+                result = False
+            else:
+                result = value[0]
+                # Test the item, not the enclosing list: `value` is never
+                # a string here, so checking it left this rule unreachable.
+                if isinstance(result, basestring):
+                    # The only valid "on" value for an unnamed checkbox is 'on'
+                    result = result == 'on'
+            _check(input, result)
+        else:
+            _check(input, v in value)
+    elif type == 'radio':
+        v = input.get('value')
+        _check(input, v in value)
+    else:
+        assert _nons(input.tag) == 'select'
+        for option in _options_xpath(input):
+            v = option.get('value')
+            if v is None:
+                # This seems to be the default, at least on IE
+                # FIXME: but I'm not sure
+                v = option.text_content()
+            _select(option, v in value)
+
+def _check(el, check):
+    """Add an empty ``checked`` attribute to `el`, or remove it."""
+    if check:
+        el.set('checked', '')
+    elif 'checked' in el.attrib:
+        del el.attrib['checked']
+
+def _select(el, select):
+    """Add an empty ``selected`` attribute to `el`, or remove it."""
+    if select:
+        el.set('selected', '')
+    elif 'selected' in el.attrib:
+        del el.attrib['selected']
+
+def _fill_single(input, value):
+    """Store `value` in a single-valued field.
+
+    A textarea keeps its value as element text; every other field keeps
+    it in the ``value`` attribute.
+    """
+    if _nons(input.tag) != 'textarea':
+        input.set('value', value)
+        return
+    input.text = value
+
+def _find_form(el, form_id=None, form_index=None):
+    """Locate one form element under `el`.
+
+    form_id -- an element id or, failing that, a form ``name``
+    form_index -- position of the form among the forms found under `el`
+
+    With neither argument the first form is returned.  `form_id` takes
+    precedence over `form_index` when both are given.
+
+    Raises FormNotFound when no matching form exists.
+    """
+    if form_id is None and form_index is None:
+        forms = _forms_xpath(el)
+        for form in forms:
+            return form
+        raise FormNotFound(
+            "No forms in page")
+    if form_id is not None:
+        # Pass a default: without one get_element_by_id() raises KeyError
+        # for an unknown id and the lookup by name below is never reached.
+        form = el.get_element_by_id(form_id, None)
+        if form is not None:
+            return form
+        forms = _form_name_xpath(el, name=form_id)
+        if forms:
+            return forms[0]
+        else:
+            # report the id that was asked for (not the `id` builtin)
+            raise FormNotFound(
+                "No form with the name or id of %r (forms: %s)"
+                % (form_id, ', '.join(_find_form_ids(el))))
+    if form_index is not None:
+        forms = _forms_xpath(el)
+        try:
+            return forms[form_index]
+        except IndexError:
+            raise FormNotFound(
+                "There is no form with the index %r (%i forms found)"
+                % (form_index, len(forms)))
+
+def _find_form_ids(el):
+    """Yield one human-readable label per form under `el`.
+
+    A label is the form's id, its name, or "id or name" when both are
+    set; forms with neither are labelled by their index.  When there are
+    no forms at all a single '(no forms)' label is produced.
+    """
+    forms = _forms_xpath(el)
+    if not forms:
+        yield '(no forms)'
+        return
+    for index, form in enumerate(forms):
+        form_id = form.get('id')
+        form_name = form.get('name')
+        if form_id and form_name:
+            yield '%s or %s' % (form_id, form_name)
+        elif form_id:
+            yield form_id
+        elif form_name:
+            yield form_name
+        else:
+            yield '(unnamed form %s)' % index
+
+############################################################
+## Error filling
+############################################################
+
+class DefaultErrorCreator:
+    """Callable that builds an error element and inserts it next to a field.
+
+    Every class attribute below can be overridden per instance through a
+    keyword argument of the same name.
+    """
+
+    # insert the error before (True) or after (False) the content/element
+    insert_before = True
+    # for block elements, put the error inside the element itself
+    block_inside = True
+    # tag name of the created error element
+    error_container_tag = 'div'
+    # class given to every error element (skipped when empty)
+    error_message_class = 'error-message'
+    # additional class for errors attached to block elements
+    error_block_class = 'error-block'
+    # text used when the message is None or empty
+    default_message = "Invalid"
+
+    def __init__(self, **kw):
+        """Override class-level settings; unknown names raise TypeError."""
+        for name, value in kw.items():
+            if not hasattr(self, name):
+                raise TypeError(
+                    "Unexpected keyword argument: %s" % name)
+            setattr(self, name, value)
+
+    def __call__(self, el, is_block, message):
+        """Create the error element for `el` and insert it into the tree.
+
+        el -- element the error belongs to
+        is_block -- whether `el` is treated as a block element
+        message -- string, element, or None/'' for the default message
+        """
+        error_el = el.makeelement(self.error_container_tag)
+        # Join only the classes that apply, so that an empty
+        # error_message_class does not leave a leading space behind.
+        classes = []
+        if self.error_message_class:
+            classes.append(self.error_message_class)
+        if is_block and self.error_block_class:
+            classes.append(self.error_block_class)
+        if classes:
+            error_el.set('class', ' '.join(classes))
+        if message is None or message == '':
+            message = self.default_message
+        if isinstance(message, ElementBase):
+            error_el.append(message)
+        else:
+            assert isinstance(message, basestring), (
+                "Bad message; should be a string or element: %r" % message)
+            error_el.text = message or self.default_message
+        if is_block and self.block_inside:
+            if self.insert_before:
+                # the element's leading text must follow the new first child
+                error_el.tail = el.text
+                el.text = None
+                el.insert(0, error_el)
+            else:
+                el.append(error_el)
+        else:
+            parent = el.getparent()
+            pos = parent.index(el)
+            if self.insert_before:
+                parent.insert(pos, error_el)
+            else:
+                # keep the text that followed `el` after the error element
+                error_el.tail = el.tail
+                el.tail = None
+                parent.insert(pos+1, error_el)
+
+default_error_creator = DefaultErrorCreator()
+
+
+def insert_errors(
+    el,
+    errors,
+    form_id=None,
+    form_index=None,
+    error_class="error",
+    error_creator=default_error_creator,
+    ):
+    """Insert error messages into one form under `el`, in place.
+
+    `errors` maps a field name ('#id' for a lookup by id, None for the
+    form itself) to a message or a list of messages; entries whose value
+    is None are ignored.
+    """
+    form = _find_form(el, form_id=form_id, form_index=form_index)
+    allowed = (basestring, type(None), ElementBase)
+    for name, error in errors.items():
+        if error is None:
+            continue
+        for target, message in _find_elements_for_name(form, name, error):
+            assert isinstance(message, allowed), (
+                "Bad message: %r" % message)
+            _insert_error(target, message, error_class, error_creator)
+
+def insert_errors_html(html, values, **kw):
+    """Return a copy of `html` with errors inserted, in the type given.
+
+    Strings are parsed; anything else is deep-copied first.  Keyword
+    arguments are forwarded to insert_errors().
+    """
+    result_type = type(html)
+    doc = fromstring(html) if isinstance(html, basestring) else copy.deepcopy(html)
+    insert_errors(doc, values, **kw)
+    return _transform_result(result_type, doc)
+
+def _insert_error(el, error, error_class, error_creator):
+    """Mark `el` (and its labels) as erroneous and insert the message.
+
+    Empty tags and textareas are treated as non-block elements.  Unless
+    `el` is the form itself, `error_class` is added to it; labels whose
+    ``for`` matches the element's id receive the class as well.
+    """
+    tag = _nons(el.tag)
+    is_block = not (tag in defs.empty_tags or tag == 'textarea')
+    if tag != 'form' and error_class:
+        _add_class(el, error_class)
+    el_id = el.get('id')
+    if el_id:
+        for label in _label_for_xpath(el, for_id=el_id):
+            _add_class(label, error_class)
+    error_creator(el, is_block, error)
+
+def _add_class(el, class_name):
+    """Append `class_name` to the element's space-separated class list."""
+    current = el.get('class')
+    el.set('class', current + ' ' + class_name if current else class_name)
+
+def _find_elements_for_name(form, name, error):
+    """Yield (element, message) pairs for one entry of an errors mapping.
+
+    name -- None for the form itself, '#<id>' for a lookup by element id,
+        otherwise the value of a ``name`` attribute
+    error -- one message, or a list/tuple matched positionally against
+        the elements sharing `name` (None items are skipped)
+
+    Nothing is yielded when no element matches.
+    """
+    if name is None:
+        # An error for the entire form
+        yield form, error
+        return
+    if name.startswith('#'):
+        # By id.  Pass a default: without one get_element_by_id() raises
+        # KeyError for an unknown id instead of returning None.
+        el = form.get_element_by_id(name[1:], None)
+        if el is not None:
+            yield el, error
+        return
+    els = _name_xpath(form, name=name)
+    if not els:
+        # FIXME: should this raise an exception?
+        return
+    if not isinstance(error, (list, tuple)):
+        yield els[0], error
+        return
+    # FIXME: if error is longer than els, should it raise an error?
+    for el, err in zip(els, error):
+        if err is None:
+            continue
+        yield el, err
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py
new file mode 100644
index 0000000000000000000000000000000000000000..2f7be1568977aff1ccc6533f0626226e0f57bec9
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/html/html5parser.py
@@ -0,0 +1,260 @@
+"""
+An interface to html5lib that mimics the lxml.html interface.
+"""
+import sys
+import string
+
+from html5lib import HTMLParser as _HTMLParser
+from html5lib.treebuilders.etree_lxml import TreeBuilder
+from lxml import etree
+from lxml.html import Element, XHTML_NAMESPACE, _contains_block_level_tag
+
+# python3 compatibility
+try:
+ _strings = basestring
+except NameError:
+ _strings = (bytes, str)
+try:
+ from urllib2 import urlopen
+except ImportError:
+ from urllib.request import urlopen
+try:
+ from urlparse import urlparse
+except ImportError:
+ from urllib.parse import urlparse
+
+
+class HTMLParser(_HTMLParser):
+    """html5lib HTML parser preconfigured to build lxml trees."""
+
+    def __init__(self, strict=False, **kwargs):
+        # always build with the lxml TreeBuilder; other options pass through
+        _HTMLParser.__init__(
+            self,
+            strict=strict,
+            tree=TreeBuilder,
+            **kwargs
+        )
+
+
+# XHTML support is optional: the class and the shared ``xhtml_parser``
+# instance are only defined when html5lib provides XHTMLParser.
+try:
+ from html5lib import XHTMLParser as _XHTMLParser
+except ImportError:
+ pass
+else:
+ class XHTMLParser(_XHTMLParser):
+ """An html5lib XHTML Parser with lxml as tree."""
+
+ def __init__(self, strict=False, **kwargs):
+ # always build with the lxml TreeBuilder; other options pass through
+ _XHTMLParser.__init__(self, strict=strict, tree=TreeBuilder, **kwargs)
+
+ xhtml_parser = XHTMLParser()
+
+
+def _find_tag(tree, tag):
+    """Find `tag` under `tree`, retrying in the XHTML namespace.
+
+    Returns the match from ``tree.find()``, or None when neither the
+    plain nor the namespaced lookup finds anything.
+    """
+    elem = tree.find(tag)
+    if elem is None:
+        elem = tree.find('{%s}%s' % (XHTML_NAMESPACE, tag))
+    return elem
+
+
+def document_fromstring(html, guess_charset=None, parser=None):
+    """
+    Parse a whole document from a string and return its root element.
+
+    `guess_charset` is handed to the parser as its ``useChardet`` option.
+    When it is left as None the option is enabled for byte strings and
+    omitted entirely for text input.
+
+    Raises TypeError if `html` is not a string.
+    """
+    if not isinstance(html, _strings):
+        raise TypeError('string required')
+
+    if parser is None:
+        parser = html_parser
+
+    if guess_charset is None and isinstance(html, bytes):
+        # html5lib does not accept useChardet as an argument when the
+        # input is already text, so only default it for bytes.
+        guess_charset = True
+    options = {} if guess_charset is None else {'useChardet': guess_charset}
+    return parser.parse(html, **options).getroot()
+
+
+def fragments_fromstring(html, no_leading_text=False,
+                         guess_charset=None, parser=None):
+    """Parse several HTML elements and return them as a list.
+
+    The first item of the list may be a string (leading text).  With
+    `no_leading_text` true, non-blank leading text raises
+    ``etree.ParserError`` and blank leading text is dropped, so the
+    result holds elements only.
+
+    `guess_charset` is handed to the parser as its ``useChardet`` option.
+    When it is left as None the option is disabled for byte strings and
+    omitted entirely for text input.
+
+    Raises TypeError if `html` is not a string.
+    """
+    if not isinstance(html, _strings):
+        raise TypeError('string required')
+
+    if parser is None:
+        parser = html_parser
+
+    if guess_charset is None and isinstance(html, bytes):
+        # html5lib does not accept useChardet as an argument when the
+        # input is already text, so only default it for bytes.
+        guess_charset = False
+    options = {} if guess_charset is None else {'useChardet': guess_charset}
+    children = parser.parseFragment(html, 'div', **options)
+    has_leading_text = bool(children) and isinstance(children[0], _strings)
+    if has_leading_text and no_leading_text:
+        if children[0].strip():
+            raise etree.ParserError('There is leading text: %r' %
+                                    children[0])
+        del children[0]
+    return children
+
+
+def fragment_fromstring(html, create_parent=False,
+                        guess_charset=None, parser=None):
+    """Parse a single HTML element and return it.
+
+    Without `create_parent` it is an error (``etree.ParserError``) if the
+    input holds no element, more than one element, or anything but
+    whitespace before or after the element.
+
+    If `create_parent` is true (or is a tag name) a parent element --
+    'div' unless a tag name was given -- is created to hold the parsed
+    content and is returned instead; leading text is allowed then.
+
+    `guess_charset` and `parser` are forwarded to fragments_fromstring().
+
+    Raises TypeError if `html` is not a string.
+    """
+    if not isinstance(html, _strings):
+        raise TypeError('string required')
+
+    # leading text is acceptable only when a wrapper element will hold it
+    elements = fragments_fromstring(
+        html, guess_charset=guess_charset, parser=parser,
+        no_leading_text=not bool(create_parent))
+
+    if create_parent:
+        parent_tag = create_parent if isinstance(create_parent, _strings) else 'div'
+        new_root = Element(parent_tag)
+        if elements:
+            first = elements[0]
+            if isinstance(first, _strings):
+                new_root.text = first
+                del elements[0]
+            new_root.extend(elements)
+        return new_root
+
+    if not elements:
+        raise etree.ParserError('No elements found')
+    if len(elements) > 1:
+        raise etree.ParserError('Multiple elements found')
+    result = elements[0]
+    trailing = result.tail
+    if trailing and trailing.strip():
+        raise etree.ParserError('Element followed by text: %r' % result.tail)
+    result.tail = None
+    return result
+
+
+def fromstring(html, guess_charset=None, parser=None):
+ """Parse the html, returning a single element/document.
+
+ This tries to minimally parse the chunk of text, without knowing if it
+ is a fragment or a document.
+
+ `guess_charset` and `parser` are forwarded to document_fromstring().
+ (This function takes no `base_url` argument.)
+
+ Raises TypeError if `html` is not a string.
+ """
+ # NOTE(review): in this copy the statement starting with
+ # ``if start.startswith(`` is truncated and the lines that bind ``body``
+ # are missing -- restore them from upstream lxml before using this code.
+ if not isinstance(html, _strings):
+ raise TypeError('string required')
+ doc = document_fromstring(html, parser=parser,
+ guess_charset=guess_charset)
+
+ # inspect the first 50 characters to see whether the input starts like
+ # a full document
+ start = html[:50]
+ if isinstance(start, bytes):
+ # Allow text comparison in python3.
+ # Decode as ascii, that also covers latin-1 and utf-8 for the
+ # characters we need.
+ start = start.decode('ascii', 'replace')
+
+ start = start.lstrip().lower()
+ if start.startswith(' implies too much structure.
+ if _contains_block_level_tag(body):
+ body.tag = 'div'
+ else:
+ body.tag = 'span'
+ return body
+
+
+def parse(filename_url_or_file, guess_charset=None, parser=None):
+    """Parse a filename, URL, or file-like object into an HTML document
+    tree.  Note: this returns a tree, not an element.  Use
+    ``parse(...).getroot()`` to get the document root.
+
+    If ``guess_charset`` is true, the ``useChardet`` option is passed into
+    html5lib to enable character detection.  This option is on by default
+    when parsing from URLs, off by default when parsing from file(-like)
+    objects (which tend to return Unicode more often than not), and on by
+    default when parsing from a file path (which is read in binary mode).
+
+    A file or URL that this function opens itself is closed again before
+    it returns; a file-like object passed in by the caller is left open.
+    """
+    if parser is None:
+        parser = html_parser
+    opened_here = False
+    if not isinstance(filename_url_or_file, _strings):
+        fp = filename_url_or_file
+        if guess_charset is None:
+            # assume that file-like objects return Unicode more often than bytes
+            guess_charset = False
+    elif _looks_like_url(filename_url_or_file):
+        fp = urlopen(filename_url_or_file)
+        opened_here = True
+        if guess_charset is None:
+            # assume that URLs return bytes
+            guess_charset = True
+    else:
+        fp = open(filename_url_or_file, 'rb')
+        opened_here = True
+        if guess_charset is None:
+            guess_charset = True
+
+    options = {}
+    # html5lib does not accept useChardet as an argument, if it
+    # detected the html argument would produce unicode objects.
+    if guess_charset:
+        options['useChardet'] = guess_charset
+    try:
+        return parser.parse(fp, **options)
+    finally:
+        # release only handles opened above, even if parsing fails
+        if opened_here:
+            fp.close()
+
+
+def _looks_like_url(str):
+    """Tell whether `str` should be treated as a URL rather than a path.
+
+    Anything with a URL scheme counts, except a single-letter scheme on
+    Windows, which is taken to be a drive letter.
+    """
+    scheme = urlparse(str)[0]
+    if not scheme:
+        return False
+    # looks like a 'normal' absolute path
+    is_drive_letter = (sys.platform == 'win32' and
+                       scheme in string.ascii_letters
+                       and len(scheme) == 1)
+    return not is_drive_letter
+
+
+# Shared default parser, used by the functions in this module whenever the
+# caller passes no `parser` argument.
+html_parser = HTMLParser()
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/__init__.pxd b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/__init__.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..9c04438f737374901d92acc497e4175ebc216891
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/config.pxd
@@ -0,0 +1,3 @@
+# Boolean build switches declared by the C header "etree_defs.h".
+cdef extern from "etree_defs.h":
+ cdef bint ENABLE_THREADING
+ cdef bint ENABLE_SCHEMATRON
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..5ac96711e7b8124400fe6a8acb59f0e4192d2949
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/relaxng.pxd
@@ -0,0 +1,64 @@
+from lxml.includes.tree cimport xmlDoc
+from lxml.includes.xmlerror cimport xmlStructuredErrorFunc
+
+# Cython declarations for the RelaxNG API in libxml2's "libxml/relaxng.h".
+# The whole block is declared ``nogil``.
+cdef extern from "libxml/relaxng.h" nogil:
+ # opaque handles: schema, parser context and validation context
+ ctypedef struct xmlRelaxNG
+ ctypedef struct xmlRelaxNGParserCtxt
+
+ ctypedef struct xmlRelaxNGValidCtxt
+
+ # validation error codes
+ ctypedef enum xmlRelaxNGValidErr:
+ XML_RELAXNG_OK = 0
+ XML_RELAXNG_ERR_MEMORY = 1
+ XML_RELAXNG_ERR_TYPE = 2
+ XML_RELAXNG_ERR_TYPEVAL = 3
+ XML_RELAXNG_ERR_DUPID = 4
+ XML_RELAXNG_ERR_TYPECMP = 5
+ XML_RELAXNG_ERR_NOSTATE = 6
+ XML_RELAXNG_ERR_NODEFINE = 7
+ XML_RELAXNG_ERR_LISTEXTRA = 8
+ XML_RELAXNG_ERR_LISTEMPTY = 9
+ XML_RELAXNG_ERR_INTERNODATA = 10
+ XML_RELAXNG_ERR_INTERSEQ = 11
+ XML_RELAXNG_ERR_INTEREXTRA = 12
+ XML_RELAXNG_ERR_ELEMNAME = 13
+ XML_RELAXNG_ERR_ATTRNAME = 14
+ XML_RELAXNG_ERR_ELEMNONS = 15
+ XML_RELAXNG_ERR_ATTRNONS = 16
+ XML_RELAXNG_ERR_ELEMWRONGNS = 17
+ XML_RELAXNG_ERR_ATTRWRONGNS = 18
+ XML_RELAXNG_ERR_ELEMEXTRANS = 19
+ XML_RELAXNG_ERR_ATTREXTRANS = 20
+ XML_RELAXNG_ERR_ELEMNOTEMPTY = 21
+ XML_RELAXNG_ERR_NOELEM = 22
+ XML_RELAXNG_ERR_NOTELEM = 23
+ XML_RELAXNG_ERR_ATTRVALID = 24
+ XML_RELAXNG_ERR_CONTENTVALID = 25
+ XML_RELAXNG_ERR_EXTRACONTENT = 26
+ XML_RELAXNG_ERR_INVALIDATTR = 27
+ XML_RELAXNG_ERR_DATAELEM = 28
+ XML_RELAXNG_ERR_VALELEM = 29
+ XML_RELAXNG_ERR_LISTELEM = 30
+ XML_RELAXNG_ERR_DATATYPE = 31
+ XML_RELAXNG_ERR_VALUE = 32
+ XML_RELAXNG_ERR_LIST = 33
+ XML_RELAXNG_ERR_NOGRAMMAR = 34
+ XML_RELAXNG_ERR_EXTRADATA = 35
+ XML_RELAXNG_ERR_LACKDATA = 36
+ XML_RELAXNG_ERR_INTERNAL = 37
+ XML_RELAXNG_ERR_ELEMWRONG = 38
+ XML_RELAXNG_ERR_TEXTWRONG = 39
+
+ # context creation, parsing, validation and the matching free functions
+ cdef xmlRelaxNGValidCtxt* xmlRelaxNGNewValidCtxt(xmlRelaxNG* schema)
+ cdef int xmlRelaxNGValidateDoc(xmlRelaxNGValidCtxt* ctxt, xmlDoc* doc)
+ cdef xmlRelaxNG* xmlRelaxNGParse(xmlRelaxNGParserCtxt* ctxt)
+ cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewParserCtxt(char* URL)
+ cdef xmlRelaxNGParserCtxt* xmlRelaxNGNewDocParserCtxt(xmlDoc* doc)
+ cdef void xmlRelaxNGFree(xmlRelaxNG* schema)
+ cdef void xmlRelaxNGFreeParserCtxt(xmlRelaxNGParserCtxt* ctxt)
+ cdef void xmlRelaxNGFreeValidCtxt(xmlRelaxNGValidCtxt* ctxt)
+
+ # structured error callback registration for validation and parsing
+ cdef void xmlRelaxNGSetValidStructuredErrors(
+ xmlRelaxNGValidCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx)
+ cdef void xmlRelaxNGSetParserStructuredErrors(
+ xmlRelaxNGParserCtxt* ctxt, xmlStructuredErrorFunc serror, void *ctx)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..181248afd6ab89811c896f2992aa8bf4c69affe9
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/schematron.pxd
@@ -0,0 +1,34 @@
+from lxml.includes cimport xmlerror
+from lxml.includes.tree cimport xmlDoc
+
+# Cython declarations for the Schematron API in libxml2's
+# "libxml/schematron.h".  The whole block is declared ``nogil``.
+cdef extern from "libxml/schematron.h" nogil:
+ # opaque handles: schema, parser context and validation context
+ ctypedef struct xmlSchematron
+ ctypedef struct xmlSchematronParserCtxt
+ ctypedef struct xmlSchematronValidCtxt
+
+ # output options; passed as the ``options`` int of
+ # xmlSchematronNewValidCtxt() declared below
+ ctypedef enum xmlSchematronValidOptions:
+ XML_SCHEMATRON_OUT_QUIET = 1 # quiet no report
+ XML_SCHEMATRON_OUT_TEXT = 2 # build a textual report
+ XML_SCHEMATRON_OUT_XML = 4 # output SVRL
+ XML_SCHEMATRON_OUT_ERROR = 8 # output via xmlStructuredErrorFunc
+ XML_SCHEMATRON_OUT_FILE = 256 # output to a file descriptor
+ XML_SCHEMATRON_OUT_BUFFER = 512 # output to a buffer
+ XML_SCHEMATRON_OUT_IO = 1024 # output to I/O mechanism
+
+ # context constructors
+ cdef xmlSchematronParserCtxt* xmlSchematronNewDocParserCtxt(
+ xmlDoc* doc)
+ cdef xmlSchematronParserCtxt* xmlSchematronNewParserCtxt(
+ char* filename) nogil
+ cdef xmlSchematronValidCtxt* xmlSchematronNewValidCtxt(
+ xmlSchematron* schema, int options)
+
+ # schema parsing and document validation
+ cdef xmlSchematron* xmlSchematronParse(xmlSchematronParserCtxt* ctxt)
+ cdef int xmlSchematronValidateDoc(xmlSchematronValidCtxt* ctxt,
+ xmlDoc* instance)
+
+ # free functions and structured error callback registration
+ cdef void xmlSchematronFreeParserCtxt(xmlSchematronParserCtxt* ctxt)
+ cdef void xmlSchematronFreeValidCtxt(xmlSchematronValidCtxt* ctxt)
+ cdef void xmlSchematronFree(xmlSchematron* schema)
+ cdef void xmlSchematronSetValidStructuredErrors(
+ xmlSchematronValidCtxt* ctxt,
+ xmlerror.xmlStructuredErrorFunc error_func, void *data)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..22069eb7cbb576b6236f53912f8529863a07cd08
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/includes/xpath.pxd
@@ -0,0 +1,136 @@
+from lxml.includes cimport tree
+from lxml.includes cimport xmlerror
+
+from libc.string cimport const_char
+from lxml.includes.tree cimport xmlChar, const_xmlChar
+
+
+# Cython declarations for the libxml2 XPath API found in "libxml/xpath.h".
+# The whole block is declared ``nogil``.
+cdef extern from "libxml/xpath.h" nogil:
+ # Type tag of an XPath object (see the ``type`` field of xmlXPathObject).
+ ctypedef enum xmlXPathObjectType:
+ XPATH_UNDEFINED = 0
+ XPATH_NODESET = 1
+ XPATH_BOOLEAN = 2
+ XPATH_NUMBER = 3
+ XPATH_STRING = 4
+ XPATH_POINT = 5
+ XPATH_RANGE = 6
+ XPATH_LOCATIONSET = 7
+ XPATH_USERS = 8
+ XPATH_XSLT_TREE = 9
+
+ # XPath error codes; 0 (XPATH_EXPRESSION_OK) is the only "OK" entry.
+ ctypedef enum xmlXPathError:
+ XPATH_EXPRESSION_OK = 0
+ XPATH_NUMBER_ERROR = 1
+ XPATH_UNFINISHED_LITERAL_ERROR = 2
+ XPATH_START_LITERAL_ERROR = 3
+ XPATH_VARIABLE_REF_ERROR = 4
+ XPATH_UNDEF_VARIABLE_ERROR = 5
+ XPATH_INVALID_PREDICATE_ERROR = 6
+ XPATH_EXPR_ERROR = 7
+ XPATH_UNCLOSED_ERROR = 8
+ XPATH_UNKNOWN_FUNC_ERROR = 9
+ XPATH_INVALID_OPERAND = 10
+ XPATH_INVALID_TYPE = 11
+ XPATH_INVALID_ARITY = 12
+ XPATH_INVALID_CTXT_SIZE = 13
+ XPATH_INVALID_CTXT_POSITION = 14
+ XPATH_MEMORY_ERROR = 15
+ XPTR_SYNTAX_ERROR = 16
+ XPTR_RESOURCE_ERROR = 17
+ XPTR_SUB_RESOURCE_ERROR = 18
+ XPATH_UNDEF_PREFIX_ERROR = 19
+ XPATH_ENCODING_ERROR = 20
+ XPATH_INVALID_CHAR_ERROR = 21
+ XPATH_INVALID_CTXT = 22
+
+ # Struct declarations below list only the fields this file declares.
+ # Node set: two integer counters and a table of node pointers.
+ ctypedef struct xmlNodeSet:
+ int nodeNr
+ int nodeMax
+ tree.xmlNode** nodeTab
+
+ # XPath value: a type tag plus one member per value kind.
+ ctypedef struct xmlXPathObject:
+ xmlXPathObjectType type
+ xmlNodeSet* nodesetval
+ bint boolval
+ double floatval
+ xmlChar* stringval
+
+ # Evaluation context.
+ ctypedef struct xmlXPathContext:
+ tree.xmlDoc* doc
+ tree.xmlNode* node
+ tree.xmlDict* dict
+ tree.xmlHashTable* nsHash
+ const_xmlChar* function
+ const_xmlChar* functionURI
+ xmlerror.xmlStructuredErrorFunc error
+ xmlerror.xmlError lastError
+ void* userData
+
+ # Parser context: points at its evaluation context and carries an integer
+ # error field.
+ ctypedef struct xmlXPathParserContext:
+ xmlXPathContext* context
+ xmlXPathObject* value
+ tree.xmlNode* ancestor
+ int error
+
+ # Opaque compiled-expression handle: declared without fields.
+ ctypedef struct xmlXPathCompExpr
+
+ # Function pointer type taking a parser context and an argument count.
+ ctypedef void (*xmlXPathFunction)(xmlXPathParserContext* ctxt, int nargs)
+ # Lookup callback type: given a context pointer, a name and a namespace
+ # URI, returns an xmlXPathFunction.
+ ctypedef xmlXPathFunction (*xmlXPathFuncLookupFunc)(void* ctxt,
+ const_xmlChar* name,
+ const_xmlChar* ns_uri)
+
+ # Context creation, expression evaluation/compilation, the matching
+ # deallocators, and namespace prefix registration.
+ cdef xmlXPathContext* xmlXPathNewContext(tree.xmlDoc* doc)
+ cdef xmlXPathObject* xmlXPathEvalExpression(const_xmlChar* str,
+ xmlXPathContext* ctxt)
+ cdef xmlXPathObject* xmlXPathCompiledEval(xmlXPathCompExpr* comp,
+ xmlXPathContext* ctxt)
+ cdef xmlXPathCompExpr* xmlXPathCompile(const_xmlChar* str)
+ cdef xmlXPathCompExpr* xmlXPathCtxtCompile(xmlXPathContext* ctxt,
+ const_xmlChar* str)
+ cdef void xmlXPathFreeContext(xmlXPathContext* ctxt)
+ cdef void xmlXPathFreeCompExpr(xmlXPathCompExpr* comp)
+ cdef void xmlXPathFreeObject(xmlXPathObject* obj)
+ cdef int xmlXPathRegisterNs(xmlXPathContext* ctxt,
+ const_xmlChar* prefix, const_xmlChar* ns_uri)
+
+ # Node set creation and deallocation.
+ cdef xmlNodeSet* xmlXPathNodeSetCreate(tree.xmlNode* val)
+ cdef void xmlXPathFreeNodeSet(xmlNodeSet* val)
+
+
+# Cython declarations for the libxml2 functions found in
+# "libxml/xpathInternals.h". The whole block is declared ``nogil``.
+cdef extern from "libxml/xpathInternals.h" nogil:
+ # Registration of functions (by name, by name + namespace URI, or through
+ # a lookup callback) and of variables on an XPath context.
+ cdef int xmlXPathRegisterFunc(xmlXPathContext* ctxt,
+ const_xmlChar* name,
+ xmlXPathFunction f)
+ cdef int xmlXPathRegisterFuncNS(xmlXPathContext* ctxt,
+ const_xmlChar* name,
+ const_xmlChar* ns_uri,
+ xmlXPathFunction f)
+ cdef void xmlXPathRegisterFuncLookup(xmlXPathContext *ctxt,
+ xmlXPathFuncLookupFunc f,
+ void *funcCtxt)
+ cdef int xmlXPathRegisterVariable(xmlXPathContext *ctxt,
+ const_xmlChar* name,
+ xmlXPathObject* value)
+ cdef int xmlXPathRegisterVariableNS(xmlXPathContext *ctxt,
+ const_xmlChar* name,
+ const_xmlChar* ns_uri,
+ xmlXPathObject* value)
+ cdef void xmlXPathRegisteredVariablesCleanup(xmlXPathContext *ctxt)
+ cdef void xmlXPathRegisteredNsCleanup(xmlXPathContext *ctxt)
+ # Pop / push an XPath object on a parser context.
+ cdef xmlXPathObject* valuePop (xmlXPathParserContext *ctxt)
+ cdef int valuePush(xmlXPathParserContext* ctxt, xmlXPathObject *value)
+
+ # Constructors returning an xmlXPathObject from C strings, xmlChar
+ # strings, a double, an int or a node. NOTE(review): the New*/Wrap*
+ # variants presumably differ in whether the argument is copied -- confirm
+ # against the libxml2 documentation before relying on ownership.
+ cdef xmlXPathObject* xmlXPathNewCString(const_char *val)
+ cdef xmlXPathObject* xmlXPathWrapCString(const_char * val)
+ cdef xmlXPathObject* xmlXPathNewString(const_xmlChar *val)
+ cdef xmlXPathObject* xmlXPathWrapString(const_xmlChar * val)
+ cdef xmlXPathObject* xmlXPathNewFloat(double val)
+ cdef xmlXPathObject* xmlXPathNewBoolean(int val)
+ cdef xmlXPathObject* xmlXPathNewNodeSet(tree.xmlNode* val)
+ cdef xmlXPathObject* xmlXPathNewValueTree(tree.xmlNode* val)
+ # Node set manipulation and wrapping of a node set in an XPath object.
+ cdef void xmlXPathNodeSetAdd(xmlNodeSet* cur,
+ tree.xmlNode* val)
+ cdef void xmlXPathNodeSetAddUnique(xmlNodeSet* cur,
+ tree.xmlNode* val)
+ cdef xmlXPathObject* xmlXPathWrapNodeSet(xmlNodeSet* val)
+ # Report an integer error code on a parser context.
+ cdef void xmlXPathErr(xmlXPathParserContext* ctxt, int error)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..ac89fb62e4527e81931d62b07aeecb6eaf3feec0
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/lxml/isoschematron/__init__.py
@@ -0,0 +1,348 @@
+"""The ``lxml.isoschematron`` package implements ISO Schematron support on top
+of the pure-xslt 'skeleton' implementation.
+"""
+
+import sys
+import os.path
+from lxml import etree as _etree # due to validator __init__ signature
+
+
+# some compat stuff, borrowed from lxml.html
+# Each ``try`` only looks the name up; when that raises NameError the name is
+# bound to ``str`` so the rest of this module can use it unconditionally.
+try:
+ unicode
+except NameError:
+ # Python 3
+ unicode = str
+try:
+ basestring
+except NameError:
+ # Python 3
+ basestring = str
+
+
+__all__ = ['extract_xsd', 'extract_rng', 'iso_dsdl_include',
+ 'iso_abstract_expand', 'iso_svrl_for_xslt1',
+ 'svrl_validation_errors', 'schematron_schema_valid',
+ 'stylesheet_params', 'Schematron']
+
+
+# some namespaces
+#FIXME: Maybe lxml should provide a dedicated place for common namespace
+#FIXME: definitions?
+XML_SCHEMA_NS = "http://www.w3.org/2001/XMLSchema"
+RELAXNG_NS = "http://relaxng.org/ns/structure/1.0"
+SCHEMATRON_NS = "http://purl.oclc.org/dsdl/schematron"
+SVRL_NS = "http://purl.oclc.org/dsdl/svrl"
+
+
+# some helpers
+_schematron_root = '{%s}schema' % SCHEMATRON_NS
+_xml_schema_root = '{%s}schema' % XML_SCHEMA_NS
+_resources_dir = os.path.join(os.path.dirname(__file__), 'resources')
+
+
+# the iso-schematron skeleton implementation steps aka xsl transformations
+# Each name below is bound, when this module is imported, to an
+# ``_etree.XSLT`` object built from a stylesheet file parsed from the ``xsl``
+# sub-directory of ``_resources_dir``. All five names are listed in
+# ``__all__``.
+extract_xsd = _etree.XSLT(_etree.parse(
+ os.path.join(_resources_dir, 'xsl', 'XSD2Schtrn.xsl')))
+extract_rng = _etree.XSLT(_etree.parse(
+ os.path.join(_resources_dir, 'xsl', 'RNG2Schtrn.xsl')))
+iso_dsdl_include = _etree.XSLT(_etree.parse(
+ os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
+ 'iso_dsdl_include.xsl')))
+iso_abstract_expand = _etree.XSLT(_etree.parse(
+ os.path.join(_resources_dir, 'xsl', 'iso-schematron-xslt1',
+ 'iso_abstract_expand.xsl')))
+iso_svrl_for_xslt1 = _etree.XSLT(_etree.parse(
+ os.path.join(_resources_dir,
+ 'xsl', 'iso-schematron-xslt1', 'iso_svrl_for_xslt1.xsl')))
+
+
+# svrl result accessors
+# XPath that selects every ``failed-assert`` element in the SVRL namespace.
+svrl_validation_errors = _etree.XPath(
+ '//svrl:failed-assert', namespaces={'svrl': SVRL_NS})
+
+# RelaxNG validator for schematron schemas
+# The flag stays False unless the RelaxNG schema file could be loaded; it is
+# used as the default of ``Schematron.__init__(validate_schema=...)``.
+schematron_schema_valid_supported = False
+try:
+ schematron_schema_valid = _etree.RelaxNG(
+ file=os.path.join(_resources_dir, 'rng', 'iso-schematron.rng'))
+ schematron_schema_valid_supported = True
+except _etree.RelaxNGParseError:
+ # Some distributions delete the file due to licensing issues.
+ # Fallback with the same name: calling it always raises
+ # NotImplementedError.
+ def schematron_schema_valid(arg):
+ raise NotImplementedError("Validating the ISO schematron requires iso-schematron.rng")
+
+
+def stylesheet_params(**kwargs):
+ """Convert keyword args to a dictionary of stylesheet parameters.
+ XSL stylesheet parameters must be XPath expressions, i.e.:
+
+ * string expressions, like "'5'"
+ * simple (number) expressions, like "5"
+ * valid XPath expressions, like "/a/b/text()"
+
+ This function converts native Python keyword arguments to stylesheet
+ parameters following these rules:
+ If an arg is a string wrap it with XSLT.strparam().
+ If an arg is an XPath object use its path string.
+ If arg is None raise TypeError.
+ Else convert arg to string.
+
+ Returns a new dict with the same keys as ``kwargs``.
+ """
+ result = {}
+ for key, val in kwargs.items():
+ if isinstance(val, basestring):
+ val = _etree.XSLT.strparam(val)
+ elif val is None:
+ raise TypeError('None not allowed as a stylesheet parameter')
+ # XPath objects are stored in the result unchanged by this function;
+ # every other remaining value is converted with ``unicode()``.
+ elif not isinstance(val, _etree.XPath):
+ val = unicode(val)
+ result[key] = val
+ return result
+
+
+# helper function for use in Schematron __init__
+def _stylesheet_param_dict(paramsDict, kwargsDict):
+ """Return a copy of paramsDict, updated with kwargsDict entries, wrapped as
+ stylesheet arguments.
+ kwargsDict entries with a value of None are ignored.
+
+ The merged dict is passed through ``stylesheet_params()``, so a None value
+ that is already present in paramsDict raises TypeError there.
+ """
+ # beware of changing mutable default arg
+ # (work on a copy so the caller's dict is never modified)
+ paramsDict = dict(paramsDict)
+ for k, v in kwargsDict.items():
+ if v is not None: # None values do not override
+ paramsDict[k] = v
+ paramsDict = stylesheet_params(**paramsDict)
+ return paramsDict
+
+
+class Schematron(_etree._Validator):
+ """An ISO Schematron validator.
+
+ Pass a root Element or an ElementTree to turn it into a validator.
+ Alternatively, pass a filename as keyword argument 'file' to parse from
+ the file system.
+
+ Schematron is a less well known, but very powerful schema language.
+ The main idea is to use the capabilities of XPath to put restrictions on
+ the structure and the content of XML documents.
+
+ The standard behaviour is to fail on ``failed-assert`` findings only
+ (``ASSERTS_ONLY``). To change this, you can either pass a report filter
+ function to the ``error_finder`` parameter (e.g. ``ASSERTS_AND_REPORTS``
+ or a custom ``XPath`` object), or subclass isoschematron.Schematron for
+ complete control of the validation process.
+
+ Built on the Schematron language 'reference' skeleton pure-xslt
+ implementation, the validator is created as an XSLT 1.0 stylesheet using
+ these steps:
+
+ 0) (Extract from XML Schema or RelaxNG schema)
+ 1) Process inclusions
+ 2) Process abstract patterns
+ 3) Compile the schematron schema to XSLT
+
+ The ``include`` and ``expand`` keyword arguments can be used to switch off
+ steps 1) and 2).
+ To set parameters for steps 1), 2) and 3) hand parameter dictionaries to the
+ keyword arguments ``include_params``, ``expand_params`` or
+ ``compile_params``.
+ For convenience, the compile-step parameter ``phase`` is also exposed as a
+ keyword argument ``phase``. This takes precedence if the parameter is also
+ given in the parameter dictionary.
+
+ If ``store_schematron`` is set to True, the (included-and-expanded)
+ schematron document tree is stored and available through the ``schematron``
+ property.
+ If ``store_xslt`` is set to True, the validation XSLT document tree will be
+ stored and can be retrieved through the ``validator_xslt`` property.
+ With ``store_report`` set to True (default: False), the resulting validation
+ report document gets stored and can be accessed as the ``validation_report``
+ property.
+
+ If ``validate_schema`` is set to False, the validation of the schema file
+ itself is disabled. Validation happens by default after building the full
+ schema, unless the schema validation file cannot be found at import time,
+ in which case the validation gets disabled. Some lxml distributions exclude
+ this file due to licensing issues. ISO-Schematron validation can then still
+ be used normally, but the schemas themselves cannot be validated.
+
+ Here is a usage example::
+
+ >>> from lxml import etree
+ >>> from lxml.isoschematron import Schematron
+
+ >>> schematron = Schematron(etree.XML('''
+ ... <schema xmlns="http://purl.oclc.org/dsdl/schematron" >
+ ...   <pattern id="id_only_attribute">
+ ...     <title>id is the only permitted attribute name</title>
+ ...     <rule context="*">
+ ...       <report test="@*[not(name()='id')]">Attribute
+ ...         <name path="@*[not(name()='id')]"/> is forbidden<name/>
+ ...       </report>
+ ...     </rule>
+ ...   </pattern>
+ ... </schema>'''),
+ ... error_finder=Schematron.ASSERTS_AND_REPORTS)
+
+ >>> xml = etree.XML('''
+ ... <AAA name="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC color="ccc"/>
+ ... </AAA>
+ ... ''')
+
+ >>> schematron.validate(xml)
+ False
+
+ >>> xml = etree.XML('''
+ ... <AAA id="aaa">
+ ...   <BBB id="bbb"/>
+ ...   <CCC/>
+ ... </AAA>
+ ... ''')
+
+ >>> schematron.validate(xml)
+ True
+ """
+
+ # libxml2 error categorization for validation errors
+ _domain = _etree.ErrorDomains.SCHEMATRONV
+ _level = _etree.ErrorLevels.ERROR
+ _error_type = _etree.ErrorTypes.SCHEMATRONV_ASSERT
+
+ # convenience definitions for common behaviours
+ ASSERTS_ONLY = svrl_validation_errors # Default
+ ASSERTS_AND_REPORTS = _etree.XPath(
+ '//svrl:failed-assert | //svrl:successful-report',
+ namespaces={'svrl': SVRL_NS})
+
+ def _extract(self, element):
+ """Extract embedded schematron schema from non-schematron host schema.
+ This method will only be called by __init__ if the given schema document
+ is not a schematron schema by itself.
+ Must return a schematron schema document tree or None.
+ """
+ schematron = None
+ # XML Schema hosts are recognised by their root tag, RelaxNG hosts by
+ # the namespace bound to the root element's prefix; anything else
+ # leaves the result as None.
+ if element.tag == _xml_schema_root:
+ schematron = self._extract_xsd(element)
+ elif element.nsmap.get(element.prefix) == RELAXNG_NS:
+ # RelaxNG does not have a single unique root element
+ schematron = self._extract_rng(element)
+ return schematron
+
+ # customization points
+ # etree.XSLT objects that provide the extract, include, expand, compile
+ # steps
+ _extract_xsd = extract_xsd
+ _extract_rng = extract_rng
+ _include = iso_dsdl_include
+ _expand = iso_abstract_expand
+ _compile = iso_svrl_for_xslt1
+
+ # etree.xpath object that determines input document validity when applied to
+ # the svrl result report; must return a list of result elements (empty if
+ # valid)
+ _validation_errors = ASSERTS_ONLY
+
+ # Builds the validating XSLT from the given schema.
+ # Raises SchematronParseError when obtaining the root fails, when the
+ # document is neither a schematron schema nor extractable, or when schema
+ # validation is enabled and fails; raises ValueError("Empty tree") when no
+ # root element was obtained.
+ # The ``{}`` defaults are not modified in this method: they are expanded
+ # with ``**`` or copied inside ``_stylesheet_param_dict``.
+ def __init__(self, etree=None, file=None, include=True, expand=True,
+ include_params={}, expand_params={}, compile_params={},
+ store_schematron=False, store_xslt=False, store_report=False,
+ phase=None, error_finder=ASSERTS_ONLY,
+ validate_schema=schematron_schema_valid_supported):
+ super().__init__()
+
+ self._store_report = store_report
+ self._schematron = None
+ self._validator_xslt = None
+ self._validation_report = None
+ # Only a non-default finder shadows the class-level
+ # ``_validation_errors`` (identity comparison, not equality).
+ if error_finder is not self.ASSERTS_ONLY:
+ self._validation_errors = error_finder
+
+ # parse schema document, may be a schematron schema or an XML Schema or
+ # a RelaxNG schema with embedded schematron rules
+ root = None
+ try:
+ if etree is not None:
+ if _etree.iselement(etree):
+ root = etree
+ else:
+ root = etree.getroot()
+ elif file is not None:
+ root = _etree.parse(file).getroot()
+ except Exception:
+ # Any failure above is reported as a SchematronParseError whose
+ # message embeds the original exception value.
+ raise _etree.SchematronParseError(
+ "No tree or file given: %s" % sys.exc_info()[1])
+ if root is None:
+ raise ValueError("Empty tree")
+ if root.tag == _schematron_root:
+ schematron = root
+ else:
+ schematron = self._extract(root)
+ if schematron is None:
+ raise _etree.SchematronParseError(
+ "Document is not a schematron schema or schematron-extractable")
+ # perform the iso-schematron skeleton implementation steps to get a
+ # validating xslt
+ if include:
+ schematron = self._include(schematron, **include_params)
+ if expand:
+ schematron = self._expand(schematron, **expand_params)
+ if validate_schema and not schematron_schema_valid(schematron):
+ raise _etree.SchematronParseError(
+ "invalid schematron schema: %s" %
+ schematron_schema_valid.error_log)
+ if store_schematron:
+ self._schematron = schematron
+ # add new compile keyword args here if exposing them
+ compile_kwargs = {'phase': phase}
+ compile_params = _stylesheet_param_dict(compile_params, compile_kwargs)
+ validator_xslt = self._compile(schematron, **compile_params)
+ if store_xslt:
+ self._validator_xslt = validator_xslt
+ self._validator = _etree.XSLT(validator_xslt)
+
+ def __call__(self, etree):
+ """Validate doc using Schematron.
+
+ Returns true if document is valid, false if not.
+ """
+ self._clear_error_log()
+ result = self._validator(etree)
+ if self._store_report:
+ self._validation_report = result
+ # The error finder is applied to the transformation result; a non-empty
+ # result means the document is invalid.
+ errors = self._validation_errors(result)
+ if errors:
+ # File name for the log entries: the document URL, or '' if unset.
+ if _etree.iselement(etree):
+ fname = etree.getroottree().docinfo.URL or ''
+ else:
+ fname = etree.docinfo.URL or ''
+ for error in errors:
+ # Does svrl report the line number, anywhere? Don't think so.
+ self._append_log_message(
+ domain=self._domain, type=self._error_type,
+ level=self._level, line=0,
+ message=_etree.tostring(error, encoding='unicode'),
+ filename=fname)
+ return False
+ return True
+
+ @property
+ def schematron(self):
+ """ISO-schematron schema document (None if object has been initialized
+ with store_schematron=False).
+ """
+ return self._schematron
+
+ @property
+ def validator_xslt(self):
+ """ISO-schematron skeleton implementation XSLT validator document (None
+ if object has been initialized with store_xslt=False).
+ """
+ return self._validator_xslt
+
+ @property
+ def validation_report(self):
+ """ISO-schematron validation result report (None if result-storing has
+ been turned off).
+ """
+ return self._validation_report
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..40126fdadbc4802c02eec5f529a5e7c7494f2dce
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/compiler.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/compiler.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..827994077a42a8f3f0881243700f133dcb0d6fe6
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/compiler.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/driver.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/driver.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fed6fbde08b5c0fecdbaea009311604a1f977efa
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/__pycache__/driver.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..887802333d8d385c2ba42e5a2753e61bcb6e9b59
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/compiler.py
@@ -0,0 +1,495 @@
+from triton.backends.compiler import BaseBackend, GPUTarget, Language
+from triton._C.libtriton import ir, passes, llvm, amd
+from triton import knobs
+from dataclasses import dataclass
+from typing import Any, Dict, Tuple
+from types import ModuleType
+import hashlib
+import tempfile
+import re
+import functools
+import warnings
+from pathlib import Path
+
+
+# Return a callable that maps (lhs_type, rhs_type) to the constant tuple
+# (1, 1, 1). Neither ``target`` nor the callable's arguments are consulted.
+def get_min_dot_size(target: GPUTarget):
+ # We fall back to FMA and cast the arguments if certain configurations are
+ # not supported natively by the matrix core units.
+ return lambda lhs_type, rhs_type: (1, 1, 1)
+
+
+# Decide whether the block ping-pong schedule is used.
+# When ``knobs.amd.use_block_pingpong`` is not None its value is returned
+# as-is; otherwise the result is True for arch "gfx942", or for arch "gfx950"
+# when ``use_async_copy`` is exactly True.
+def is_pingpong_schedule_enabled(arch, use_async_copy):
+ return (arch == "gfx942" or (arch == "gfx950" and use_async_copy is True)
+ ) if knobs.amd.use_block_pingpong is None else knobs.amd.use_block_pingpong
+
+
+# Decide whether the in-thread transpose pass is used.
+# When ``knobs.amd.use_in_thread_transpose`` is not None its value is returned
+# as-is; otherwise the result is True only for arch "gfx942".
+def is_in_thread_transpose_enabled(arch):
+ return (arch == "gfx942") if knobs.amd.use_in_thread_transpose is None else knobs.amd.use_in_thread_transpose
+
+
+# Frozen dataclass holding the compile options of the HIP backend.
+# ``__post_init__`` derives ``warp_size`` from ``arch``, forces ``kpack`` to 1
+# on gfx950 and rewrites ``extern_libs``; ``hash()`` digests the instance
+# attributes.
+@dataclass(frozen=True)
+class HIPOptions:
+ num_warps: int = 4
+ waves_per_eu: int = 0
+ num_stages: int = 2
+ num_ctas: int = 1
+ extern_libs: dict = None
+ debug: bool = False
+ sanitize_overflow: bool = True
+ # NOTE(review): ``__post_init__`` slices ``self.arch``, so the None default
+ # cannot actually be used; callers have to supply an architecture string.
+ arch: str = None
+ # We have native support for OCP fp8 variants since CDNA4/RDNA4. For earlier generations,
+ # we software emulate the support for them.
+ # UZ fp8 variants (fp8e4b8 and fp8e5b16) are natively supported for CDNA3. For other
+ # architectures they are software emulated.
+ supported_fp8_dtypes: Tuple[str] = ("fp8e4nv", "fp8e5", "fp8e5b16", "fp8e4b8")
+ deprecated_fp8_dot_operand_dtypes: Tuple[str] = ()
+ default_dot_input_precision: str = "ieee"
+ allowed_dot_input_precisions: Tuple[str] = ("ieee", 'bf16x3', 'bf16x6')
+ enable_fp_fusion: bool = True
+ launch_cooperative_grid: bool = False
+ matrix_instr_nonkdim: int = 0
+ kpack: int = 1
+ allow_flush_denorm: bool = False
+ max_num_imprecise_acc_default: int = 0
+ backend_name: str = 'hip'
+ instrumentation_mode: str = ""
+
+ # The following option provides hints to the AMDGPU backend regarding instruction scheduling
+ # for all `tt.dot` operations in a kernel. The "none" variant preserves the default
+ # instruction scheduling of the AMDGPU backend which aims at maximizing occupancy.
+ # The option is experimental and may change at any time regarding its semantics and/or may
+ # be gone entirely anytime.
+ #
+ # Current experimental scheduling variants:
+ #
+ # attention: enables a bunch of optimizations for attention kernels, including:
+ # - iglp 2 and sched.barrier around it
+ # - sink-insts-to-avoid-spills flag to avoid register spills
+ # memory-bound-attention: enables custom scheduling strategy in llvm backend,
+ # This option targets special FA variant, which is memory bound and
+ # has a lot of elementwise operations from fused operand dequantizations.
+ # Note that this option is highly experimental,
+ # and will be removed as soon as the default scheduler algorithm is fixed.
+ #
+ # Option allows to set multiple variants divided by commas:
+ # schedule_hint="attention,memory-bound-attention"
+ schedule_hint: str = 'none'
+
+ # The dataclass is frozen, so every assignment below goes through
+ # ``object.__setattr__``.
+ def __post_init__(self):
+ gfx_major = int(self.arch[3:-2]) # Drop "gfx" prefix and minor/patch number
+ # ``warp_size`` is not a declared field; it is attached to the instance
+ # here: 32 when the major number is >= 10, otherwise 64.
+ warp_size = 32 if gfx_major >= 10 else 64
+ object.__setattr__(self, 'warp_size', warp_size)
+ assert self.num_warps > 0 and (self.num_warps & (self.num_warps - 1)) == 0, \
+ "num_warps must be a power of 2"
+
+ if (self.arch == 'gfx950') and (self.kpack != 1):
+ warnings.warn(
+ f"kpack is deprecated starting from gfx950 and will be removed in later releases. So for now kpack = {self.kpack} will be overwritten to 1 to make transitioning easier."
+ )
+ object.__setattr__(self, 'kpack', 1)
+
+ # The "ocml" and "ockl" entries are always set to the ``.bc`` files in
+ # the ``lib`` directory next to this file, replacing any caller-supplied
+ # entries with those names. The mapping is then stored as a tuple of
+ # (name, path) pairs.
+ default_libdir = Path(__file__).parent / 'lib'
+ extern_libs = {} if self.extern_libs is None else dict(self.extern_libs)
+ for lib in ["ocml", "ockl"]:
+ extern_libs[lib] = str(default_libdir / f'{lib}.bc')
+ object.__setattr__(self, 'extern_libs', tuple(extern_libs.items()))
+
+ # Return the hex SHA-256 digest of the "name-value" pairs found in
+ # ``self.__dict__``, joined with underscores.
+ def hash(self):
+ key = '_'.join([f'{name}-{val}' for name, val in self.__dict__.items()])
+ return hashlib.sha256(key.encode("utf-8")).hexdigest()
+
+
+class HIPBackend(BaseBackend):
+ instrumentation = None
+ supports_native_tensor_specialization = False
+
+ # True when the target's ``backend`` attribute equals 'hip'.
+ @staticmethod
+ def supports_target(target: GPUTarget):
+ return target.backend == 'hip'
+
+ # Initialise the base backend with ``target``, assert that ``target.arch``
+ # is a string and set the binary file extension to "hsaco".
+ def __init__(self, target: GPUTarget) -> None:
+ super().__init__(target)
+ assert isinstance(target.arch, str)
+ self.binary_ext = "hsaco"
+
+ # Return the string "hip:" followed by ``options.arch``.
+ def get_target_name(self, options) -> str:
+ return f"hip:{options.arch}"
+
+ # Build a HIPOptions instance from the mapping ``opts``.
+ # Architecture-dependent defaults are collected in ``args`` first; entries
+ # of ``opts`` that name a HIPOptions field and are not None are applied
+ # last and therefore win. Raises ValueError when more than one CTA is
+ # requested on an architecture without multi-CTA launch support.
+ def parse_options(self, opts) -> Any:
+ # NOTE(review): ``args['arch']`` honours ``knobs.runtime.override_arch``,
+ # while the architecture checks below read ``self.target.arch`` --
+ # confirm that this difference is intended.
+ args = {'arch': knobs.runtime.override_arch or self.target.arch}
+
+ if opts.get("num_ctas", 1) > 1 and not amd.supports_multi_cta_launch(self.target.arch):
+ raise ValueError(f"num_ctas > 1 not supported on {self.target.arch}")
+
+ # Enable XF32 (TF32) for CDNA3 GPUs
+ if self.target.arch == 'gfx942':
+ allowed_dot_input_precisions = set(HIPOptions.allowed_dot_input_precisions)
+ allowed_dot_input_precisions.update({'tf32'})
+ args["allowed_dot_input_precisions"] = tuple(sorted(allowed_dot_input_precisions))
+
+ if "supported_fp8_dtypes" not in opts:
+ args["supported_fp8_dtypes"] = tuple(sorted(HIPOptions.supported_fp8_dtypes))
+
+ # On gfx950 the two UZ fp8 variants are added to the deprecated
+ # dot-operand dtypes.
+ if self.target.arch == 'gfx950':
+ deprecated_fp8_dot_operand_dtypes = set(HIPOptions.deprecated_fp8_dot_operand_dtypes)
+ deprecated_fp8_dot_operand_dtypes.update({"fp8e5b16", "fp8e4b8"})
+ args["deprecated_fp8_dot_operand_dtypes"] = tuple(sorted(deprecated_fp8_dot_operand_dtypes))
+
+ if "enable_fp_fusion" not in opts:
+ args["enable_fp_fusion"] = knobs.language.default_fp_fusion
+ args.update({k: opts[k] for k in HIPOptions.__dataclass_fields__.keys() if k in opts and opts[k] is not None})
+ return HIPOptions(**args)
+
+ # Return the tuple (num_warps, num_ctas, shared) read from ``metadata``.
+ def pack_metadata(self, metadata):
+ return (
+ metadata.num_warps,
+ metadata.num_ctas,
+ metadata.shared,
+ )
+
+ # Return a dict with the single key "min_dot_size", built by
+ # ``get_min_dot_size`` for this backend's target. ``options`` is not used.
+ def get_codegen_implementation(self, options):
+ return {"min_dot_size": get_min_dot_size(self.target)}
+
+ # Map the module name "triton.language.extra.libdevice" to the HIP
+ # ``libdevice`` module, which is imported inside the method.
+ def get_module_map(self) -> Dict[str, ModuleType]:
+ from triton.language.extra.hip import libdevice
+
+ return {"triton.language.extra.libdevice": libdevice}
+
+ # Load the AMD dialects into ``ctx`` and, when the class-level
+ # ``instrumentation`` attribute is set, its dialects as well.
+ def load_dialects(self, ctx):
+ amd.load_dialects(ctx)
+ if HIPBackend.instrumentation:
+ HIPBackend.instrumentation.load_dialects(ctx)
+
+ # Return True when the size reported for ``arg`` is at most 2**31 - 1.
+ # ``arg.ptr_range()`` is used when the attribute exists; otherwise, for a
+ # torch.Tensor, ``arg.untyped_storage().size()``. Any other argument yields
+ # False. ``torch`` is imported inside the method.
+ @staticmethod
+ def is_within_2gb(arg):
+ import torch
+
+ MAX_INT_32 = 2**31 - 1
+ if hasattr(arg, "ptr_range"):
+ return arg.ptr_range() <= MAX_INT_32
+ if isinstance(arg, torch.Tensor) and hasattr(arg, "untyped_storage"):
+ return arg.untyped_storage().size() <= MAX_INT_32
+ return False
+
+ # Extend the attributes parsed by BaseBackend with
+ # ["tt.pointer_range", 32] when ``desc`` contains "S", the marker appended
+ # by ``get_tensor_specialization``.
+ @staticmethod
+ def parse_attr(desc):
+ ret = BaseBackend.parse_attr(desc)
+ if "S" in desc:
+ ret += [["tt.pointer_range", 32]]
+ return ret
+
+ # Append "S" to the BaseBackend specialization string when
+ # ``knobs.amd.use_buffer_ops`` is set and ``is_within_2gb(arg)`` is True.
+ @staticmethod
+ def get_tensor_specialization(arg, **kwargs):
+ ret = BaseBackend.get_tensor_specialization(arg, **kwargs)
+ if knobs.amd.use_buffer_ops and HIPBackend.is_within_2gb(arg):
+ ret += "S"
+ return ret
+
+ # Run the pass pipeline named 'make_ttir' on ``mod`` and return ``mod``.
+ # ``metadata`` and ``options`` are not read here. The passes are added in
+ # the order listed; do not reorder them without checking pass dependencies.
+ @staticmethod
+ def make_ttir(mod, metadata, options):
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.common.add_inliner(pm)
+ passes.ttir.add_rewrite_tensor_pointer(pm)
+ passes.ttir.add_rewrite_tensor_descriptor_to_pointer(pm)
+ passes.common.add_canonicalizer(pm)
+ passes.ttir.add_combine(pm)
+ passes.ttir.add_reorder_broadcast(pm)
+ passes.common.add_cse(pm)
+ passes.ttir.add_triton_licm(pm)
+ passes.common.add_symbol_dce(pm)
+ passes.ttir.add_loop_unroll(pm)
+ pm.run(mod, 'make_ttir')
+ return mod
+
+ # Run two pass pipelines on ``mod`` and return it: first 'make_ttgir_early'
+ # (conversion to TritonGPU IR only), then 'make_ttgir' with the
+ # optimization passes. Afterwards the module's tensor descriptor metadata
+ # is stored under ``metadata["tensordesc_meta"]``.
+ @staticmethod
+ def make_ttgir(mod, metadata, options):
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.ttir.add_convert_to_ttgpuir(pm, f"hip:{options.arch}", options.num_warps, options.warp_size,
+ options.num_ctas)
+ pm.run(mod, 'make_ttgir_early')
+ # A fresh pass manager is created for the second pipeline.
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ emuTF32 = False
+ passes.ttgpuir.add_coalesce(pm)
+ passes.ttgpuir.add_f32_dot_tc(pm, emuTF32)
+ passes.ttgpuir.add_remove_layout_conversions(pm)
+ passes.ttgpuir.add_optimize_thread_locality(pm)
+ amd.passes.ttgpuir.add_accelerate_matmul(pm, options.arch, options.matrix_instr_nonkdim, options.kpack)
+ passes.ttgpuir.add_remove_layout_conversions(pm)
+ amd.passes.ttgpuir.add_optimize_epilogue(pm)
+ amd.passes.ttgpuir.add_optimize_dot_operands(pm, options.arch)
+ amd.passes.ttgpuir.add_hoist_layout_conversions(pm)
+
+ passes.ttgpuir.add_fuse_nested_loops(pm)
+ passes.common.add_canonicalizer(pm)
+ passes.ttir.add_triton_licm(pm)
+ passes.common.add_canonicalizer(pm)
+
+ # Both switches come from knobs; the ping-pong decision additionally
+ # depends on the architecture (see is_pingpong_schedule_enabled).
+ use_async_copy = knobs.amd.use_async_copy
+ use_block_pingpong = is_pingpong_schedule_enabled(options.arch, use_async_copy)
+
+ amd.passes.ttgpuir.add_schedule_loops(pm, options.num_stages)
+ amd.passes.ttgpuir.add_pipeline(pm, use_async_copy, use_block_pingpong)
+ if use_async_copy:
+ amd.passes.ttgpuir.add_coalesce_async_copy(pm, options.arch)
+ passes.common.add_canonicalizer(pm)
+ # ``schedule_hint`` may hold several comma-separated variants; one hint
+ # pass is inserted per variant unless the option is "none".
+ if options.schedule_hint.lower() != "none":
+ for hint in options.schedule_hint.split(","):
+ amd.passes.ttgpuir.insert_instruction_sched_hints(pm, hint)
+ passes.ttgpuir.add_remove_layout_conversions(pm)
+ passes.ttgpuir.add_reduce_data_duplication(pm)
+ if is_in_thread_transpose_enabled(options.arch):
+ amd.passes.ttgpuir.add_in_thread_transpose(pm)
+ passes.ttgpuir.add_remove_layout_conversions(pm)
+ amd.passes.ttgpuir.add_reorder_instructions(pm)
+ if use_block_pingpong and options.num_stages > 1:
+ amd.passes.ttgpuir.add_block_pingpong(pm, options.num_stages)
+
+ if knobs.amd.use_buffer_ops:
+ amd.passes.ttgpuir.add_canonicalize_pointers(pm)
+ passes.common.add_canonicalizer(pm)
+ amd.passes.ttgpuir.add_convert_to_buffer_ops(
+ pm,
+ options.arch,
+ knobs.amd.use_buffer_atomics,
+ knobs.amd.buffer_ops_analyze_small_tensor_range,
+ )
+
+ amd.passes.ttgpuir.add_fold_true_cmpi(pm)
+ passes.common.add_canonicalizer(pm)
+ passes.common.add_cse(pm)
+ passes.common.add_symbol_dce(pm)
+ pm.run(mod, 'make_ttgir')
+ metadata["tensordesc_meta"] = mod.get_tensordesc_metadata()
+ return mod
+
+ # Run the pass pipeline named 'gluon_to_ttgir' on ``src``, store the
+ # module's tensor descriptor metadata under ``metadata["tensordesc_meta"]``
+ # and return the same module object. ``options`` is not read here.
+ @staticmethod
+ def gluon_to_ttgir(src, metadata, options):
+ mod = src
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+
+ passes.gluon.add_inliner(pm)
+ passes.gluon.add_resolve_auto_encodings(pm)
+ passes.common.add_sccp(pm)
+ passes.ttir.add_loop_aware_cse(pm)
+ passes.gluon.add_canonicalizer(pm)
+ passes.ttgpuir.add_combine_tensor_select_and_if(pm)
+
+ pm.run(mod, 'gluon_to_ttgir')
+ metadata["tensordesc_meta"] = mod.get_tensordesc_metadata()
+ return mod
+
+ @staticmethod
+ def make_llir(src, metadata, options):
+ mod = src
+ # TritonGPU -> LLVM-IR (MLIR)
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ amd.passes.ttgpuir.add_update_async_wait_count(pm, options.arch)
+ # custom_lds_size is an experimental parameter that defines amount of LDS available
+ # for one thread block. Measured in bytes.
+ #
+ # If custom_lds_size = 0, pass will consider all LDS is available for one threads block,
+ # LDS size is determined by provided arch name.
+ custom_lds_size = 0
+ amd.passes.ttgpuir.add_optimize_lds_usage(pm, options.arch, custom_lds_size)
+ passes.convert.add_scf_to_cf(pm)
+ passes.gluon.add_inliner(pm)
+ passes.convert.add_index_to_llvmir(pm)
+
+ amd.passes.ttgpuir.add_allocate_shared_memory(pm)
+ # instrumentation point here so we can override IRs above (e.g., ttir and ttgir)
+ if HIPBackend.instrumentation:
+ HIPBackend.instrumentation.patch("ttgpuir_to_llvmir", pm, mod.context)
+ ## __HIP_FTZ is used to control the denorm flushing behavior of exp2 op as follows:
+ ## 1. If __HIP_FTZ = 1, exp2 flushes denorms in input and output regardless
+ ## of the value of kernel arg `allow_flush_denorm`.
+ ## 2. If __HIP_FTZ = 0, whether exp2 flushes denorms in input and output
+ ## depends on the value of kernel arg `allow_flush_denorm`.
+ ## 3. __HIP_FTZ is default to 1 and not exposed as a kernel argument.
+ ## For now it is used as a controller for developers only.
+ __HIP_FTZ = True
+ amd.passes.ttgpuir.add_to_llvmir(pm, options.arch, __HIP_FTZ)
+ passes.common.add_canonicalizer(pm)
+ passes.common.add_cse(pm)
+
+ passes.convert.add_cf_to_llvmir(pm)
+ passes.convert.add_arith_to_llvmir(pm)
+ passes.common.add_canonicalizer(pm)
+ passes.common.add_cse(pm)
+ passes.common.add_symbol_dce(pm)
+
+ if options.schedule_hint.lower() != "none":
+ amd.passes.ttgpuir.lower_instruction_sched_hints(pm, options.arch, options.num_stages)
+
+ # This can not be moved below the di_scope pass
+ if HIPBackend.instrumentation:
+ HIPBackend.instrumentation.patch("llvmir_to_llvm", pm, mod.context)
+
+ if not knobs.compilation.disable_line_info and not knobs.compilation.dump_ir_extract_di_local_variables:
+ passes.llvmir.add_di_scope(pm)
+
+ amd.passes.ttgpuir.add_builtin_func_to_llvmir(pm, __HIP_FTZ)
+ pm.run(mod, 'make_llir')
+
+ if knobs.compilation.dump_ir_extract_di_local_variables:
+ # comments below on why separate it
+ if not knobs.compilation.disable_line_info:
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.llvmir.add_di_scope(pm)
+ pm.run(mod, 'make_llir.disable_line_info')
+
+ # Insert debug intrinsics carrying several DI attributes, including the
+ # source variable name and type info. Note: for a reason not yet known,
+ # this pass and add_di_scope have to be run separately; if they are put
+ # into the previous pipeline, it triggers a segmentation fault without
+ # any error message. This could be due to a bug in MLIR or pybind11.
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.llvmir.add_di_local_variable(pm)
+ pm.run(mod, 'make_llir.dump_ir_extract_di_local_variables')
+
+ # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
+ llvm.init_targets()
+ context = llvm.context()
+ llvm_mod = llvm.to_module(mod, context)
+ amd.attach_target_triple(llvm_mod)
+ target_features = ''
+ if knobs.compilation.enable_asan:
+ target_features = '+xnack'
+ llvm.attach_datalayout(llvm_mod, amd.TARGET_TRIPLE, options.arch, target_features)
+
+ # Set various control constants on the LLVM module so that device
+ # libraries can resolve references to them.
+ amd.set_isa_version(llvm_mod, options.arch)
+ amd.set_abi_version(llvm_mod, 500)
+ amd.set_bool_control_constant(llvm_mod, "__oclc_finite_only_opt", False)
+ amd.set_bool_control_constant(llvm_mod, "__oclc_correctly_rounded_sqrt32", True)
+ amd.set_bool_control_constant(llvm_mod, "__oclc_unsafe_math_opt", False)
+ amd.set_bool_control_constant(llvm_mod, "__oclc_wavefrontsize64", options.warp_size == 64)
+
+ # Set kernel attributes first given this may affect later optimizations.
+ fns = [fn for fn in llvm_mod.get_functions() if not fn.is_declaration()]
+ # The public kernel should be kernel 0.
+ fns[0].set_calling_conv(amd.CALLING_CONV_AMDGPU_KERNEL)
+ fns[0].add_fn_attr("amdgpu-flat-work-group-size", f"1,{options.num_warps*options.warp_size}")
+ if "memory-bound-attention" in options.schedule_hint.split(','):
+ fns[0].add_fn_attr("amdgpu-sched-strategy", "iterative-ilp")
+ fns[0].add_fn_attr("uniform-work-group-size", "true")
+ # LLVM AMDGPU backend supports the attribute "amdgpu-waves-per-eu"="MIN[, MAX]".
+ # This attribute may be attached to a kernel function definition and is an optimization hint.
+ # The MIN parameter specifies the requested minimum number of waves per EU, and the optional MAX
+ # parameter specifies the requested maximum number of waves per EU (must be >= MIN if specified).
+ # If MAX is omitted, then there is no restriction on the maximum number of waves per EU other than
+ # the one dictated by the hardware for which the kernel is compiled. Passing 0, 0 as MIN, MAX
+ # implies the default behavior (no limits).
+ # Specifying N, N forces LLVM to focus on a single register count, simplifies some heuristics
+ # and may improve scheduling.
+ fns[0].add_fn_attr("amdgpu-waves-per-eu", f"{options.waves_per_eu}, {options.waves_per_eu}")
+ denormal_mode = "preserve-sign" if options.allow_flush_denorm else "ieee"
+ fns[0].add_fn_attr("denormal-fp-math-f32", denormal_mode)
+ if knobs.compilation.enable_asan:
+ fns[0].add_fn_target_feature("+xnack")
+ fns[0].add_fn_asan_attr()
+
+ # Hint the compiler that we'd like the firmware to set the kernel arguments
+ # to user SGPRs so that the kernel does not need to s_load its arguments
+ # from memory.
+ amd.set_all_fn_arg_inreg(fns[0])
+
+ if knobs.compilation.enable_asan:
+ default_libdir = Path(__file__).parent / 'lib'
+ paths = [
+ str(default_libdir / 'asanrtl.bc'),
+ str(default_libdir / "ocml.bc"),
+ str(default_libdir / "ockl.bc")
+ ]
+ llvm.link_extern_libs(llvm_mod, paths)
+ elif options.extern_libs:
+ paths = [path for (name, path) in options.extern_libs if amd.need_extern_lib(llvm_mod, name)]
+ if len(paths) > 0:
+ llvm.link_extern_libs(llvm_mod, paths)
+
+ llvm.optimize_module(llvm_mod, llvm.OPTIMIZE_O3, options.arch, '', [], options.enable_fp_fusion)
+
+ # Architectures with architected SGPRs store the workgroup id in ttmp9 (X) and ttmp7 (Y[15:0], Z[31:16]).
+ # These attributes are used to determine if Z should be masked out when loading Y. They are inferred during
+ # optimize_module from calls to @llvm.amdgcn.workgroup.id.x/y/z(). We cannot rely on this because a
+ # dispatch dimensions might be used even if there is no program_id() call for it.
+ if amd.has_architected_sgprs(options.arch):
+ fns[0].remove_fn_attr("amdgpu-no-workgroup-id-x")
+ fns[0].remove_fn_attr("amdgpu-no-workgroup-id-y")
+ fns[0].remove_fn_attr("amdgpu-no-workgroup-id-z")
+
+ if knobs.amd.scalarize_packed_fops:
+ amd.add_scalarize_packed_fops_llvm_pass(fns[0])
+
+ # Get some metadata
+ metadata["shared"] = src.get_int_attr("ttg.shared")
+ metadata["profile_scratch_size"] = src.get_int_attr("ttg.profile_scratch_memory_size") or 0
+ metadata["profile_scratch_align"] = src.get_int_attr("ttg.profile_scratch_memory_alignment") or 1
+
+ amd.cleanup_bitcode_metadata(llvm_mod)
+ # Disable inlining of print related functions,
+ # because inlining of these function could slow down compilation significantly
+ amd.disable_print_inline(llvm_mod)
+ return str(llvm_mod)
+
+    @staticmethod
+    def make_amdgcn(src, metadata, options):
+        """Translate textual LLVM IR into AMDGCN assembly.
+
+        Stores the single kernel's name in ``metadata["name"]`` and returns the
+        value produced by ``llvm.translate_to_asm``.
+        """
+        # Exactly one kernel definition is expected. The name is extracted at
+        # this late step to accommodate `triton.compile` on user-provided LLVM.
+        kernel_names = re.findall(r"define amdgpu_kernel void @([a-zA-Z_][a-zA-Z0-9_]*)", src)
+        assert len(kernel_names) == 1
+        kernel_name = kernel_names[0]
+        metadata["name"] = kernel_name
+
+        # Arguments shared by every LLVM call below.
+        llvm_flags = []
+        target_features = '-real-true16' if 'gfx11' in options.arch else ''
+        common_args = (src, amd.TARGET_TRIPLE, options.arch, target_features, llvm_flags, options.enable_fp_fusion)
+
+        # Identifier handed to the dump-capable calls: kernel name plus a hash of the IR text.
+        dump_file_id = kernel_name + '_' + hashlib.sha256(src.encode("utf-8")).hexdigest()
+        # The MIR translation result is not used here; only the assembly is returned.
+        llvm.translate_to_mir(*common_args, dump_file_id)
+        llvm.dump_sched_dag(*common_args, dump_file_id)
+        amdgcn = llvm.translate_to_asm(*common_args, False)
+
+        if knobs.amd.dump_amdgcn:
+            print("// -----// AMDGCN Dump //----- //")
+            print(amdgcn)
+        return amdgcn
+
+    @staticmethod
+    def make_hsaco(src, metadata, options):
+        """Assemble AMDGCN assembly, link it, and return the linked file's bytes."""
+        xnack_feature = '+xnack' if knobs.compilation.enable_asan else ''
+        assembled = amd.assemble_amdgcn(src, options.arch, xnack_feature)
+        # `amd.link_hsaco` is handed file names, so the assembled object is
+        # staged through temporary files that go away when the blocks exit.
+        with tempfile.NamedTemporaryFile() as linked_file:
+            with tempfile.NamedTemporaryFile() as object_file:
+                with open(object_file.name, "wb") as writer:
+                    writer.write(assembled)
+                amd.link_hsaco(object_file.name, linked_file.name)
+            with open(linked_file.name, "rb") as reader:
+                return reader.read()
+
+ def add_stages(self, stages, options, language):
+ """Register this backend's compilation stages in `stages`.
+
+ Every entry is a callable taking `(src, metadata)` that forwards to the
+ corresponding method of this backend together with `options`.
+ """
+ if language == Language.TRITON:
+ stages["ttir"] = lambda src, metadata: self.make_ttir(src, metadata, options)
+ stages["ttgir"] = lambda src, metadata: self.make_ttgir(src, metadata, options)
+ elif language == Language.GLUON:
+ stages["ttgir"] = lambda src, metadata: self.gluon_to_ttgir(src, metadata, options)
+ stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options)
+ stages["amdgcn"] = lambda src, metadata: self.make_amdgcn(src, metadata, options)
+ stages["hsaco"] = lambda src, metadata: self.make_hsaco(src, metadata, options)
+ # Optional hook: when one is set, it receives this backend, the stages, options and language.
+ if knobs.runtime.add_stages_inspection_hook is not None:
+ knobs.runtime.add_stages_inspection_hook(self, stages, options, language, None)
+
+ @functools.lru_cache()
+ def hash(self):
+ """Return the formatted string of `self.target`."""
+ # NOTE(review): lru_cache on a method keys on `self` and keeps the instance
+ # referenced by the cache - confirm this is intended.
+ return f'{self.target}'
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.c b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.c
new file mode 100644
index 0000000000000000000000000000000000000000..24178b54c319bb278bf014570128887315d35827
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.c
@@ -0,0 +1,504 @@
+#define __HIP_PLATFORM_AMD__
+#include
+#include
+#define PY_SSIZE_T_CLEAN
+#include
+#include
+#include
+#include
+#include
+
+// Raw TDM descriptor: twelve 32-bit words, four in group0 and eight in group1.
+// encodeTDMDescriptor documents the bit layout of each group and fills the
+// words in.
+typedef struct {
+ uint32_t group0_0;
+ uint32_t group0_1;
+ uint32_t group0_2;
+ uint32_t group0_3;
+ uint32_t group1_0;
+ uint32_t group1_1;
+ uint32_t group1_2;
+ uint32_t group1_3;
+ uint32_t group1_4;
+ uint32_t group1_5;
+ uint32_t group1_6;
+ uint32_t group1_7;
+} TDMDescriptor;
+
+// Python object that stores a TDMDescriptor by value.
+typedef struct {
+  // PyObject_HEAD already expands to a complete member declaration; a trailing
+  // semicolon would leave an empty declaration that some compilers reject.
+  PyObject_HEAD
+  TDMDescriptor desc;
+} PyTDMDescriptorObject;
+
+// tp_new slot: allocate a PyTDMDescriptorObject whose descriptor words are all
+// zero. `args` and `kw` are ignored. Returns NULL when allocation fails.
+static PyObject *PyTDMDescriptor_new(PyTypeObject *type, PyObject *args,
+                                     PyObject *kw) {
+  PyObject *obj = type->tp_alloc(type, 0);
+  if (obj != NULL) {
+    TDMDescriptor *desc = &((PyTDMDescriptorObject *)obj)->desc;
+    memset(desc, 0, sizeof(*desc));
+  }
+  return obj;
+}
+
+// tp_dealloc slot: releases the object's memory through its type's tp_free.
+// Nothing else is released here.
+static void PyTDMDescriptor_dealloc(PyTDMDescriptorObject *self) {
+ Py_TYPE(self)->tp_free((PyObject *)self);
+}
+
+// Type object for the Python-visible TDM descriptor. Only the new/dealloc
+// slots, flags, doc and size are set; no methods or members are defined
+// here.
+static PyTypeObject PyTDMDescriptorType = {
+ PyVarObject_HEAD_INIT(NULL, 0).tp_name =
+ "triton.backends.amd.PyTDMDescriptor",
+ .tp_basicsize = sizeof(PyTDMDescriptorObject),
+ .tp_itemsize = 0,
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = "PyObject for TDMDescriptor",
+ .tp_new = PyTDMDescriptor_new,
+ .tp_dealloc = (destructor)PyTDMDescriptor_dealloc,
+};
+
+// TODO: Both host-side and device-side TDM descriptor follow the same encoding
+// format. Consider to add a common utility to remove duplicate code.
+//
+// Encode a 2D TDM descriptor into `desc`.
+//   elementBitWidth: element size in bits.
+//   blockSize, shape, strides: arrays indexed by dimension; index 0 is the
+//     outer dimension and index 1 the inner one.
+//   numWarps: number of warps the block is distributed over.
+//   padInterval, padAmount: padding parameters in elements; padding is only
+//     enabled when both are positive after conversion to dwords.
+//   globalAddress: global address stored in the descriptor.
+// Returns false, leaving `desc` untouched, when rank != 2 or when the block
+// cannot be distributed over the warps; returns true otherwise.
+static bool encodeTDMDescriptor(TDMDescriptor *desc, int elementBitWidth,
+                                uint32_t *blockSize, int numWarps,
+                                int padInterval, int padAmount, uint32_t *shape,
+                                uint32_t *strides, uint64_t globalAddress,
+                                int rank) {
+  // NYI: TDM > 2D cases
+  if (rank != 2)
+    return false;
+
+  // Get warp distribution: halve the warps assigned to dim 0 until they fit.
+  uint32_t numWarpsDim0 = numWarps;
+  while (numWarpsDim0 > blockSize[0])
+    numWarpsDim0 /= 2;
+  // numWarpsDim0 ends up 0 when numWarps or blockSize[0] is 0; reject that
+  // before it is used as a divisor.
+  if (numWarpsDim0 == 0)
+    return false;
+  uint32_t numWarpsDim1 = numWarps / numWarpsDim0;
+  if (blockSize[1] % numWarpsDim1 != 0)
+    return false;
+
+  uint32_t blockSize0 = (blockSize[0] + numWarpsDim0 - 1) / numWarpsDim0;
+  uint32_t blockSize1 = (blockSize[1] + numWarpsDim1 - 1) / numWarpsDim1;
+
+  // group0 (128 bits / 4 dwords) effective bit encoding:
+  // [120:64]: global address
+  // [127:126]: type - currently always set to 0x2
+  desc->group0_2 = (uint32_t)(globalAddress & 0xFFFFFFFF);
+  desc->group0_3 = (uint32_t)((globalAddress >> 32) & 0x01FFFFFF);
+  // Unsigned literal: shifting a signed 1 into bit 31 is undefined behavior.
+  desc->group0_3 |= (0x1u << 31);
+
+  // group1 (256 bits / 8 dwords) effective bit encoding:
+  // [17:16]: data size - log2(element size in bytes)
+  // [20]: enable padding
+  // [24:22]: pad interval - log2(pad interval in dwords) - 1
+  // [31:25]: pad amount - pad amount in dwords - 1
+  // [79:48]: tensor shape dim inner
+  // [111:80]: tensor shape dim outer
+  // [127:112]: block shape dim inner
+  // [143:128]: block shape dim outer
+  // [207:160]: tensor stride dim outer (we only use 32 bits)
+  int elementSizeInBytes = elementBitWidth / 8;
+  int dataSize = log2(elementSizeInBytes);
+  desc->group1_0 = (dataSize << 16);
+  int dwordSize = 32;
+  int padIntervalInDwords = padInterval * elementBitWidth / dwordSize;
+  int padAmountInDwords = padAmount * elementBitWidth / dwordSize;
+  if (padIntervalInDwords > 0 && padAmountInDwords > 0) {
+    int log2PadInterval = log2(padIntervalInDwords);
+    desc->group1_0 |= (1 << 20);
+    desc->group1_0 |= ((log2PadInterval - 1) << 22);
+    desc->group1_0 |= ((padAmountInDwords - 1) << 25);
+  }
+  // The 32-bit shape values straddle dword boundaries, hence the split shifts.
+  desc->group1_1 = (shape[1] << 16);
+  desc->group1_2 = (shape[1] >> 16);
+  desc->group1_2 |= (shape[0] << 16);
+  desc->group1_3 = (shape[0] >> 16);
+  desc->group1_3 |= (blockSize1 << 16);
+  desc->group1_4 = (blockSize0 & 0xFFFF);
+  desc->group1_5 = strides[0];
+
+  return true;
+}
+
+// The list of paths to search for the HIP runtime library. The caller Python
+// code should substitute the search path placeholder.
+// Each entry is passed to dlopen by initSymbolTable.
+static const char *hipLibSearchPaths[] = {"/*py_libhip_search_path*/"};
+
+// The list of HIP dynamic library symbols and their signature we are interested
+// in this file.
+// |FOR_EACH_ERR_FN| is a macro to process APIs that return hipError_t;
+// |FOR_EACH_STR_FN| is a macro to process APIs that return const char *.
+// Both the HIPSymbolTable fields and the symbol lookups in initSymbolTable
+// are generated from this list, so a new API only has to be added here.
+#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN) \
+ FOR_EACH_STR_FN(hipGetErrorString, hipError_t hipError) \
+ FOR_EACH_ERR_FN(hipGetDeviceProperties, hipDeviceProp_t *prop, int deviceId) \
+ FOR_EACH_ERR_FN(hipModuleLoadDataEx, hipModule_t *module, const void *image, \
+ unsigned int numOptions, hipJitOption *options, \
+ void **optionValues) \
+ FOR_EACH_ERR_FN(hipModuleGetFunction, hipFunction_t *function, \
+ hipModule_t module, const char *kname) \
+ FOR_EACH_ERR_FN(hipFuncGetAttribute, int *, hipFunction_attribute attr, \
+ hipFunction_t function)
+
+// HIP driver version format: HIP_VERSION_MAJOR * 10000000 + HIP_VERSION_MINOR *
+// 100000 + HIP_VERSION_PATCH.
+#define TRITON_HIP_DRIVER_EXTRACT_MAJOR_VERSION(version) ((version) / 10000000)
+#define TRITON_HIP_DRIVER_EXTRACT_MINOR_VERSION(version)                       \
+  (((version) % 10000000) / 100000)
+#define TRITON_HIP_DRIVER_EXTRACT_PATCH_VERSION(version) ((version) % 100000)
+// Minimum libamdhip64 major version accepted by checkDriverVersion.
+#define TRITON_HIP_DRIVER_REQ_MAJOR_VERSION (6)
+
+// Define TRITON_HIP_DRIVER_DBG_VERSION to print the detected library version.
+// #define TRITON_HIP_DRIVER_DBG_VERSION
+//
+// TRITON_HIP_DRIVER_LOG_VERSION expands to exactly one statement without its
+// terminating semicolon; the call site supplies it. That keeps the macro safe
+// inside unbraced if/else. `msgBuff` must be a char array (not a pointer)
+// because its size is taken with sizeof.
+#ifdef TRITON_HIP_DRIVER_DBG_VERSION
+#define TRITON_HIP_DRIVER_LOG_VERSION(version, msgBuff)                        \
+  do {                                                                         \
+    snprintf(msgBuff, sizeof(msgBuff), "libamdhip64 version is: %d.%d.%d",     \
+             TRITON_HIP_DRIVER_EXTRACT_MAJOR_VERSION(version),                 \
+             TRITON_HIP_DRIVER_EXTRACT_MINOR_VERSION(version),                 \
+             TRITON_HIP_DRIVER_EXTRACT_PATCH_VERSION(version));                \
+    printf("%s\n", msgBuff);                                                   \
+  } while (0)
+#else
+#define TRITON_HIP_DRIVER_LOG_VERSION(version, msgBuff)                        \
+  do {                                                                         \
+    (void)msgBuff;                                                             \
+    (void)(version);                                                           \
+  } while (0)
+#endif
+
+// Size in bytes of the stack buffers used to format messages.
+#define TRITON_HIP_MSG_BUFF_SIZE (1024U)
+
+// The HIP symbol table for holding resolved dynamic library symbols.
+// Each field is a function pointer named after the HIP API it holds; the
+// fields are generated from HIP_SYMBOL_LIST.
+struct HIPSymbolTable {
+#define DEFINE_EACH_ERR_FIELD(hipSymbolName, ...) \
+ hipError_t (*hipSymbolName)(__VA_ARGS__);
+#define DEFINE_EACH_STR_FIELD(hipSymbolName, ...) \
+ const char *(*hipSymbolName)(__VA_ARGS__);
+
+ HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
+};
+
+// Single file-local instance; its pointers are filled in by initSymbolTable.
+static struct HIPSymbolTable hipSymbolTable;
+
+// Query the HIP driver version through the already-opened library `lib`.
+// Returns the raw version number on success. When the symbol cannot be
+// resolved or the major version is below TRITON_HIP_DRIVER_REQ_MAJOR_VERSION,
+// sets a Python RuntimeError, closes `lib`, and returns -1.
+static int checkDriverVersion(void *lib) {
+  typedef hipError_t (*hipDriverGetVersion_fn)(int *driverVersion);
+
+  dlerror(); // Clear existing errors
+  hipDriverGetVersion_fn getVersion =
+      (hipDriverGetVersion_fn)dlsym(lib, "hipDriverGetVersion");
+  if (dlerror() != NULL) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "cannot query 'hipDriverGetVersion' from libamdhip64.so");
+    dlclose(lib);
+    return -1;
+  }
+
+  // The call's status is ignored; if the call leaves the -1 default in place,
+  // the major-version check below fails.
+  int version = -1;
+  (void)getVersion(&version);
+  char msgBuff[TRITON_HIP_MSG_BUFF_SIZE] = {0};
+
+  const int major = TRITON_HIP_DRIVER_EXTRACT_MAJOR_VERSION(version);
+  if (major >= TRITON_HIP_DRIVER_REQ_MAJOR_VERSION) {
+    TRITON_HIP_DRIVER_LOG_VERSION(version, msgBuff);
+    return version;
+  }
+
+  snprintf(msgBuff, sizeof(msgBuff),
+           "libamdhip64 version %d.%d.%d is not supported! Required major "
+           "version is >=%d.",
+           major, TRITON_HIP_DRIVER_EXTRACT_MINOR_VERSION(version),
+           TRITON_HIP_DRIVER_EXTRACT_PATCH_VERSION(version),
+           TRITON_HIP_DRIVER_REQ_MAJOR_VERSION);
+  PyErr_SetString(PyExc_RuntimeError, msgBuff);
+  dlclose(lib);
+  return -1;
+}
+
+// Open the HIP runtime library and resolve every symbol in HIP_SYMBOL_LIST
+// into hipSymbolTable. Returns true on success; on failure sets a Python
+// RuntimeError and returns false.
+bool initSymbolTable() {
+  // Must start as NULL: when no dlopen succeeds, the check below reads it.
+  void *lib = NULL;
+
+  // Go through the list of search paths to dlopen the first HIP driver library.
+  int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
+  for (int i = 0; i < n; ++i) {
+    void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
+    if (handle) {
+      lib = handle;
+      // Stop at the first match; continuing would overwrite (and leak) it.
+      break;
+    }
+  }
+
+  if (!lib) {
+    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
+    return false;
+  }
+
+  // checkDriverVersion sets the error and closes `lib` itself on failure.
+  int hipVersion = checkDriverVersion(lib);
+  if (hipVersion == -1)
+    return false;
+
+  const char *error = NULL;
+  typedef hipError_t (*hipGetProcAddress_fn)(
+      const char *symbol, void **pfn, int hipVersion, uint64_t hipFlags,
+      hipDriverProcAddressQueryResult *symbolStatus);
+  hipGetProcAddress_fn hipGetProcAddress;
+  dlerror(); // Clear existing errors
+
+  *(void **)&hipGetProcAddress = dlsym(lib, "hipGetProcAddress");
+  error = dlerror();
+  if (error) {
+    PyErr_SetString(PyExc_RuntimeError,
+                    "cannot query 'hipGetProcAddress' from libamdhip64.so");
+    dlclose(lib);
+    return false;
+  }
+
+  // Resolve all symbols we are interested in.
+  uint64_t hipFlags = 0;
+  hipDriverProcAddressQueryResult symbolStatus;
+  hipError_t status = hipSuccess;
+#define QUERY_EACH_FN(hipSymbolName, ...)                                      \
+  status = hipGetProcAddress(#hipSymbolName,                                   \
+                             (void **)&hipSymbolTable.hipSymbolName,           \
+                             hipVersion, hipFlags, &symbolStatus);             \
+  if (status != hipSuccess) {                                                  \
+    PyErr_SetString(PyExc_RuntimeError,                                        \
+                    "cannot get address for '" #hipSymbolName                  \
+                    "' from libamdhip64.so");                                  \
+    dlclose(lib);                                                              \
+    return false;                                                              \
+  }
+
+  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)
+
+  return true;
+}
+
+// Turn a failing HIP status into a pending Python RuntimeError.
+// Does nothing when `code` is HIP_SUCCESS. `file` and `line` are currently
+// unused.
+static inline void gpuAssert(hipError_t code, const char *file, int line) {
+  if (code == HIP_SUCCESS)
+    return;
+
+  const char *prefix = "Triton Error [HIP]: ";
+  const char *str = hipSymbolTable.hipGetErrorString(code);
+  char err[TRITON_HIP_MSG_BUFF_SIZE] = {0};
+  snprintf(err, sizeof(err), "%s Code: %d, Messsage: %s", prefix, code, str);
+
+  // The GIL is acquired around the call that sets the exception.
+  PyGILState_STATE gil_state = PyGILState_Ensure();
+  PyErr_SetString(PyExc_RuntimeError, err);
+  PyGILState_Release(gil_state);
+}
+
+// Evaluate the HIP call `ans`, report a failure through gpuAssert, and return
+// NULL from the enclosing function if a Python error is pending afterwards.
+// Only usable inside functions that return a pointer. The expansion is a
+// braced block, so a trailing semicolon at the call site is optional.
+#define HIP_CHECK(ans) \
+ { \
+ gpuAssert((ans), __FILE__, __LINE__); \
+ if (PyErr_Occurred()) \
+ return NULL; \
+ }
+
+// Python-callable get_device_properties(device_id): returns a dict with the
+// properties listed in the Py_BuildValue call below, or NULL with an exception
+// set when argument parsing or the HIP query fails.
+static PyObject *getDeviceProperties(PyObject *self, PyObject *args) {
+  int device_id;
+  if (!PyArg_ParseTuple(args, "i", &device_id))
+    return NULL;
+
+  hipDeviceProp_t props;
+  HIP_CHECK(hipSymbolTable.hipGetDeviceProperties(&props, device_id));
+
+  // Each "i" unit makes Py_BuildValue read a C int from the variadic
+  // arguments. sharedMemPerBlock is a size_t in the HIP headers, so it is
+  // converted explicitly to match the format.
+  return Py_BuildValue(
+      "{s:i, s:i, s:i, s:i, s:i, s:i, s:s, s:i, s:i}", "max_shared_mem",
+      (int)props.sharedMemPerBlock, "max_num_regs", props.regsPerBlock,
+      "multiprocessor_count", props.multiProcessorCount, "sm_clock_rate",
+      props.clockRate, "mem_clock_rate", props.memoryClockRate, "mem_bus_width",
+      props.memoryBusWidth, "arch", props.gcnArchName, "warpSize",
+      props.warpSize, "max_threads_per_sm", props.maxThreadsPerMultiProcessor);
+}
+
+// Python-callable load_binary(name, data, shared, device): loads the binary
+// image `data` as a HIP module and looks up the function `name` in it.
+// Returns the tuple (module handle, function handle, n_regs, n_spills,
+// n_max_threads), or NULL with an exception set on failure.
+static PyObject *loadBinary(PyObject *self, PyObject *args) {
+ const char *name;
+ const char *data;
+ Py_ssize_t data_size;
+ int shared;
+ int device;
+ // data_size, shared and device are parsed but not referenced afterwards.
+ if (!PyArg_ParseTuple(args, "ss#ii", &name, &data, &data_size, &shared,
+ &device)) {
+ return NULL;
+ }
+
+ // set HIP options
+ // NOTE(review): opt[i] presumably pairs with optval[i] - confirm against the
+ // hipModuleLoadDataEx documentation before reordering either array.
+ hipJitOption opt[] = {hipJitOptionErrorLogBufferSizeBytes,
+ hipJitOptionErrorLogBuffer,
+ hipJitOptionInfoLogBufferSizeBytes,
+ hipJitOptionInfoLogBuffer, hipJitOptionLogVerbose};
+ const unsigned int errbufsize = 8192;
+ const unsigned int logbufsize = 8192;
+ char _err[errbufsize];
+ char _log[logbufsize];
+ void *optval[] = {(void *)(uintptr_t)errbufsize, (void *)_err,
+ (void *)(uintptr_t)logbufsize, (void *)_log, (void *)1};
+
+ // load the module from the image and look up the kernel function by name
+ hipModule_t mod;
+ hipFunction_t fun;
+ HIP_CHECK(hipSymbolTable.hipModuleLoadDataEx(&mod, data, 5, opt, optval))
+ HIP_CHECK(hipSymbolTable.hipModuleGetFunction(&fun, mod, name));
+
+ // get allocated registers and spilled registers from the function
+ // The status of these three queries is not checked; on failure the values
+ // keep their zero defaults.
+ int n_regs = 0;
+ int n_spills = 0;
+ int32_t n_max_threads = 0;
+ hipSymbolTable.hipFuncGetAttribute(&n_regs, HIP_FUNC_ATTRIBUTE_NUM_REGS, fun);
+ hipSymbolTable.hipFuncGetAttribute(&n_spills,
+ HIP_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, fun);
+ hipSymbolTable.hipFuncGetAttribute(
+ &n_max_threads, HIP_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fun);
+ // local size is queried in bytes; presumably /4 converts it to 32-bit units
+ n_spills /= 4;
+ if (PyErr_Occurred()) {
+ return NULL;
+ }
+ return Py_BuildValue("(KKiii)", (uint64_t)mod, (uint64_t)fun, n_regs,
+ n_spills, n_max_threads);
+}
+
+// Python-callable create_tdm_descriptor(elementBitWidth, blockSize, numWarps,
+// padInterval, padAmount, shape, strides, globalAddress).
+// blockSize, shape and strides must be sequences of exactly two ints.
+// Returns a new PyTDMDescriptor object, or NULL with an exception set when an
+// argument is invalid or the descriptor cannot be encoded.
+static PyObject *createTDMDescriptor(PyObject *self, PyObject *args) {
+  int elementBitWidth;
+  PyObject *blockSize;
+  int numWarps;
+  int padInterval;
+  int padAmount;
+  PyObject *shape;
+  PyObject *strides;
+  unsigned long long globalAddress;
+
+  if (!PyArg_ParseTuple(args, "iOiiiOOK", &elementBitWidth, &blockSize,
+                        &numWarps, &padInterval, &padAmount, &shape, &strides,
+                        &globalAddress)) {
+    return NULL;
+  }
+
+  PyTDMDescriptorObject *descObj = (PyTDMDescriptorObject *)PyObject_CallObject(
+      (PyObject *)&PyTDMDescriptorType, NULL);
+  if (!descObj)
+    return NULL;
+
+  PyObject *blockSizeFast = NULL;
+  PyObject *shapeFast = NULL;
+  PyObject *stridesFast = NULL;
+
+  uint32_t blockSizeInt[2];
+  uint32_t shapeInt[2];
+  uint32_t stridesInt[2];
+
+  blockSizeFast = PySequence_Fast(blockSize, "blockSize must be a sequence");
+  if (!blockSizeFast)
+    goto cleanup;
+  // The rank is fixed to 2, which also bounds the writes into the arrays above.
+  int rank = PySequence_Fast_GET_SIZE(blockSizeFast);
+  if (rank != 2) {
+    PyErr_SetString(PyExc_RuntimeError, "rank must be 2");
+    goto cleanup;
+  }
+
+  for (int i = 0; i < rank; ++i) {
+    PyObject *item = PySequence_Fast_GET_ITEM(blockSizeFast, i);
+    if (!PyLong_Check(item)) {
+      PyErr_SetString(PyExc_TypeError, "block size must be an int");
+      goto cleanup;
+    }
+    // PyLong_AsLong reports failure (e.g. overflow) as -1 plus an exception.
+    long value = PyLong_AsLong(item);
+    if (value == -1 && PyErr_Occurred())
+      goto cleanup;
+    blockSizeInt[i] = value;
+  }
+
+  shapeFast = PySequence_Fast(shape, "shape must be a sequence");
+  if (!shapeFast)
+    goto cleanup;
+
+  if (rank != PySequence_Fast_GET_SIZE(shapeFast)) {
+    PyErr_SetString(PyExc_RuntimeError, "rank mismatch");
+    goto cleanup;
+  }
+  for (int i = 0; i < rank; ++i) {
+    PyObject *item = PySequence_Fast_GET_ITEM(shapeFast, i);
+    if (!PyLong_Check(item)) {
+      PyErr_SetString(PyExc_TypeError, "shape must be an int");
+      goto cleanup;
+    }
+    long value = PyLong_AsLong(item);
+    if (value == -1 && PyErr_Occurred())
+      goto cleanup;
+    shapeInt[i] = value;
+  }
+
+  stridesFast = PySequence_Fast(strides, "strides must be a sequence");
+  if (!stridesFast)
+    goto cleanup;
+
+  if (rank != PySequence_Fast_GET_SIZE(stridesFast)) {
+    PyErr_SetString(PyExc_RuntimeError, "rank mismatch");
+    goto cleanup;
+  }
+  for (int i = 0; i < rank; ++i) {
+    PyObject *item = PySequence_Fast_GET_ITEM(stridesFast, i);
+    if (!PyLong_Check(item)) {
+      // Name the argument that is actually being validated here.
+      PyErr_SetString(PyExc_TypeError, "strides must be an int");
+      goto cleanup;
+    }
+    long value = PyLong_AsLong(item);
+    if (value == -1 && PyErr_Occurred())
+      goto cleanup;
+    stridesInt[i] = value;
+  }
+
+  // The sequences are no longer needed; reset the pointers so the cleanup
+  // path below does not release them a second time.
+  Py_DECREF(blockSizeFast);
+  blockSizeFast = NULL;
+  Py_DECREF(shapeFast);
+  shapeFast = NULL;
+  Py_DECREF(stridesFast);
+  stridesFast = NULL;
+
+  bool success = encodeTDMDescriptor(
+      &descObj->desc, elementBitWidth, blockSizeInt, numWarps, padInterval,
+      padAmount, shapeInt, stridesInt, globalAddress, rank);
+  if (!success) {
+    PyErr_SetString(PyExc_RuntimeError, "Failed to encode TDM descriptor");
+    goto cleanup;
+  }
+
+  return (PyObject *)descObj;
+
+cleanup:
+  Py_XDECREF(blockSizeFast);
+  Py_XDECREF(shapeFast);
+  Py_XDECREF(stridesFast);
+  Py_XDECREF(descObj);
+  return NULL;
+}
+
+// Functions exported by the module; every entry takes positional arguments
+// only (METH_VARARGS).
+static PyMethodDef ModuleMethods[] = {
+ {"load_binary", loadBinary, METH_VARARGS,
+ "Load provided hsaco into HIP driver"},
+ {"get_device_properties", getDeviceProperties, METH_VARARGS,
+ "Get the properties for a given device"},
+ {"create_tdm_descriptor", createTDMDescriptor, METH_VARARGS,
+ "create a host-side TDM descriptor"},
+ {NULL, NULL, 0, NULL} // sentinel
+};
+
+// Module definition for "hip_utils", used by PyInit_hip_utils.
+static struct PyModuleDef ModuleDef = {PyModuleDef_HEAD_INIT, "hip_utils",
+ NULL, // documentation
+ -1, // size
+ ModuleMethods};
+
+// Module entry point: resolve the HIP symbols, create the module, and expose
+// the PyTDMDescriptor type on it. Returns NULL with a Python exception set on
+// failure.
+PyMODINIT_FUNC PyInit_hip_utils(void) {
+  if (!initSymbolTable()) {
+    return NULL;
+  }
+
+  // PyModule_Create already registers the method table referenced by
+  // ModuleDef, so no separate PyModule_AddFunctions call is needed.
+  PyObject *m = PyModule_Create(&ModuleDef);
+  if (m == NULL) {
+    return NULL;
+  }
+
+  if (PyType_Ready(&PyTDMDescriptorType) < 0) {
+    Py_DECREF(m); // do not leak the module on failure
+    return NULL;
+  }
+
+  // PyModule_AddObject steals the reference only when it succeeds.
+  Py_INCREF(&PyTDMDescriptorType);
+  if (PyModule_AddObject(m, "PyTDMDescriptor",
+                         (PyObject *)&PyTDMDescriptorType) < 0) {
+    Py_DECREF(&PyTDMDescriptorType);
+    Py_DECREF(m);
+    return NULL;
+  }
+
+  return m;
+}
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.py
new file mode 100644
index 0000000000000000000000000000000000000000..24a0d84e8ab646c37f86fb5d3d462bd41c5a75ba
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/amd/driver.py
@@ -0,0 +1,877 @@
+import functools
+import os
+import subprocess
+import re
+import triton
+from pathlib import Path
+from triton import knobs
+from triton.backends.compiler import GPUTarget
+from triton.backends.driver import GPUDriver
+from triton.runtime import _allocation
+from triton.runtime.build import compile_module_from_src
+
+dirname = os.path.dirname(os.path.realpath(__file__))
+include_dirs = [os.path.join(dirname, "include")]
+PyTDMDescriptor = None
+
+
+def _find_already_mmapped_dylib_on_linux(lib_name):
+    """Return the path of a loaded shared library whose file name contains `lib_name`.
+
+    Walks the shared objects of the current process with `dl_iterate_phdr`.
+    Returns None on non-Linux platforms, when libc's `dl_iterate_phdr` cannot
+    be obtained, or when no loaded library matches.
+    """
+    import platform
+    if platform.system() != 'Linux':
+        return None
+
+    # See https://www.man7.org/linux/man-pages/man3/dl_iterate_phdr.3.html for details.
+    import ctypes
+    from ctypes import c_char, c_int, c_size_t, c_void_p, c_char_p, POINTER
+
+    class DlPhdrInfo(ctypes.Structure):
+        # Only the two leading fields are declared; the remaining ones are not needed.
+        _fields_ = [('dlpi_addr', c_void_p), ('dlpi_name', c_char_p)]
+
+    # callback_t must use POINTER(c_char) to avoid copying.
+    callback_t = ctypes.CFUNCTYPE(c_int, POINTER(DlPhdrInfo), POINTER(c_size_t), POINTER(c_char))
+
+    try:
+        dl_iterate_phdr = ctypes.CDLL('libc.so.6').dl_iterate_phdr
+    except Exception:
+        return None
+    # argtypes must use c_char_p to accept create_string_buffer.
+    dl_iterate_phdr.argtypes = [callback_t, c_char_p]
+    dl_iterate_phdr.restype = c_int
+
+    max_path_length = 4096
+    found_path = ctypes.create_string_buffer(max_path_length + 1)
+
+    def on_library(info, size, data):
+        # Copy the matching library's path into `data` and return 1; return 0 otherwise.
+        raw_name = info.contents.dlpi_name
+        if lib_name not in Path(os.fsdecode(raw_name)).name:
+            return 0
+        ctypes.memmove(data, raw_name, min(max_path_length, len(raw_name)))
+        return 1
+
+    if not dl_iterate_phdr(callback_t(on_library), found_path):
+        return None
+    return os.fsdecode(ctypes.string_at(found_path))
+
+
+@functools.lru_cache()
+def _get_path_to_hip_runtime_dylib():
+    """Locate `libamdhip64.so` and return its path.
+
+    Candidates are tried in this order and the first existing file wins: the
+    explicit `knobs.amd.libhip_path` override, a copy already mapped into this
+    process, the backend's own `lib` directory, `torch/lib` inside the
+    site-packages directories, `LD_LIBRARY_PATH`, `HIP_PATH`,
+    `hipconfig --path`, `ROCM_PATH`, the `ldconfig` cache, and finally
+    `/opt/rocm/lib`.
+
+    Raises:
+        RuntimeError: if the override or the mapped path is invalid, or if no
+            candidate exists (the message lists the attempted paths).
+    """
+    lib_name = "libamdhip64.so"
+
+    # If we are told explicitly what HIP runtime dynamic library to use, obey that.
+    if env_libhip_path := knobs.amd.libhip_path:
+        if env_libhip_path.endswith(lib_name) and os.path.exists(env_libhip_path):
+            return env_libhip_path
+        raise RuntimeError(f"TRITON_LIBHIP_PATH '{env_libhip_path}' does not point to a valid {lib_name}")
+
+    # If the shared object is already mmapped to address space, use it.
+    mmapped_path = _find_already_mmapped_dylib_on_linux(lib_name)
+    if mmapped_path:
+        if os.path.exists(mmapped_path):
+            return mmapped_path
+        raise RuntimeError(f"memory mapped '{mmapped_path}' in process does not point to a valid {lib_name}")
+
+    # Every candidate that did not exist, reported in the final error.
+    paths = []
+
+    # Check backend
+    local_lib = os.path.join(os.path.dirname(__file__), "lib", lib_name)
+    if os.path.exists(local_lib):
+        return local_lib
+    paths.append(local_lib)
+
+    import site
+    # First search the HIP runtime dynamic library packaged with PyTorch. It's very likely
+    # that we run Triton together with PyTorch. This makes sure we use the same dynamic
+    # library to avoid version mismatch.
+    site_packages = site.getsitepackages()
+    user_site = site.getusersitepackages()
+    if site.ENABLE_USER_SITE:  # ENABLE_USER_SITE is initialized in getusersitepackages()
+        site_packages = [user_site] + site_packages
+    for path in site_packages:
+        path = os.path.join(path, "torch", "lib", lib_name)
+        if os.path.exists(path):
+            return path
+        paths.append(path)
+
+    # Then try to see if developer provides a HIP runtime dynamic library using LD_LIBRARY_PATH.
+    env_ld_library_path = os.getenv("LD_LIBRARY_PATH")
+    if env_ld_library_path:
+        for d in env_ld_library_path.split(":"):
+            f = os.path.join(d, lib_name)
+            if os.path.exists(f):
+                return f
+            paths.append(f)
+
+    # HIP_PATH should point to HIP SDK root if set
+    env_hip_path = os.getenv("HIP_PATH")
+    if env_hip_path:
+        hip_lib_path = os.path.join(env_hip_path, "lib", lib_name)
+        if os.path.exists(hip_lib_path):
+            return hip_lib_path
+        paths.append(hip_lib_path)
+
+    # if available, `hipconfig --path` prints the HIP SDK root
+    try:
+        hip_root = subprocess.check_output(["hipconfig", "--path"]).decode().strip()
+        if hip_root:
+            hip_lib_path = os.path.join(hip_root, "lib", lib_name)
+            if os.path.exists(hip_lib_path):
+                return hip_lib_path
+            paths.append(hip_lib_path)
+    except (subprocess.CalledProcessError, OSError):
+        # hipconfig may not be available or may not be executable
+        pass
+
+    # ROCm lib dir based on env var
+    env_rocm_path = os.getenv("ROCM_PATH")
+    if env_rocm_path:
+        rocm_lib_path = os.path.join(env_rocm_path, "lib", lib_name)
+        if os.path.exists(rocm_lib_path):
+            return rocm_lib_path
+        paths.append(rocm_lib_path)
+
+    # Afterwards try to search the loader dynamic library resolution paths.
+    # ldconfig may be missing or fail (e.g. in minimal containers); that must
+    # not prevent the last-resort location below from being tried.
+    try:
+        libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
+    except (subprocess.CalledProcessError, OSError):
+        libs = ""
+    # each line looks like the following:
+    # libamdhip64.so.6 (libc6,x86-64) => /opt/rocm-6.0.2/lib/libamdhip64.so.6
+    # libamdhip64.so (libc6,x86-64) => /opt/rocm-6.0.2/lib/libamdhip64.so
+    locs = [line.split()[-1] for line in libs.splitlines() if line.strip().endswith(lib_name)]
+    for loc in locs:
+        if os.path.exists(loc):
+            return loc
+        paths.append(loc)
+
+    # As a last resort, guess if we have it in some common installation path.
+    common_install_path = os.path.join('/opt/rocm/lib/', lib_name)
+    if os.path.exists(common_install_path):
+        return common_install_path
+    paths.append(common_install_path)
+
+    raise RuntimeError(f"cannot locate {lib_name} after attempted paths {paths}")
+
+
+class HIPUtils(object):
+    """Singleton that compiles `driver.c` and exposes the functions of the resulting module."""
+
+    def __new__(cls):
+        # Create the shared instance on first use and hand it out afterwards.
+        try:
+            return cls.instance
+        except AttributeError:
+            cls.instance = super().__new__(cls)
+            return cls.instance
+
+    def __init__(self):
+        hip_library = _get_path_to_hip_runtime_dylib()
+        template = Path(dirname, "driver.c").read_text()
+        # A plain, single search-and-replace is used instead of templates or
+        # format strings: the C code's curly brackets need no escaping and the
+        # placeholder is substituted exactly once.
+        driver_src = template.replace('/*py_libhip_search_path*/', hip_library, 1)
+        mod = compile_module_from_src(src=driver_src, name="hip_utils", include_dirs=include_dirs)
+        for exported in ("load_binary", "get_device_properties", "create_tdm_descriptor"):
+            setattr(self, exported, getattr(mod, exported))
+        # Publish the descriptor type at module level as well.
+        global PyTDMDescriptor
+        PyTDMDescriptor = mod.PyTDMDescriptor
+
+# -------------------- Launcher ----------------------------
+def ty_to_cpp(ty):
+ if ty.startswith('*'):
+ return "hipDeviceptr_t"
+ if ty == "tensordesc":
+ return "TDMDescriptor"
+ return {
+ "i1": "int8_t",
+ "i8": "int8_t",
+ "i16": "int16_t",
+ "i32": "int32_t",
+ "i64": "int64_t",
+ "u1": "uint8_t",
+ "u8": "uint8_t",
+ "u16": "uint16_t",
+ "u32": "uint32_t",
+ "u64": "uint64_t",
+ "fp16": "double",
+ "bf16": "double",
+ "fp32": "double",
+ "f32": "double",
+ "fp64": "double",
+ }[ty]
+
+
+# Unsigned integer C type per float type; make_launcher uses it as the declared
+# type of float kernel arguments (presumably to carry the raw bits - confirm).
+FLOAT_STORAGE_TYPE = {
+ "fp16": "uint16_t",
+ "bf16": "uint16_t",
+ "fp32": "uint32_t",
+ "f32": "uint32_t",
+ "fp64": "uint64_t",
+}
+# Name of a pack function per float type.
+# NOTE(review): presumably these convert a parsed double into the storage type
+# above - confirm against the launcher template.
+FLOAT_PACK_FUNCTION = {
+ "fp16": "pack_fp16",
+ "bf16": "pack_bf16",
+ "fp32": "pack_fp32",
+ "f32": "pack_fp32",
+ "fp64": "pack_fp64",
+}
+
+# Fixed prefix of the argument format string; make_launcher appends one format
+# unit per signature entry after it.
+_BASE_ARGS_FORMAT = "piiiKKOOOOO"
+
+
def make_launcher(constants, signature, warp_size, tensordesc_meta):
    """Generate the C source of the extension module that launches one HIP kernel.

    The returned source defines a Python module ``__triton_launcher`` whose
    ``launch`` function parses the fixed launch arguments plus one value per
    kernel argument, converts pointers / tensor descriptors / floats to their
    device representation and calls the HIP launch API.

    Args:
        constants: Unused by this function; kept so the call signature stays
            unchanged.
        signature: Mapping whose values are the kernel argument types (type
            strings, or tuples of them). Only the values are read.
        warp_size: Interpolated into the generated code as the per-warp thread
            count; the block size is ``warp_size * num_warps``.
        tensordesc_meta: Per-descriptor metadata, in the order the tensor
            descriptor arguments appear, or a falsy value when descriptors are
            decomposed into base pointer, shape and strides.

    Returns:
        str: The C source code of the launcher module.

    Raises:
        ValueError: if a tensor descriptor type string cannot be parsed.
        KeyError: if a scalar type is not known to ``ty_to_cpp``.
    """

    def _expand_signature(signature):
        # Replace every "tensordesc<dtype[shape]>" entry by the flat list of
        # arguments the launcher actually receives for it.
        output = []
        tensordesc_idx = 0
        for sig in signature:
            if isinstance(sig, str) and sig.startswith("tensordesc"):
                meta = tensordesc_meta[tensordesc_idx] if tensordesc_meta else None
                tensordesc_idx += 1

                match = re.match("tensordesc<([^[>]*)\\[([^]]*)\\]", sig)
                if match is None:
                    raise ValueError(f"malformed tensor descriptor signature: {sig!r}")
                dtype = match.group(1)
                shape = match.group(2)
                ndim = shape.count(",") + 1

                # If there is no descriptor's metadata, the descriptor has been
                # decomposed to base pointer, shape and strides (plus a flag).
                if meta is None:
                    output.append("*" + dtype)
                    output.extend(["i64"] * (2 * ndim))
                    output.append("i1")
                else:
                    output.append("tensordesc")

                # Shape and strides are passed once more in both cases.
                output.extend(["i32"] * ndim)
                output.extend(["i64"] * ndim)
            else:
                output.append(sig)

        return output

    def _serialize_signature(sig):
        if isinstance(sig, tuple):
            return ','.join(map(_serialize_signature, sig))
        return sig

    def _extracted_type(ty):
        # C type of the local variable PyArg_ParseTuple writes into.
        if isinstance(ty, tuple):
            val = ','.join(map(_extracted_type, ty))
            return f"[{val}]"
        if ty.startswith("*") or ty.startswith("tensordesc"):
            return "PyObject*"
        if ty == "constexpr":
            return "PyObject*"
        return ty_to_cpp(ty)

    def format_of(ty):
        # PyArg_ParseTuple format unit for one (possibly nested) argument type.
        if isinstance(ty, tuple):
            val = ''.join(map(format_of, ty))
            return f"({val})"
        if ty.startswith("*") or ty.startswith("tensordesc"):
            return "O"
        if ty == "constexpr":
            return "O"
        return {
            "double": "d",
            "long": "l",
            "int8_t": "b",
            "int16_t": "h",
            "int32_t": "i",
            "int64_t": "L",
            "uint8_t": "B",
            "uint16_t": "H",
            "uint32_t": "I",
            "uint64_t": "K",
        }[ty_to_cpp(ty)]

    expanded = _expand_signature(signature.values())

    # The parse format keeps tuple nesting; everything below works on the
    # flattened list of leaf types.
    parse_format = _BASE_ARGS_FORMAT + ''.join(format_of(ty) for ty in expanded)
    flat_types = ','.join(map(_serialize_signature, expanded))
    signature = dict(enumerate(filter(bool, flat_types.split(','))))

    args_list = ', ' + ', '.join(f"&_arg{i}" for i in signature) if len(signature) > 0 else ''

    # Parameter declarations of the generated _launch() function.
    arg_decl_list = []
    for i, ty in signature.items():
        if ty == "constexpr":
            continue
        if ty in FLOAT_STORAGE_TYPE:
            arg_decl_list.append(f"{FLOAT_STORAGE_TYPE[ty]} arg{i}")
        else:
            arg_decl_list.append(f"{ty_to_cpp(ty)} arg{i}")
    arg_decls = ', '.join(arg_decl_list)

    # Expressions passed to _launch() for each non-constexpr argument.
    internal_args_list = []
    for i, ty in signature.items():
        if ty.startswith("*"):
            internal_args_list.append(f"ptr_info{i}.dev_ptr")
        elif ty.startswith("tensordesc"):
            internal_args_list.append(f"*desc{i}")
        elif ty in FLOAT_STORAGE_TYPE:
            internal_args_list.append(f"_arg{i}_storage")
        elif ty != "constexpr":
            internal_args_list.append(f"_arg{i}")

    newline = '\n  '
    ptr_decls = [
        f"DevicePtrInfo ptr_info{i} = getPointer(_arg{i}, {i}); if (!ptr_info{i}.valid) return NULL;"
        for i, ty in signature.items()
        if ty.startswith("*")
    ]
    # getTDMDescriptor() returns NULL (with a Python error set) on a type
    # mismatch, so bail out before the descriptor is dereferenced.
    tensor_desc_decls = [
        f"TDMDescriptor* desc{i} = getTDMDescriptor(_arg{i}, {i}); if (!desc{i}) return NULL;"
        for i, ty in signature.items()
        if ty.startswith("tensordesc")
    ]
    float_storage_decls = [
        f"{FLOAT_STORAGE_TYPE[ty]} _arg{i}_storage = {FLOAT_PACK_FUNCTION[ty]}(_arg{i});"
        for i, ty in signature.items()
        if ty in FLOAT_STORAGE_TYPE
    ]
    arg_extract_decls = ' '.join([f"{_extracted_type(ty)} _arg{i}; " for i, ty in signature.items()])

    libhip_path = _get_path_to_hip_runtime_dylib()

    # generate glue code
    params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
    params.append("&global_scratch")
    params.append("&profile_scratch")
    # NOTE(review): the #include operands were missing in the reviewed copy of
    # this file; the headers below are the ones the code visibly needs (HIP
    # runtime, CPython, dlopen, bool, snprintf) — confirm against upstream.
    src = f"""
#define __HIP_PLATFORM_AMD__
#include <hip/hip_runtime.h>
#include <hip/hip_runtime_api.h>
#include <Python.h>
#include <dlfcn.h>
#include <stdbool.h>
#include <stdio.h>

typedef struct {{
  uint32_t group0_0;
  uint32_t group0_1;
  uint32_t group0_2;
  uint32_t group0_3;
  uint32_t group1_0;
  uint32_t group1_1;
  uint32_t group1_2;
  uint32_t group1_3;
  uint32_t group1_4;
  uint32_t group1_5;
  uint32_t group1_6;
  uint32_t group1_7;
}} TDMDescriptor;

typedef struct {{
  PyObject_HEAD;
  TDMDescriptor desc;
}} PyTDMDescriptorObject;

// The list of paths to search for the HIP runtime library. The caller Python
// code should substitute the search path placeholder.
static const char *hipLibSearchPaths[] = {{"{libhip_path}"}};

// The list of HIP dynamic library symbols and their signature we are interested
// in this file.
#define HIP_SYMBOL_LIST(FOR_EACH_ERR_FN, FOR_EACH_STR_FN) \\
  FOR_EACH_STR_FN(hipGetLastError, true) \\
  FOR_EACH_STR_FN(hipGetErrorString, true, hipError_t hipError) \\
  FOR_EACH_ERR_FN(hipDrvLaunchKernelEx, false, \\
                  const HIP_LAUNCH_CONFIG *config, \\
                  hipFunction_t f, \\
                  void **kernelParams, \\
                  void **extra) \\
  FOR_EACH_ERR_FN(hipModuleLaunchKernel, true, hipFunction_t f, \\
                  unsigned int gridDimX, unsigned int gridDimY, \\
                  unsigned int gridDimZ, unsigned int blockDimX, \\
                  unsigned int blockDimY, unsigned int blockDimZ, \\
                  unsigned int sharedMemBytes, hipStream_t stream, \\
                  void **kernelParams, void **extra) \\
  FOR_EACH_ERR_FN(hipModuleLaunchCooperativeKernel, true, hipFunction_t f, \\
                  unsigned int gridDimX, unsigned int gridDimY, \\
                  unsigned int gridDimZ, unsigned int blockDimX, \\
                  unsigned int blockDimY, unsigned int blockDimZ, \\
                  unsigned int sharedMemBytes, hipStream_t stream, \\
                  void **kernelParams, void **extra) \\
  FOR_EACH_ERR_FN(hipPointerGetAttribute, true, void *data, \\
                  hipPointer_attribute attribute, hipDeviceptr_t ptr)

// The HIP symbol table for holding resolved dynamic library symbols.
struct HIPSymbolTable {{
#define DEFINE_EACH_ERR_FIELD(hipSymbolName, required, ...) \\
  hipError_t (*hipSymbolName)(__VA_ARGS__);
#define DEFINE_EACH_STR_FIELD(hipSymbolName, required, ...) \\
  const char *(*hipSymbolName)(__VA_ARGS__);

  HIP_SYMBOL_LIST(DEFINE_EACH_ERR_FIELD, DEFINE_EACH_STR_FIELD)
}};

static struct HIPSymbolTable hipSymbolTable;

bool initSymbolTable() {{
  // Use the HIP runtime library loaded into the existing process if it exits.
  void *lib = dlopen("libamdhip64.so", RTLD_NOLOAD);

  // Otherwise, go through the list of search paths to dlopen the first HIP
  // driver library.
  if (!lib) {{
    int n = sizeof(hipLibSearchPaths) / sizeof(hipLibSearchPaths[0]);
    for (int i = 0; i < n; ++i) {{
      void *handle = dlopen(hipLibSearchPaths[i], RTLD_LAZY | RTLD_LOCAL);
      if (handle) {{
        lib = handle;
        // Stop at the first library that loads.
        break;
      }}
    }}
  }}
  if (!lib) {{
    PyErr_SetString(PyExc_RuntimeError, "cannot open libamdhip64.so");
    return false;
  }}

  typedef hipError_t (*hipGetProcAddress_fn)(
      const char *symbol, void **pfn, int hipVersion, uint64_t hipFlags,
      hipDriverProcAddressQueryResult *symbolStatus);
  hipGetProcAddress_fn hipGetProcAddress;
  dlerror(); // Clear existing errors
  const char *error = NULL;
  *(void **)&hipGetProcAddress = dlsym(lib, "hipGetProcAddress");
  error = dlerror();
  if (error) {{
    PyErr_SetString(PyExc_RuntimeError,
                    "cannot query 'hipGetProcAddress' from libamdhip64.so");
    dlclose(lib);
    return false;
  }}

  // Resolve all symbols we are interested in.
  int hipVersion = HIP_VERSION;
  uint64_t hipFlags = 0;
  hipDriverProcAddressQueryResult symbolStatus;
  hipError_t status = hipSuccess;
#define QUERY_EACH_FN(hipSymbolName, required, ...) \
  status = hipGetProcAddress(#hipSymbolName, \
                             (void **)&hipSymbolTable.hipSymbolName, \
                             hipVersion, hipFlags, &symbolStatus); \
  if (required && status != hipSuccess) {{ \
    PyErr_SetString(PyExc_RuntimeError, \
                    "cannot get address for '" #hipSymbolName \
                    "' from libamdhip64.so"); \
    dlclose(lib); \
    return false; \
  }}

  HIP_SYMBOL_LIST(QUERY_EACH_FN, QUERY_EACH_FN)

  return true;
}}

static inline void gpuAssert(hipError_t code, const char *file, int line)
{{
  if (code != HIP_SUCCESS)
  {{
    const char* prefix = "Triton Error [HIP]: ";
    const char* str = hipSymbolTable.hipGetErrorString(code);
    char err[1024] = {{0}};
    snprintf(err, 1024, "%s Code: %d, Messsage: %s", prefix, code, str );
    PyErr_SetString(PyExc_RuntimeError, err);
  }}
}}

#define HIP_CHECK(ans) {{ gpuAssert((ans), __FILE__, __LINE__); }}

static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int launch_cooperative_grid, int shared_memory, hipStream_t stream, hipFunction_t function, hipDeviceptr_t profile_scratch{', ' + arg_decls if len(arg_decls) > 0 else ''}) {{
  if (gridX * gridY * gridZ == 0)
    return;
  hipDeviceptr_t global_scratch = 0;
  void *params[] = {{ {', '.join(params)} }};
  if(num_ctas > 1) {{
    if (!hipSymbolTable.hipDrvLaunchKernelEx) {{
      PyErr_SetString(PyExc_RuntimeError, "missing hipDrvLaunchKernelEx symbol; please update HIP runtime");
      return;
    }}

    hipLaunchAttribute attributes[2];
    // Attribute0: Cluster dimensions
    attributes[0].id = 4;
    int *cluster_dims = (int*)attributes[0].val.pad;
    cluster_dims[0] = num_ctas;
    cluster_dims[1] = 1;
    cluster_dims[2] = 1;
    // Attribute1: Cooperative launch
    attributes[1].id = hipLaunchAttributeCooperative;
    attributes[1].val.cooperative = launch_cooperative_grid;

    HIP_LAUNCH_CONFIG config = {{
      gridX * num_ctas, gridY, gridZ,   // Grid size
      {warp_size} * num_warps, 1, 1,    // Block size
      shared_memory, stream,
      attributes, 2                     // Number of attributes
    }};
    HIP_CHECK(hipSymbolTable.hipDrvLaunchKernelEx(&config, function, params, 0));
    return;
  }}
  else if (launch_cooperative_grid) {{
    HIP_CHECK(hipSymbolTable.hipModuleLaunchCooperativeKernel(function, gridX, gridY, gridZ, {warp_size}*num_warps, 1, 1, shared_memory, stream, params, 0));
    return;
  }}
  else {{
    HIP_CHECK(hipSymbolTable.hipModuleLaunchKernel(function, gridX, gridY, gridZ, {warp_size}*num_warps, 1, 1, shared_memory, stream, params, 0));
  }}
}}

typedef struct _DevicePtrInfo {{
  hipDeviceptr_t dev_ptr;
  bool valid;
}} DevicePtrInfo;

static PyObject* data_ptr_str = NULL;
static PyObject* py_tdm_descriptor_type = NULL;

static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
  DevicePtrInfo ptr_info;
  hipError_t status = hipSuccess;
  ptr_info.dev_ptr = 0;
  ptr_info.valid = true;
  if (PyLong_Check(obj)) {{
    ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(obj);
    return ptr_info;
  }}
  if (obj == Py_None) {{
    // valid nullptr
    return ptr_info;
  }}
  PyObject *ret = PyObject_CallMethodNoArgs(obj, data_ptr_str);
  if (!ret) {{
    PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
    ptr_info.valid = false;
    goto cleanup;
  }}
  if (!PyLong_Check(ret)) {{
    PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
    ptr_info.valid = false;
    goto cleanup;
  }}
  ptr_info.dev_ptr = (hipDeviceptr_t)PyLong_AsUnsignedLongLong(ret);
  if (!ptr_info.dev_ptr)
    goto cleanup;
  uint64_t dev_ptr;
  status = hipSymbolTable.hipPointerGetAttribute(&dev_ptr, HIP_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
  if (status == hipErrorInvalidValue) {{
    PyErr_Format(PyExc_ValueError,
                 "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
    ptr_info.valid = false;
    // Clear and ignore HIP error
    (void)hipSymbolTable.hipGetLastError();
  }}
  ptr_info.dev_ptr = (hipDeviceptr_t)dev_ptr;
cleanup:
  // ret is NULL when the data_ptr() call itself failed.
  Py_XDECREF(ret);
  return ptr_info;
}}

static inline TDMDescriptor* getTDMDescriptor(PyObject* obj, int idx) {{
  if (Py_TYPE(obj) != (PyTypeObject*)py_tdm_descriptor_type) {{
    PyErr_Format(PyExc_TypeError, "object must be of type PyTDMDescriptor, got %s", Py_TYPE(obj)->tp_name);
    return NULL;
  }}

  TDMDescriptor* desc = &((PyTDMDescriptorObject*)obj)->desc;
  return desc;
}}

static uint16_t pack_fp16(double f) {{
  uint16_t result;
  // from https://github.com/python/pythoncapi-compat/blob/5e317108f872c904eb726cb8d560dcadbdf88a72/pythoncapi_compat.h#L482-L492
#if 0x030600B1 <= PY_VERSION_HEX && PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION)
  _PyFloat_Pack2(f, (unsigned char*)&result, 1);
#else
  PyFloat_Pack2(f, (char*)&result, 1);
#endif
  return result;
}}

static uint16_t pack_bf16(double f) {{
  float f32 = (float)f;
  uint32_t u32 = *(uint32_t*)&f32;
  return (uint16_t)(u32 >> 16);
}}

static uint32_t pack_fp32(double f) {{
  float f32 = (float)f;
  return *(uint32_t*)&f32;
}}

static uint64_t pack_fp64(double f) {{
  return *(uint64_t*)&f;
}}

static PyObject* launch(PyObject* self, PyObject* args) {{
  int gridX, gridY, gridZ;
  uint64_t _stream;
  uint64_t _function;
  int launch_cooperative_grid;
  PyObject *profile_scratch_obj = NULL;
  PyObject *launch_enter_hook = NULL;
  PyObject *launch_exit_hook = NULL;
  PyObject *kernel_metadata = NULL;
  PyObject *launch_metadata = NULL;
  {arg_extract_decls}
  if(!PyArg_ParseTuple(args, \"{parse_format}\", &launch_cooperative_grid,
                                           &gridX, &gridY, &gridZ, &_stream, &_function, &profile_scratch_obj,
                                           &kernel_metadata, &launch_metadata,
                                           &launch_enter_hook, &launch_exit_hook {args_list})) {{
    return NULL;
  }}

  // extract kernel metadata
  int num_warps, num_ctas, shared_memory;
  if (!PyArg_ParseTuple(kernel_metadata, \"iii\", &num_warps, &num_ctas, &shared_memory)) {{
    return NULL;
  }}
  // extract launch metadata
  if (launch_enter_hook != Py_None){{
    PyObject* ret = PyObject_CallOneArg(launch_enter_hook, launch_metadata);
    if (!ret)
      return NULL;
    Py_DECREF(ret);
  }}

  hipDeviceptr_t profile_scratch = 0;
  if (profile_scratch_obj != Py_None) {{
    DevicePtrInfo profile_scratch_info = getPointer(profile_scratch_obj, -1);
    if (!profile_scratch_info.valid) {{
      return NULL;
    }}
    profile_scratch = profile_scratch_info.dev_ptr;
  }}

  // raise exception asap
  {newline.join(tensor_desc_decls)}
  {newline.join(ptr_decls)}
  {newline.join(float_storage_decls)}
  _launch(gridX, gridY, gridZ, num_warps, num_ctas, launch_cooperative_grid, shared_memory, (hipStream_t)_stream, (hipFunction_t)_function, (hipDeviceptr_t)profile_scratch{', ' + ', '.join(internal_args_list) if len(internal_args_list) > 0 else ''});

  if(launch_exit_hook != Py_None){{
    PyObject* ret = PyObject_CallOneArg(launch_exit_hook, launch_metadata);
    if (!ret)
      return NULL;
    Py_DECREF(ret);
  }}

  if(PyErr_Occurred()) {{
    return NULL;
  }}
  Py_RETURN_NONE;
}}

static PyMethodDef ModuleMethods[] = {{
  {{"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"}},
  {{NULL, NULL, 0, NULL}} // sentinel
}};

static struct PyModuleDef ModuleDef = {{
  PyModuleDef_HEAD_INIT,
  \"__triton_launcher\",
  NULL, //documentation
  -1, //size
  ModuleMethods
}};

PyMODINIT_FUNC PyInit___triton_launcher(void) {{
  if (!initSymbolTable()) {{
    return NULL;
  }}
  PyObject *m = PyModule_Create(&ModuleDef);
  if(m == NULL) {{
    return NULL;
  }}
  data_ptr_str = PyUnicode_InternFromString("data_ptr");
  if(data_ptr_str == NULL) {{
    return NULL;
  }}
  PyObject* driver_mod = PyImport_ImportModule("triton.backends.amd.driver");
  if (driver_mod == NULL) {{
    return NULL;
  }}
  py_tdm_descriptor_type = PyObject_GetAttrString(driver_mod, "PyTDMDescriptor");
  if (py_tdm_descriptor_type == NULL) {{
    return NULL;
  }}

  PyModule_AddFunctions(m, ModuleMethods);
  return m;
}}
"""
    return src
+
+
def make_tensordesc_arg(arg, kernel_metadata, tensordesc_metadata):
    """
    Expand one tensor descriptor argument into the kernel arguments it stands for.

    When `tensordesc_metadata` is given, the first element of the result is a
    TDMDescriptor object created through the active HIP driver. When it is
    None, the descriptor is decomposed into base pointer, shape, strides and a
    padding flag instead. In both cases the shape and strides are appended at
    the end to match the expected kernel signature.
    """
    dims = arg.shape
    steps = arg.strides

    if tensordesc_metadata is None:
        # The frontend already decomposes host-side tensor descriptors into
        # tensor desc, shape and strides, but those cannot be used while
        # processing the descriptor itself, so a second decomposition is
        # supplied here. As a consequence shape and strides are passed twice.
        pad_with_nan = arg.padding == "nan"
        return [arg.base, *dims, *steps, pad_with_nan, *dims, *steps]

    base_ptr = arg.base.data_ptr()

    assert "elem_bits" in tensordesc_metadata and "block_size" in tensordesc_metadata
    elem_bits = tensordesc_metadata["elem_bits"]
    block_size = tensordesc_metadata["block_size"]

    padding_pairs = tensordesc_metadata.get("interval_padding_pairs", [])
    if padding_pairs:
        assert len(padding_pairs) == 1 and len(padding_pairs[0]) == 2
        pad_interval, pad_amount = padding_pairs[0]
    else:
        pad_interval, pad_amount = 0, 0

    # The first entry of the packed kernel metadata is read as num_warps.
    num_warps = kernel_metadata[0]

    driver = triton.runtime.driver.active
    assert isinstance(driver, HIPDriver)

    descriptor = driver.utils.create_tdm_descriptor(elem_bits, block_size, num_warps, pad_interval, pad_amount,
                                                    dims, steps, base_ptr)
    return [descriptor, *dims, *steps]
+
+
def wrap_handle_tensordesc(launcher, signature, tensordesc_metadata):
    """
    Return a launcher that expands tensor descriptor arguments before launching.

    If the signature has no tensor descriptor argument, `launcher` itself is
    returned. Otherwise `tensordesc_metadata` decides, per descriptor, whether
    a TDMDescriptor object is created or the descriptor is decomposed.

    Args:
        launcher (callable): The original kernel launcher function.
        signature (Dict[int, str]): The kernel signature mapping argument indices to types.
        tensordesc_metadata (List[Dict] or None): The list of tensor descriptor metadata, following the order
                                                  of tensor descriptor arguments. If None, decompose tensor descriptors.
    Returns:
        launcher (callable): The wrapped kernel launcher function.
    """

    def _is_tensordesc(sig):
        return isinstance(sig, str) and sig.startswith("tensordesc")

    # Positions (among the kernel arguments) that hold a tensor descriptor.
    tensordesc_indices = {pos for pos, sig in enumerate(signature.values()) if _is_tensordesc(sig)}
    if not tensordesc_indices:
        return launcher

    assert not tensordesc_metadata or len(tensordesc_metadata) == len(tensordesc_indices)
    if not tensordesc_metadata:
        tensordesc_metadata = [None] * len(tensordesc_indices)

    def inner(*args):
        # The fixed launch arguments come first, one per format character.
        num_meta_args = len(_BASE_ARGS_FORMAT)
        meta_args, raw_kernel_args = args[:num_meta_args], args[num_meta_args:]

        final_args = []
        seen_descriptors = 0
        for pos, value in enumerate(raw_kernel_args):
            if pos not in tensordesc_indices:
                final_args.append(value)
                continue
            # meta_args[7] is the kernel_metadata tuple.
            expanded = make_tensordesc_arg(value, meta_args[7], tensordesc_metadata[seen_descriptors])
            final_args.extend(expanded)
            seen_descriptors += 1
        return launcher(*meta_args, *final_args)

    return inner
+
+
class HIPLauncher(object):
    """Builds the compiled launcher module for one kernel and calls it on demand."""

    def __init__(self, src, metadata):
        """Generate, compile and wrap the launcher for `src`.

        Reads `src.signature`, optionally `src.constants` (string keys are
        resolved through `src.fn.arg_names`), and from `metadata` the warp
        size, cooperative-grid flag, profile scratch size/alignment and the
        optional `tensordesc_meta`.
        """

        def arg_idx(key):
            # Constant keys given by name are turned into a 1-tuple index.
            if isinstance(key, str):
                return (src.fn.arg_names.index(key), )
            return key

        raw_constants = getattr(src, "constants", dict())
        constants = {arg_idx(key): value for key, value in raw_constants.items()}
        signature = dict(src.signature.items())
        tensordesc_meta = getattr(metadata, "tensordesc_meta", None)

        launcher_src = make_launcher(constants, signature, metadata.warp_size, tensordesc_meta)
        mod = compile_module_from_src(src=launcher_src, name="__triton_launcher", include_dirs=include_dirs)

        self.launch = wrap_handle_tensordesc(mod.launch, signature, tensordesc_meta)
        self.launch_cooperative_grid = metadata.launch_cooperative_grid
        self.profile_scratch_size = metadata.profile_scratch_size
        self.profile_scratch_align = metadata.profile_scratch_align

    def __call__(self, gridX, gridY, gridZ, stream, function, *args):
        """Allocate profile scratch if requested and launch the kernel."""

        def allocate_scratch(size, align, allocator):
            if not size > 0:
                return None
            # One `size`-sized slot per program in the launch grid.
            total_programs = gridX * gridY * gridZ
            alloc_fn = allocator.get()
            return alloc_fn(total_programs * size, align, stream)

        profile_scratch = allocate_scratch(self.profile_scratch_size, self.profile_scratch_align,
                                           _allocation._profile_allocator)

        self.launch(self.launch_cooperative_grid, gridX, gridY, gridZ, stream, function, profile_scratch, *args)
+
+
class HIPDriver(GPUDriver):
    """GPU driver implementation for HIP devices, accessed through torch's ``cuda`` interface."""

    def __init__(self):
        super().__init__()
        self.utils = HIPUtils()
        self.launcher_cls = HIPLauncher

    def get_device_interface(self):
        """Return the torch module used to talk to the device (``torch.cuda``)."""
        import torch
        return torch.cuda

    @staticmethod
    def is_active():
        """Return True when torch is importable, reports an available device and is a HIP build."""
        try:
            import torch
            has_device = torch.cuda.is_available()
            return has_device and (torch.version.hip is not None)
        except ImportError:
            return False

    def map_python_to_cpp_type(self, ty: str) -> str:
        """Translate a signature type string to its C type via `ty_to_cpp`."""
        return ty_to_cpp(ty)

    def get_current_target(self):
        """Build the `GPUTarget` for the current device, honouring the arch override knob."""
        props = self.utils.get_device_properties(self.get_current_device())
        arch = knobs.runtime.override_arch or props['arch']
        # Only the part of the arch string before the first ':' is kept.
        base_arch = arch.split(':')[0]
        return GPUTarget("hip", base_arch, props['warpSize'])

    def get_active_torch_device(self):
        """Return the torch device object for the current device."""
        import torch
        # when using hip devices, the device string in pytorch is "cuda"
        return torch.device("cuda", self.get_current_device())

    def get_benchmarker(self):
        """Return the benchmarking function (`triton.testing.do_bench`)."""
        from triton.testing import do_bench
        return do_bench

    def get_empty_cache_for_benchmark(self):
        """Allocate a 256 MiB int tensor on the device for use as a benchmark cache buffer."""
        import torch

        # It's the same as the Nvidia backend.
        cache_size = 256 * 1024 * 1024
        num_ints = int(cache_size // 4)
        return torch.empty(num_ints, dtype=torch.int, device='cuda')

    def clear_cache(self, cache):
        """Zero the given cache tensor in place."""
        cache.zero_()
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/compiler.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/compiler.py
new file mode 100644
index 0000000000000000000000000000000000000000..1366e70ab7cf1474a5759b842ae56b7b72d7eb20
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/compiler.py
@@ -0,0 +1,553 @@
+from triton.backends.compiler import BaseBackend, GPUTarget, Language
+from triton._C.libtriton import ir, passes, llvm, nvidia
+from triton import knobs
+from triton.runtime.errors import PTXASError
+
+from dataclasses import dataclass
+import functools
+from typing import Any, Dict, Tuple, Optional
+from types import ModuleType
+import hashlib
+import re
+import tempfile
+import signal
+import os
+import subprocess
+from pathlib import Path
+
+
def min_dot_size(target: GPUTarget):
    """Return a callable that reports the minimum (m, n, k) dot size for two operand types.

    `target` is accepted for interface reasons and is not read.
    """

    def check_dot_compatibility(lhs_type, rhs_type) -> Tuple[int, int, int]:  # [m, n, k]
        lhs_bits = lhs_type.scalar.primitive_bitwidth
        rhs_bits = rhs_type.scalar.primitive_bitwidth
        assert lhs_bits == rhs_bits, "lhs and rhs bitwidth must be the same"
        # For small M/N the input we can still use tensorcores with padding,
        # so only K is constrained: 32 for 8-bit operands, 16 otherwise.
        min_k = 32 if lhs_bits == 8 else 16
        return (1, 1, min_k)

    return check_dot_compatibility
+
+
def get_ptxas(arch: int) -> knobs.NvidiaTool:
    """Select the ptxas tool knob for `arch`: the Blackwell one from arch 100 on, the default one below."""
    if arch >= 100:
        return knobs.nvidia.ptxas_blackwell
    return knobs.nvidia.ptxas
+
+
@functools.lru_cache()
def get_ptxas_version(arch: int = 80):
    """Return the ``--version`` output of the ptxas tool for `arch` (cached per arch).

    When the mock PTX version knob is set, its value is returned instead and
    ptxas is not executed.
    """
    mock_ver = knobs.nvidia.mock_ptx_version
    if mock_ver is None:
        raw_output = subprocess.check_output([get_ptxas(arch).path, "--version"])
        return raw_output.decode("utf-8")
    # This is not really a version of ptxas, but it is good enough for testing
    return mock_ver
+
+
@functools.lru_cache()
def ptx_get_version(cuda_version) -> int:
    '''
    Translate a "major.minor" CUDA version string into a PTX version number.

    The result is the PTX version times ten (e.g. 84 for PTX 8.4). Raises
    RuntimeError for CUDA versions older than 10.0.
    '''
    assert isinstance(cuda_version, str)
    major, minor = (int(part) for part in cuda_version.split('.'))

    if major >= 13:
        base_ptx = 90
        return base_ptx + (major - 13) * 10 + minor
    if major == 12:
        # From CUDA 12.6 on, the PTX minor version trails the CUDA minor by one.
        offset = 0 if minor < 6 else 1
        return 80 + minor - offset
    if major == 11:
        return 70 + minor
    if major == 10:
        return 63 + minor

    raise RuntimeError("Triton only support CUDA 10.0 or higher, but got CUDA version: " + cuda_version)
+
+
def get_ptx_version_from_options(options, arch: int):
    """Return `options.ptx_version`, or derive it from the ptxas tool version for `arch` when unset."""
    requested = options.ptx_version
    if requested is not None:
        return requested
    cuda_version = get_ptxas(arch).version
    return ptx_get_version(cuda_version)
+
+
@functools.lru_cache()
def get_features(options, arch: int):
    """Return the LLVM feature string (``+ptxNN``) for the PTX version selected by `options` and `arch`."""
    # PTX 8.6 is the max version supported by llvm c1188642.
    #
    # To check if a newer PTX version is supported, increase this value
    # and run a test. If it's not supported, LLVM will print a warning
    # like "+ptx8.4 is not a recognized feature for this target".
    max_llvm_ptx_version = 86
    llvm_ptx_version = min(max_llvm_ptx_version, get_ptx_version_from_options(options, arch))
    return f'+ptx{llvm_ptx_version}'
+
+
@functools.lru_cache(None)
def file_hash(path):
    """Return the hex SHA-256 digest of the file at `path` (result cached per path)."""
    digest = hashlib.sha256()
    with open(path, "rb") as stream:
        digest.update(stream.read())
    return digest.hexdigest()
+
+
def sm_arch_from_capability(capability: int):
    """Format a compute capability as an ``sm_NN`` name, with an ``a`` suffix from capability 90 on."""
    # TODO: Handle non-"a" sms
    if capability >= 90:
        return f"sm_{capability}a"
    return f"sm_{capability}"
+
+
@dataclass(frozen=True)
class CUDAOptions:
    """Immutable set of compilation options for the CUDA backend."""

    num_warps: int = 4
    num_ctas: int = 1
    num_stages: int = 3
    warp_size: int = 32
    # maxnreg corresponds to the ptx parameter .maxnreg, which controls the
    # maximum number of 32-bit registers used by one thread.
    maxnreg: Optional[int] = None
    ptx_version: int = None
    ptx_options: Optional[str] = knobs.nvidia.ptxas_options
    ir_override: Optional[str] = None  # filename of a user-defined IR (*.{ttir|ttgir|llir|ptx})
    enable_fp_fusion: bool = True
    enable_reflect_ftz: bool = True  # ftz in libdevice
    launch_cooperative_grid: bool = False
    launch_pdl: bool = False
    supported_fp8_dtypes: Tuple[str] = ("fp8e5", "fp8e4b15")
    deprecated_fp8_dot_operand_dtypes: Tuple[str] = ()
    default_dot_input_precision: str = "tf32"
    allowed_dot_input_precisions: Tuple[str] = ("tf32", "tf32x3", "ieee", 'bf16x3', 'bf16x6')
    max_num_imprecise_acc_default: bool = None
    extern_libs: dict = None
    debug: bool = False
    backend_name: str = 'cuda'
    sanitize_overflow: bool = True
    arch: str = None
    instrumentation_mode: str = ""

    def __post_init__(self):
        """Fill in the default libdevice path, freeze `extern_libs` as a tuple and validate `num_warps`."""
        libs = dict(self.extern_libs) if self.extern_libs is not None else {}
        if not libs.get('libdevice', None):
            bundled_libdevice = Path(__file__).parent / 'lib' / 'libdevice.10.bc'
            libs['libdevice'] = knobs.nvidia.libdevice_path or str(bundled_libdevice)

        # The dataclass is frozen, so the attribute has to be set through object.
        object.__setattr__(self, 'extern_libs', tuple(libs.items()))
        assert self.num_warps > 0 and (self.num_warps & (self.num_warps - 1)) == 0, \
            "num_warps must be a power of 2"

    def hash(self):
        """Return a SHA-256 hex digest over all option values.

        External libraries contribute the hash of their file contents rather
        than their path.
        """
        values = dict(self.__dict__)
        lib_digests = []
        for lib_name, lib_path in sorted(values["extern_libs"]):
            lib_digests.append((lib_name, file_hash(lib_path)))
        values["extern_libs"] = tuple(lib_digests)

        parts = [f"{name}-{val}" for name, val in sorted(values.items())]
        key = "_".join(parts)
        return hashlib.sha256(key.encode("utf-8")).hexdigest()
+
+
+class CUDABackend(BaseBackend):
+ instrumentation = None
+
+ @staticmethod
+ def supports_target(target: GPUTarget):
+ return target.backend == 'cuda'
+
+ def _parse_arch(self, arch):
+ pattern = r"^sm(\d+)$"
+ match = re.fullmatch(pattern, arch)
+ if not match:
+ raise ValueError(f"TRITON_OVERRIDE_ARCH must have the form {pattern}")
+ return int(match.group(1))
+
+ def get_target_name(self, options) -> str:
+ capability = self._parse_arch(options.arch)
+ return f"cuda:{capability}"
+
    def __init__(self, target: GPUTarget) -> None:
        """Initialise the backend for `target` and set the binary extension to ``cubin``."""
        super().__init__(target)
        self.binary_ext = "cubin"
+
+ def parse_options(self, opts) -> Any:
+ # Enable debug mode for ConSan, so device-side assertions are not optimized out
+ if "instrumentation_mode" in opts and opts["instrumentation_mode"] == "consan":
+ opts["debug"] = True
+
+ args = {'arch': knobs.runtime.override_arch or f"sm{self.target.arch}"}
+ args.update({k: opts[k] for k in CUDAOptions.__dataclass_fields__.keys() if k in opts if opts[k] is not None})
+ capability = int(self._parse_arch(args["arch"]))
+
+ if args.get("num_ctas", 1) > 1 and capability < 90:
+ raise ValueError((f"num_ctas > 1 requires NVIDIA SM90+ (Hopper). "
+ f"Current target is sm_{capability}. This configuration will fail. "
+ f"Please set num_ctas=1 or target an SM90+ GPU."))
+
+ if "supported_fp8_dtypes" not in args:
+ supported_fp8_dtypes = set(CUDAOptions.supported_fp8_dtypes)
+ if capability >= 89:
+ supported_fp8_dtypes.add("fp8e4nv")
+ args["supported_fp8_dtypes"] = tuple(sorted(supported_fp8_dtypes))
+
+ if "deprecated_fp8_dot_operand_dtypes" not in args:
+ if capability >= 90:
+ args["deprecated_fp8_dot_operand_dtypes"] = ("fp8e4b15", )
+
+ if "enable_fp_fusion" not in args:
+ args["enable_fp_fusion"] = knobs.language.default_fp_fusion
+
+ args["max_num_imprecise_acc_default"] = 2**30 if capability == 90 else 0
+
+ return CUDAOptions(**args)
+
+ def pack_metadata(self, metadata):
+ return (
+ metadata.num_warps,
+ metadata.num_ctas,
+ metadata.shared,
+ )
+
+ def get_codegen_implementation(self, options):
+ import triton.language.extra.cuda as cuda
+ capability = int(self._parse_arch(options.arch))
+ codegen_fns = {
+ "convert_custom_types":
+ cuda.convert_custom_float8_sm80 if capability >= 80 else cuda.convert_custom_float8_sm70, "min_dot_size":
+ min_dot_size(self.target)
+ }
+ return codegen_fns
+
+ def get_module_map(self) -> Dict[str, ModuleType]:
+ from triton.language.extra.cuda import libdevice
+ return {"triton.language.extra.libdevice": libdevice}
+
+ def load_dialects(self, ctx):
+ nvidia.load_dialects(ctx)
+ if CUDABackend.instrumentation:
+ CUDABackend.instrumentation.load_dialects(ctx)
+
    @staticmethod
    def make_ttir(mod, metadata, opt, capability):
        """Run the TTIR pass pipeline on `mod` and return it.

        `metadata` and `opt` are not read by this stage; `capability` only
        decides whether tensor descriptors are rewritten to pointers.
        """
        pm = ir.pass_manager(mod.context)
        pm.enable_debug()
        passes.common.add_inliner(pm)
        passes.ttir.add_rewrite_tensor_pointer(pm)
        # Capabilities below 90 get tensor descriptors rewritten to plain pointers.
        # NOTE(review): indentation was lost in the reviewed copy; only this one
        # pass is assumed to be conditional — confirm against upstream.
        if capability // 10 < 9:
            passes.ttir.add_rewrite_tensor_descriptor_to_pointer(pm)
        passes.common.add_canonicalizer(pm)
        passes.ttir.add_combine(pm)
        passes.ttir.add_reorder_broadcast(pm)
        passes.common.add_cse(pm)
        passes.common.add_symbol_dce(pm)
        passes.ttir.add_loop_unroll(pm)
        pm.run(mod, 'make_ttir')
        return mod
+
    @staticmethod
    def make_ttgir(mod, metadata, opt, capability):
        """Convert `mod` from TTIR to TTGIR, run the TTGIR optimisation pipeline and return it.

        Reads `opt.maxnreg`, `opt.num_warps`, `opt.num_ctas` and
        `opt.num_stages`; the loop/pipelining passes that are scheduled depend
        on `capability // 10`. After the run, `metadata["tensordesc_meta"]` is
        set from the module.
        """
        # Set maxnreg on all kernels, if it was provided.
        if opt.maxnreg is not None:
            mod.set_attr("ttg.maxnreg", ir.builder(mod.context).get_int32_attr(opt.maxnreg))

        pm = ir.pass_manager(mod.context)
        dump_enabled = pm.enable_debug()
        emuTF32 = (capability // 10 >= 8)
        passes.ttir.add_convert_to_ttgpuir(pm, f"cuda:{capability}", opt.num_warps, 32, opt.num_ctas)
        # optimize TTGIR
        passes.ttgpuir.add_coalesce(pm)
        passes.ttgpuir.add_f32_dot_tc(pm, emuTF32)
        # TODO(Qingyi): Move PlanCTAPass to the front of CoalescePass
        nvidia.passes.ttnvgpuir.add_plan_cta(pm)
        passes.ttgpuir.add_remove_layout_conversions(pm)
        passes.ttgpuir.add_optimize_thread_locality(pm)
        passes.ttgpuir.add_accelerate_matmul(pm)
        passes.ttgpuir.add_remove_layout_conversions(pm)
        passes.ttgpuir.add_optimize_dot_operands(pm, capability >= 80)
        nvidia.passes.ttnvgpuir.add_optimize_descriptor_encoding(pm)
        passes.ttir.add_loop_aware_cse(pm)
        # Capability 8x / 9x pipeline.
        if capability // 10 in [8, 9]:
            passes.ttgpuir.add_fuse_nested_loops(pm)
            passes.common.add_canonicalizer(pm)
            passes.ttir.add_triton_licm(pm)
            passes.common.add_canonicalizer(pm)
            passes.ttgpuir.add_combine_tensor_select_and_if(pm)
            nvidia.passes.hopper.add_hopper_warpspec(pm, opt.num_stages, dump_enabled)
            passes.ttgpuir.add_assign_latencies(pm, opt.num_stages)
            passes.ttgpuir.add_schedule_loops(pm)
            passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
        # Capability 10x and newer pipeline.
        elif capability // 10 >= 10:
            passes.ttgpuir.add_fuse_nested_loops(pm)
            passes.common.add_canonicalizer(pm)
            passes.ttir.add_triton_licm(pm)
            passes.ttgpuir.add_optimize_accumulator_init(pm)
            passes.ttgpuir.add_hoist_tmem_alloc(pm, False)
            nvidia.passes.ttnvgpuir.add_promote_lhs_to_tmem(pm)
            passes.ttgpuir.add_assign_latencies(pm, opt.num_stages)
            passes.ttgpuir.add_schedule_loops(pm)
            passes.ttgpuir.add_warp_specialize(pm, opt.num_stages)
            passes.ttgpuir.add_pipeline(pm, opt.num_stages, dump_enabled)
            passes.ttgpuir.add_optimize_partition_warps(pm)
            passes.ttgpuir.add_combine_tensor_select_and_if(pm)
            # hoist again and allow hoisting out of if statements
            passes.ttgpuir.add_hoist_tmem_alloc(pm, True)
            nvidia.passes.ttnvgpuir.add_remove_tmem_tokens(pm)
        # NOTE(review): indentation was lost in the reviewed copy; the else
        # branch is assumed to hold only the LICM pass — confirm against upstream.
        else:
            passes.ttir.add_triton_licm(pm)
        passes.common.add_canonicalizer(pm)
        passes.ttir.add_loop_aware_cse(pm)
        passes.ttgpuir.add_prefetch(pm)
        passes.ttgpuir.add_optimize_dot_operands(pm, capability >= 80)
        passes.ttgpuir.add_coalesce_async_copy(pm)
        nvidia.passes.ttnvgpuir.add_optimize_tmem_layouts(pm)
        # NOTE(review): only the TMA lowering pass is assumed to be conditional
        # here (nesting reconstructed) — confirm against upstream.
        if capability // 10 >= 9:
            nvidia.passes.ttnvgpuir.add_tma_lowering(pm)
        passes.ttgpuir.add_remove_layout_conversions(pm)
        nvidia.passes.ttnvgpuir.add_interleave_tmem(pm)
        passes.ttgpuir.add_reduce_data_duplication(pm)
        passes.ttgpuir.add_reorder_instructions(pm)
        passes.ttir.add_loop_aware_cse(pm)
        passes.common.add_symbol_dce(pm)
        nvidia.passes.ttnvgpuir.add_fence_insertion(pm, capability)
        nvidia.passes.ttnvgpuir.add_lower_mma(pm)
        passes.common.add_sccp(pm)
        passes.common.add_cse(pm)
        passes.common.add_canonicalizer(pm)

        pm.run(mod, 'make_ttgir')
        # Expose the module's tensor descriptor metadata to later stages.
        metadata["tensordesc_meta"] = mod.get_tensordesc_metadata()
        return mod
+
+ def gluon_to_ttgir(self, src, metadata, options, capability):
+ """Run the Gluon pass pipeline over ``src`` and return the same module object.
+
+ A debug-enabled pass manager is created on the module's context, the
+ passes below are queued in order, and the pipeline is executed under the
+ name 'gluon_to_ttgir'. Afterwards the module's tensor-descriptor metadata
+ is stored in ``metadata["tensordesc_meta"]``. ``options`` and
+ ``capability`` are accepted but not read by this method.
+ """
+ mod = src
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+
+ passes.gluon.add_inliner(pm)
+ passes.gluon.add_infer_coalesced_encodings(pm)
+ passes.gluon.add_resolve_auto_encodings(pm)
+ nvidia.passes.ttnvgpuir.add_tma_lowering(pm)
+ passes.gluon.add_canonicalizer(pm)
+ passes.common.add_sccp(pm)
+ passes.ttir.add_loop_aware_cse(pm)
+ passes.gluon.add_canonicalizer(pm)
+ passes.ttgpuir.add_combine_tensor_select_and_if(pm)
+
+ pm.run(mod, 'gluon_to_ttgir')
+ metadata["tensordesc_meta"] = mod.get_tensordesc_metadata()
+ return mod
+
+ def make_llir(self, src, metadata, options, capability):
+ """Lower a TritonGPU module to LLVM IR and return it as a string.
+
+ The method runs the MLIR lowering pipeline on ``src``, converts the
+ result to an LLVM module, attaches the NVPTX data layout, optionally links
+ the external libraries listed in ``options.extern_libs``, and optimizes at
+ O3. Module attributes (warp count, shared/tensor memory sizes, scratch
+ sizes and alignments) are copied into ``metadata``.
+
+ Raises RuntimeError when ``knobs.compilation.enable_asan`` is set.
+ """
+ ptx_version = get_ptx_version_from_options(options, self.target.arch)
+
+ mod = src
+ # TritonGPU -> LLVM-IR (MLIR)
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+
+ passes.ttgpuir.add_combine_tensor_select_and_if(pm)
+ passes.ttgpuir.add_allocate_warp_groups(pm)
+ passes.convert.add_scf_to_cf(pm)
+ passes.gluon.add_inliner(pm)
+ nvidia.passes.ttgpuir.add_allocate_shared_memory_nv(pm, capability, ptx_version)
+ nvidia.passes.ttnvgpuir.add_allocate_tensor_memory(pm)
+ nvidia.passes.ttnvgpuir.add_check_matmul_two_cta(pm)
+ if knobs.compilation.instrumentation_mode == "consan":
+ # Call ConcurrencySanitizerPass here, before allocating global scratch memory but after allocating tensor and shared
+ passes.ttgpuir.add_concurrency_sanitizer(pm)
+ passes.ttgpuir.add_allocate_global_scratch_memory(pm)
+ nvidia.passes.ttnvgpuir.add_proxy_fence_insertion(pm, capability)
+ # instrumentation point here so we can override IRs above (e.g., ttir and ttgir)
+ if CUDABackend.instrumentation:
+ CUDABackend.instrumentation.patch("ttgpuir_to_llvmir", pm, mod.context)
+ nvidia.passes.ttgpuir.add_to_llvmir(pm, capability, ptx_version)
+ passes.common.add_canonicalizer(pm)
+ passes.common.add_cse(pm)
+ nvidia.passes.ttnvgpuir.add_nvgpu_to_llvm(pm)
+ nvidia.passes.ttnvgpuir.add_warp_specialize_to_llvm(pm)
+ passes.common.add_canonicalizer(pm)
+ passes.common.add_cse(pm)
+ passes.common.add_symbol_dce(pm)
+ passes.convert.add_nvvm_to_llvm(pm)
+
+ # add_di_scope joins this pipeline only when local-variable extraction is
+ # off; otherwise it is run in its own pass manager further down.
+ if not knobs.compilation.disable_line_info and not knobs.compilation.dump_ir_extract_di_local_variables:
+ passes.llvmir.add_di_scope(pm)
+
+ if CUDABackend.instrumentation:
+ CUDABackend.instrumentation.patch("llvmir_to_llvm", pm, mod.context)
+
+ pm.run(mod, 'make_llir')
+
+ if knobs.compilation.dump_ir_extract_di_local_variables:
+ # comments below on why separate it
+ if not knobs.compilation.disable_line_info:
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.llvmir.add_di_scope(pm)
+ pm.run(mod, 'make_llir.disable_line_info')
+
+ # insert dbg intrinsic with several DI Attribute including source
+ # var name and type info note: unknown reason for now, but this
+ # pass and add_di_scope has to be run separately, otherwise if we
+ # put them into previous pipline, it trigger a segmentfault without
+ # any error message; could be due to a bug in mlir or pybind11
+ pm = ir.pass_manager(mod.context)
+ pm.enable_debug()
+ passes.llvmir.add_di_local_variable(pm)
+ pm.run(mod, 'make_llir.dump_ir_extract_di_local_variables')
+
+ # LLVM-IR (MLIR) -> LLVM-IR (LLVM)
+ llvm.init_targets()
+ context = llvm.context()
+ if knobs.compilation.enable_asan:
+ raise RuntimeError(
+ "Address Sanitizer Error: Address sanitizer is currently only supported on the AMD backend")
+ llvm_mod = llvm.to_module(mod, context)
+ proc = sm_arch_from_capability(capability)
+ features = get_features(options, self.target.arch)
+ triple = 'nvptx64-nvidia-cuda'
+ nvidia.set_short_ptr()
+ llvm.attach_datalayout(llvm_mod, triple, proc, features)
+ if options.enable_reflect_ftz:
+ nvidia.set_nvvm_reflect_ftz(llvm_mod)
+
+ # Linking is skipped unless libraries were configured and the module
+ # reports external dependencies.
+ if options.extern_libs and nvidia.has_extern_deps(llvm_mod):
+ paths = [path for (name, path) in options.extern_libs]
+ llvm.link_extern_libs(llvm_mod, paths)
+
+ llvm.optimize_module(llvm_mod, llvm.OPTIMIZE_O3)
+
+ # Get some metadata
+ # warp-specialization mutates num_warps
+ total_num_warps = src.get_int_attr("ttg.total-num-warps")
+ if total_num_warps is not None:
+ metadata["num_warps"] = total_num_warps
+ metadata["shared"] = src.get_int_attr("ttg.shared")
+ metadata["tmem_size"] = src.get_int_attr("ttg.tensor_memory_size")
+ metadata["global_scratch_size"] = src.get_int_attr("ttg.global_scratch_memory_size")
+ metadata["global_scratch_align"] = src.get_int_attr("ttg.global_scratch_memory_alignment")
+ # Missing (or zero) profile attributes fall back to size 0 / alignment 1.
+ metadata["profile_scratch_size"] = src.get_int_attr("ttg.profile_scratch_memory_size") or 0
+ metadata["profile_scratch_align"] = src.get_int_attr("ttg.profile_scratch_memory_alignment") or 1
+ # Stringify before dropping the module and its context.
+ ret = str(llvm_mod)
+ del llvm_mod
+ del context
+ return ret
+
+ def make_ptx(self, src, metadata, opt, capability):
+ """Translate LLVM IR text to PTX assembly and return the PTX string.
+
+ The single ``.visible .entry`` kernel name found in the output is stored
+ in ``metadata["name"]`` (an assertion requires exactly one). The
+ ``.version`` and ``.target`` directives are rewritten from the selected
+ PTX version and ``capability``. Unless local-variable debug info was
+ requested, the ``debug`` flag is stripped from the PTX. The result is
+ printed when ``knobs.nvidia.dump_nvptx`` is set.
+ """
+ ptx_version = get_ptx_version_from_options(opt, self.target.arch)
+
+ triple = 'nvptx64-nvidia-cuda'
+ proc = sm_arch_from_capability(capability)
+ features = get_features(opt, self.target.arch)
+ flags = ["nvptx-mad-wide-opt"]
+ ret = llvm.translate_to_asm(src, triple, proc, features, flags, opt.enable_fp_fusion, False)
+ # Find kernel names (there should only be one)
+ names = re.findall(r".visible .entry ([a-zA-Z_][a-zA-Z0-9_]*)", ret)
+ assert len(names) == 1
+ metadata["name"] = names[0]
+ # post-process
+ # Integer version NM is rendered as "N.M" (tens digit, then units digit).
+ ptx_version = f'{ptx_version//10}.{ptx_version%10}'
+ ret = re.sub(r'\.version \d+\.\d+', f'.version {ptx_version}', ret, flags=re.MULTILINE)
+ ret = re.sub(r'\.target sm_\d+', f'.target sm_{capability}', ret, flags=re.MULTILINE)
+ if not knobs.compilation.dump_ir_extract_di_local_variables:
+ # Remove the debug flag that prevents ptxas from optimizing the code
+ # Note: if this flag is removed, the source var name and type info will be lost when ptx was compiled into cubin
+ # and we may not be able to see them in cuda-gdb
+ ret = re.sub(r",\s*debug|debug,\s*", "", ret)
+ if knobs.nvidia.dump_nvptx:
+ print("// -----// NVPTX Dump //----- //")
+ print(ret)
+ return ret
+
+ def make_cubin(self, src, metadata, opt, capability):
+ """Assemble PTX text into a cubin with ``ptxas`` and return its bytes.
+
+ The PTX is written to a temporary ``.ptx`` file and ptxas stderr is
+ captured in a temporary ``.log`` file. The object is written next to the
+ PTX file with an added ``.o`` suffix and removed after being read.
+
+ Raises PTXASError when ptxas exits with a non-zero status. The error
+ text, the ptxas log and the PTX source are printed first. On that path
+ only the log file is removed; the ``.ptx`` file named in the printed
+ repro command is left on disk.
+ """
+ ptxas = get_ptxas(self.target.arch).path
+ with tempfile.NamedTemporaryFile(delete=False, mode='w', suffix='.ptx') as fsrc, \
+ tempfile.NamedTemporaryFile(delete=False, mode='r', suffix='.log') as flog:
+ fsrc.write(src)
+ fsrc.flush()
+ fbin = fsrc.name + '.o'
+
+ debug_info = []
+ if knobs.compilation.disable_line_info:
+ # This option is ignored if used without -lineinfo
+ debug_info += ["-lineinfo", "-suppress-debug-info"]
+ elif knobs.nvidia.disable_ptxas_opt:
+ # Synthesize complete debug info
+ debug_info += ["-g"]
+ else:
+ # Only emit line info
+ debug_info += ["-lineinfo"]
+
+ fmad = [] if opt.enable_fp_fusion else ["--fmad=false"]
+ arch = sm_arch_from_capability(capability)
+
+ # Disable ptxas optimizations if requested
+ disable_opt = ['--opt-level', '0'] if knobs.nvidia.disable_ptxas_opt else []
+
+ # Accept more ptxas options if provided
+ ptx_extra_options = opt.ptx_options.split(" ") if opt.ptx_options else []
+
+ ptxas_cmd = [
+ ptxas, *debug_info, *fmad, '-v', *disable_opt, *ptx_extra_options, f'--gpu-name={arch}', fsrc.name,
+ '-o', fbin
+ ]
+ try:
+ subprocess.run(ptxas_cmd, check=True, close_fds=False, stderr=flog)
+ if knobs.nvidia.dump_ptxas_log:
+ with open(flog.name) as log_file:
+ print(log_file.read())
+
+ # Success path: both temporary inputs are deleted.
+ if os.path.exists(fsrc.name):
+ os.remove(fsrc.name)
+ if os.path.exists(flog.name):
+ os.remove(flog.name)
+ except subprocess.CalledProcessError as e:
+ with open(flog.name) as log_file:
+ log = log_file.read()
+ if os.path.exists(flog.name):
+ os.remove(flog.name)
+
+ # Map selected exit codes to a more specific headline.
+ if e.returncode == 255:
+ error = 'Internal Triton PTX codegen error'
+ elif e.returncode == 128 + signal.SIGSEGV:
+ error = '`ptxas` raised SIGSEGV'
+ else:
+ error = f'`ptxas` failed with error code {e.returncode}'
+
+ error = (f"{error}\n"
+ f"`ptxas` stderr:\n{log}\n"
+ f'Repro command: {" ".join(ptxas_cmd)}\n')
+
+ print(f"""
+
+================================================================
+{error}
+
+{src}
+================================================================
+please share the reproducer above with Triton project.
+""")
+ raise PTXASError(error)
+
+ with open(fbin, 'rb') as f:
+ cubin = f.read()
+ if os.path.exists(fbin):
+ os.remove(fbin)
+ return cubin
+
+ def add_stages(self, stages, options, language):
+ """Populate ``stages`` with the compilation callables for ``language``.
+
+ Each entry maps a stage name to a ``(src, metadata)`` callable bound to
+ ``options``. Triton sources get "ttir" and "ttgir"; Gluon sources get
+ only "ttgir". "llir", "ptx" and "cubin" are always registered. If
+ ``knobs.runtime.add_stages_inspection_hook`` is set it is called last
+ with the populated mapping.
+ """
+ capability = self._parse_arch(options.arch)
+ if language == Language.TRITON:
+ stages["ttir"] = lambda src, metadata: self.make_ttir(src, metadata, options, capability)
+ stages["ttgir"] = lambda src, metadata: self.make_ttgir(src, metadata, options, capability)
+ elif language == Language.GLUON:
+ stages["ttgir"] = lambda src, metadata: self.gluon_to_ttgir(src, metadata, options, capability)
+ stages["llir"] = lambda src, metadata: self.make_llir(src, metadata, options, capability)
+ # NOTE(review): the ptx and cubin stages receive self.target.arch rather
+ # than the capability parsed from options.arch -- presumably intentional;
+ # confirm before changing.
+ stages["ptx"] = lambda src, metadata: self.make_ptx(src, metadata, options, self.target.arch)
+ stages["cubin"] = lambda src, metadata: self.make_cubin(src, metadata, options, self.target.arch)
+ if knobs.runtime.add_stages_inspection_hook is not None:
+ knobs.runtime.add_stages_inspection_hook(self, stages, options, language, capability)
+
+ @functools.lru_cache()
+ def hash(self):
+ """Return '<ptxas version>-<target arch>' as this backend's hash string.
+
+ The result is memoized by ``functools.lru_cache``, with ``self`` as the
+ cache key.
+ """
+ version = get_ptxas_version(self.target.arch)
+ return f'{version}-{self.target.arch}'
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.c b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.c
new file mode 100644
index 0000000000000000000000000000000000000000..8e06e6369128a730698fe233bf6f0898447213a6
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.c
@@ -0,0 +1,518 @@
+ #include "cuda.h"
+ #include <dlfcn.h>
+ #include <stdalign.h>
+ #include <stdbool.h>
+ #include <stdio.h>
+ #include <stdlib.h>
+ #define PY_SSIZE_T_CLEAN
+ #include <Python.h>
+
+ // Python object that embeds a CUtensorMap. The member is declared with
+ // 128-byte alignment; PyCUtensorMap_alloc allocates instances accordingly.
+ typedef struct {
+ PyObject_HEAD;
+ _Alignas(128) CUtensorMap tensorMap;
+ } PyCUtensorMapObject;
+
+ // Raises a Python exception and returns false if code is not CUDA_SUCCESS.
+ //
+ // The exception is a RuntimeError whose message is "Triton Error [CUDA]: "
+ // followed by the driver's description of `code`. The GIL is acquired around
+ // the PyErr_SetString call, so this may be called while the GIL is released.
+ // `file` and `line` are accepted for the call sites but are not used.
+ static bool gpuAssert(CUresult code, const char *file, int line) {
+   if (code == CUDA_SUCCESS)
+     return true;
+
+   // cuGetErrorString leaves the string NULL for codes it does not recognize,
+   // so start from NULL and substitute a fallback instead of dereferencing it.
+   const char *str = NULL;
+   cuGetErrorString(code, &str);
+
+   // snprintf bounds the write and always NUL-terminates.
+   char err[1024];
+   snprintf(err, sizeof(err), "Triton Error [CUDA]: %s",
+            str ? str : "Unknown error");
+
+   PyGILState_STATE gil_state = PyGILState_Ensure();
+   PyErr_SetString(PyExc_RuntimeError, err);
+   PyGILState_Release(gil_state);
+   return false;
+ }
+
+ // To be used only *outside* a Py_{BEGIN,END}_ALLOW_THREADS block.
+ // On failure gpuAssert has already set a Python exception and control jumps
+ // to a `cleanup` label, which the enclosing function must define.
+ #define CUDA_CHECK_AND_RETURN_NULL(ans) \
+ do { \
+ if (!gpuAssert((ans), __FILE__, __LINE__)) \
+ goto cleanup; \
+ } while (0)
+
+ // To be used inside a Py_{BEGIN,END}_ALLOW_THREADS block.
+ // On failure the thread state saved in `_save` is restored before the
+ // enclosing function returns NULL directly (no `cleanup` label is used).
+ #define CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(ans) \
+ do { \
+ if (!gpuAssert((ans), __FILE__, __LINE__)) { \
+ PyEval_RestoreThread(_save); \
+ return NULL; \
+ } \
+ } while (0)
+
+ // Used to check if functions exist in old CUDA driver versions.
+ // If funcPointer is NULL it is assigned the result of initializerFunction();
+ // if it is still NULL afterwards control jumps to the enclosing function's
+ // `cleanup` label.
+ #define INITIALIZE_FUNCTION_POINTER_IF_NULL(funcPointer, initializerFunction) \
+ do { \
+ if ((funcPointer) == NULL) { \
+ (funcPointer) = (initializerFunction)(); \
+ if ((funcPointer) == NULL) { \
+ goto cleanup; \
+ } \
+ } \
+ } while (0)
+
+ // Python entry point: get_device_properties(device_id) -> dict.
+ //
+ // Queries seven integer attributes of the given device ordinal and returns
+ // them in a dict with the keys "max_shared_mem", "max_num_regs",
+ // "multiprocessor_count", "warpSize", "sm_clock_rate", "mem_clock_rate" and
+ // "mem_bus_width". Returns NULL with a RuntimeError set if any driver call
+ // fails, including the lookup of the device handle itself.
+ static PyObject *getDeviceProperties(PyObject *self, PyObject *args) {
+   int device_id;
+   if (!PyArg_ParseTuple(args, "i", &device_id))
+     return NULL;
+
+   CUdevice device;
+   int max_shared_mem;
+   int max_num_regs;
+   int multiprocessor_count;
+   int warp_size;
+   int sm_clock_rate;
+   int mem_clock_rate;
+   int mem_bus_width;
+
+   // Get device handle. The result must be checked: on an invalid ordinal
+   // `device` would otherwise be used uninitialized by the queries below.
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGet(&device, device_id));
+
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &max_shared_mem, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
+       device));
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &max_num_regs, CU_DEVICE_ATTRIBUTE_MAX_REGISTERS_PER_BLOCK, device));
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &multiprocessor_count, CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, device));
+   CUDA_CHECK_AND_RETURN_NULL(
+       cuDeviceGetAttribute(&warp_size, CU_DEVICE_ATTRIBUTE_WARP_SIZE, device));
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &sm_clock_rate, CU_DEVICE_ATTRIBUTE_CLOCK_RATE, device));
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &mem_clock_rate, CU_DEVICE_ATTRIBUTE_MEMORY_CLOCK_RATE, device));
+   CUDA_CHECK_AND_RETURN_NULL(cuDeviceGetAttribute(
+       &mem_bus_width, CU_DEVICE_ATTRIBUTE_GLOBAL_MEMORY_BUS_WIDTH, device));
+
+   return Py_BuildValue("{s:i, s:i, s:i, s:i, s:i, s:i, s:i}", "max_shared_mem",
+                        max_shared_mem, "max_num_regs", max_num_regs,
+                        "multiprocessor_count", multiprocessor_count, "warpSize",
+                        warp_size, "sm_clock_rate", sm_clock_rate,
+                        "mem_clock_rate", mem_clock_rate, "mem_bus_width",
+                        mem_bus_width);
+
+ cleanup:
+   // gpuAssert has already set the Python exception.
+   return NULL;
+ }
+
+ // Python entry point: load_binary(name, data, shared, device).
+ //
+ // Loads the module image `data`, looks up the function `name` in it and
+ // returns the tuple (module handle, function handle, n_regs, n_spills,
+ // n_max_threads), with both handles passed as unsigned 64-bit integers.
+ // The driver calls run with the GIL released. Any failing call restores the
+ // thread state and returns NULL with a RuntimeError set.
+ // NOTE(review): no cuModuleUnload is issued when a call after
+ // cuModuleLoadData fails -- confirm whether the loaded module should be
+ // released on those paths.
+ static PyObject *loadBinary(PyObject *self, PyObject *args) {
+ const char *name;
+ const char *data;
+ Py_ssize_t data_size;
+ int shared;
+ int device;
+ if (!PyArg_ParseTuple(args, "ss#ii", &name, &data, &data_size, &shared,
+ &device)) {
+ return NULL;
+ }
+ CUfunction fun;
+ CUmodule mod;
+ int32_t n_regs = 0;
+ int32_t n_spills = 0;
+ int32_t n_max_threads = 0;
+ // create driver handles
+ CUcontext pctx = 0;
+
+ Py_BEGIN_ALLOW_THREADS;
+ // Use the current context if there is one; otherwise retain the device's
+ // primary context and make it current.
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuCtxGetCurrent(&pctx));
+ if (!pctx) {
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuDevicePrimaryCtxRetain(&pctx, device));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuCtxSetCurrent(pctx));
+ }
+
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuModuleLoadData(&mod, data));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuModuleGetFunction(&fun, mod, name));
+ // get allocated registers and spilled registers from the function
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuFuncGetAttribute(&n_regs, CU_FUNC_ATTRIBUTE_NUM_REGS, fun));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuFuncGetAttribute(&n_spills, CU_FUNC_ATTRIBUTE_LOCAL_SIZE_BYTES, fun));
+ // The attribute is a byte count; dividing by 4 reports it in 4-byte units.
+ n_spills /= 4;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuFuncGetAttribute(
+ &n_max_threads, CU_FUNC_ATTRIBUTE_MAX_THREADS_PER_BLOCK, fun));
+ // set dynamic shared memory if necessary
+ int shared_optin;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuDeviceGetAttribute(
+ &shared_optin, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_BLOCK_OPTIN,
+ device));
+ // Only when both the request and the device opt-in limit exceed 49152
+ // bytes is the function's dynamic shared-memory limit raised, to the
+ // opt-in limit minus the function's static shared size.
+ if (shared > 49152 && shared_optin > 49152) {
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuFuncSetCacheConfig(fun, CU_FUNC_CACHE_PREFER_SHARED));
+ int shared_total, shared_static;
+ // shared_total is queried but not read afterwards.
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuDeviceGetAttribute(
+ &shared_total, CU_DEVICE_ATTRIBUTE_MAX_SHARED_MEMORY_PER_MULTIPROCESSOR,
+ device));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuFuncGetAttribute(
+ &shared_static, CU_FUNC_ATTRIBUTE_SHARED_SIZE_BYTES, fun));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuFuncSetAttribute(fun, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES,
+ shared_optin - shared_static));
+ }
+ Py_END_ALLOW_THREADS;
+
+ if (PyErr_Occurred()) {
+ return NULL;
+ }
+ return Py_BuildValue("(KKiii)", (uint64_t)mod, (uint64_t)fun, n_regs,
+ n_spills, n_max_threads);
+ }
+
+ // Function-pointer types for the two driver entry points that this file
+ // resolves at run time with dlsym (see defineGetFunctionHandle below) rather
+ // than calling directly.
+ typedef CUresult (*cuOccupancyMaxActiveClusters_t)(
+ int *numClusters, CUfunction func, const CUlaunchConfig *config);
+
+ typedef CUresult (*cuTensorMapEncodeTiled_t)(
+ CUtensorMap *tensorMap, CUtensorMapDataType tensorDataType,
+ cuuint32_t tensorRank, void *globalAddress, const cuuint64_t *globalDim,
+ const cuuint64_t *globalStrides, const cuuint32_t *boxDim,
+ const cuuint32_t *elementStrides, CUtensorMapInterleave interleave,
+ CUtensorMapSwizzle swizzle, CUtensorMapL2promotion l2Promotion,
+ CUtensorMapFloatOOBfill oobFill);
+
+ // Defines `static symbolName##_t name()`, which opens libcuda.so.1 and
+ // returns the address of `symbolName` looked up with dlsym.
+ // On failure it sets a Python RuntimeError and returns NULL; the library
+ // handle is closed only when the symbol lookup fails. On success the handle
+ // is intentionally left open (no dlclose), so the returned pointer stays
+ // usable.
+ #define defineGetFunctionHandle(name, symbolName) \
+ static symbolName##_t name() { \
+ /* Open the shared library */ \
+ void *libHandle = dlopen("libcuda.so.1", RTLD_LAZY); \
+ if (!libHandle) { \
+ PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1"); \
+ return NULL; \
+ } \
+ /* Clear any existing error */ \
+ dlerror(); \
+ symbolName##_t funcHandle = (symbolName##_t)dlsym(libHandle, #symbolName); \
+ /* Check for errors */ \
+ const char *err = dlerror(); \
+ if (err) { \
+ PyErr_SetString(PyExc_RuntimeError, \
+ "Failed to retrieve " #symbolName " from libcuda.so.1"); \
+ dlclose(libHandle); \
+ return NULL; \
+ } \
+ return funcHandle; \
+ }
+
+ // Loader for cuOccupancyMaxActiveClusters.
+ defineGetFunctionHandle(getCuOccupancyMaxActiveClustersHandle,
+ cuOccupancyMaxActiveClusters);
+
+ // Loader for cuTensorMapEncodeTiled.
+ defineGetFunctionHandle(getCuTensorMapEncodeTiledHandle,
+ cuTensorMapEncodeTiled);
+
+ // Python entry point: cuOccupancyMaxActiveClusters(func, shared, clusterDim).
+ //
+ // `func` is a function handle passed as an unsigned 64-bit integer. The
+ // function's dynamic shared-memory limit is set to `shared`, non-portable
+ // cluster sizes are allowed, and the driver is queried with a launch config
+ // of 128 threads per block and a cluster of clusterDim x 1 x 1. Returns the
+ // reported cluster count as a Python int, or NULL with an exception set.
+ static PyObject *occupancyMaxActiveClusters(PyObject *self, PyObject *args) {
+ int clusterDim = -1, maxActiveClusters = -1;
+ int shared = 0;
+ CUfunction func;
+
+ if (!PyArg_ParseTuple(args, "Kii", &func, &shared, &clusterDim)) {
+ return NULL;
+ }
+
+ // Let each SM have one block
+ int maxActiveBlocks = 1;
+ Py_BEGIN_ALLOW_THREADS;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuFuncSetAttribute(
+ func, CU_FUNC_ATTRIBUTE_MAX_DYNAMIC_SHARED_SIZE_BYTES, shared));
+ Py_END_ALLOW_THREADS;
+
+ // Single launch attribute describing the cluster shape.
+ CUlaunchAttribute launchAttr[1];
+ launchAttr[0].id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
+ launchAttr[0].value.clusterDim.x = clusterDim;
+ launchAttr[0].value.clusterDim.y = 1;
+ launchAttr[0].value.clusterDim.z = 1;
+ CUlaunchConfig config;
+ config.gridDimX = clusterDim * maxActiveBlocks;
+ config.gridDimY = 1;
+ config.gridDimZ = 1;
+ config.blockDimX = 128;
+ config.blockDimY = 1;
+ config.blockDimZ = 1;
+ config.sharedMemBytes = shared;
+ config.hStream = 0;
+ config.numAttrs = 1;
+ config.attrs = launchAttr;
+
+ // Resolved on first use and kept in a function-local static afterwards.
+ static cuOccupancyMaxActiveClusters_t cuOccupancyMaxActiveClusters = NULL;
+ INITIALIZE_FUNCTION_POINTER_IF_NULL(cuOccupancyMaxActiveClusters,
+ getCuOccupancyMaxActiveClustersHandle);
+
+ Py_BEGIN_ALLOW_THREADS;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuFuncSetAttribute(
+ func, CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED, 1));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuOccupancyMaxActiveClusters(&maxActiveClusters, func, &config));
+ Py_END_ALLOW_THREADS;
+ return PyLong_FromLong(maxActiveClusters);
+
+ // Reached only when the symbol could not be resolved; the loader has
+ // already set the exception.
+ cleanup:
+ return NULL;
+ }
+
+ // Python entry point: set_printf_fifo_size(size).
+ //
+ // Sets CU_LIMIT_PRINTF_FIFO_SIZE of the current context to `size` unless it
+ // already has that value. Raises ValueError for a negative size and
+ // RuntimeError if a driver call fails. Returns None.
+ static PyObject *setPrintfFifoSize(PyObject *self, PyObject *args) {
+ long size;
+ if (!PyArg_ParseTuple(args, "l", &size)) {
+ return NULL;
+ }
+ if (size < 0) {
+ PyErr_SetString(PyExc_ValueError, "fifo size must be non-negative");
+ return NULL;
+ }
+
+ Py_BEGIN_ALLOW_THREADS;
+
+ // Ensure we have an active context.
+ // When none is current, the primary context of device 0 is retained and
+ // made current.
+ CUcontext ctx = NULL;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuCtxGetCurrent(&ctx));
+ if (!ctx) {
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuDevicePrimaryCtxRetain(&ctx, /*device=*/0));
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(cuCtxSetCurrent(ctx));
+ }
+
+ // We can't set the fifo size after running a kernel that calls printf. This
+ // is true even if the set() call is a nop and the new size is the same as the
+ // old size.
+ //
+ // This is unfriendly, so check if the old size matches the new size, and skip
+ // the set() call if so.
+ size_t oldSize = 0;
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuCtxGetLimit(&oldSize, CU_LIMIT_PRINTF_FIFO_SIZE));
+ if (oldSize != size) {
+ CUDA_CHECK_AND_RETURN_NULL_ALLOW_THREADS(
+ cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, size));
+ }
+
+ Py_END_ALLOW_THREADS;
+ Py_RETURN_NONE;
+ }
+
+ // tp_alloc for PyCUtensorMap: allocates tp_basicsize bytes aligned to 128
+ // bytes with posix_memalign and initializes the object header.
+ // `n_items` is not used. Returns NULL with MemoryError set if the
+ // allocation fails. The memory is not zero-filled here.
+ static PyObject *PyCUtensorMap_alloc(PyTypeObject *type, Py_ssize_t n_items) {
+ PyCUtensorMapObject *self = NULL;
+ void *mem = NULL;
+ size_t size = type->tp_basicsize;
+
+ if (posix_memalign(&mem, 128, size) != 0) {
+ PyErr_NoMemory();
+ return NULL;
+ }
+
+ self = (PyCUtensorMapObject *)mem;
+ PyObject_INIT(self, type);
+ return (PyObject *)self;
+ }
+
+ // tp_dealloc for PyCUtensorMap: hands the object to the type's tp_free slot.
+ static void PyCUtensorMap_dealloc(PyObject *self) {
+ Py_TYPE(self)->tp_free(self);
+ }
+
+ // tp_free for PyCUtensorMap: releases memory obtained from posix_memalign in
+ // PyCUtensorMap_alloc with free().
+ static void PyCUtensorMap_free(void *ptr) { free(ptr); }
+
+ // Type object for PyCUtensorMap. It installs the custom alloc/dealloc/free
+ // slots above, so instances get the aligned storage set up in
+ // PyCUtensorMap_alloc; construction goes through PyType_GenericNew.
+ // clang-format off
+ static PyTypeObject PyCUtensorMapType = {
+ PyVarObject_HEAD_INIT(NULL, 0)
+ .tp_name = "triton.backends.nvidia.PyCUtensorMap",
+ .tp_basicsize = sizeof(PyCUtensorMapObject),
+ .tp_itemsize = 0,
+ .tp_flags = Py_TPFLAGS_DEFAULT,
+ .tp_doc = "",
+ .tp_new = PyType_GenericNew,
+ .tp_alloc = PyCUtensorMap_alloc,
+ .tp_dealloc = (destructor)PyCUtensorMap_dealloc,
+ .tp_free = PyCUtensorMap_free,
+ };
+ // clang-format on
+
+ // Python entry point: fill_tma_descriptor(global_address, swizzle, elemSize,
+ // elemType, blockSize, shape, strides, padding) -> PyCUtensorMap.
+ //
+ // Encodes a tiled tensor map with cuTensorMapEncodeTiled and returns it
+ // wrapped in a new PyCUtensorMap object. `blockSize`, `shape` and `strides`
+ // are sequences of ints of equal length `rank`, with 1 <= rank <= 5. They
+ // are copied in reversed order into the arrays handed to the driver, and
+ // strides are multiplied by `elemSize`. `padding == 1` selects NaN
+ // out-of-bounds fill; any other value selects no fill.
+ //
+ // Returns NULL with an exception set on failure: TypeError for non-sequence
+ // or non-int inputs, ValueError for an unsupported rank, OverflowError for
+ // ints that do not fit, RuntimeError for a rank mismatch, an unresolved
+ // driver symbol, or a driver error.
+ static PyObject *fillTMADescriptor(PyObject *self, PyObject *args) {
+   unsigned long long global_address;
+   int swizzle;
+   int elemSize;
+   int elemType;
+   PyObject *blockSize;
+   PyObject *shape;
+   PyObject *strides;
+   int padding;
+
+   if (!PyArg_ParseTuple(args, "KiiiOOOi", &global_address, &swizzle, &elemSize,
+                         &elemType, &blockSize, &shape, &strides, &padding)) {
+     return NULL;
+   }
+
+   PyCUtensorMapObject *desc = (PyCUtensorMapObject *)PyObject_CallObject(
+       (PyObject *)&PyCUtensorMapType, NULL);
+   if (!desc) {
+     return NULL;
+   }
+
+   PyObject *blockSizeFast = NULL;
+   PyObject *shapeFast = NULL;
+   PyObject *stridesFast = NULL;
+
+   uint32_t blockSizeInt[5];
+   uint64_t shapeInt[5];
+   uint64_t stridesLL[5];
+   int rank = 0;
+
+   blockSizeFast = PySequence_Fast(blockSize, "blockSize must be a sequence");
+   if (!blockSizeFast)
+     goto cleanup;
+
+   // The arrays above hold at most 5 dimensions and the stride computation
+   // below indexes [rank - 1], so reject rank 0 and rank > 5 before any
+   // element is written.
+   Py_ssize_t numDims = PySequence_Fast_GET_SIZE(blockSizeFast);
+   if (numDims < 1 || numDims > 5) {
+     PyErr_SetString(PyExc_ValueError, "Rank must be between 1 and 5");
+     goto cleanup;
+   }
+   rank = (int)numDims;
+
+   for (int i = 0; i < rank; ++i) {
+     PyObject *item = PySequence_Fast_GET_ITEM(blockSizeFast, i);
+     if (!PyLong_Check(item)) {
+       PyErr_SetString(PyExc_TypeError, "block size must be an int");
+       goto cleanup;
+     }
+     long long value = PyLong_AsLongLong(item);
+     // -1 is also the error return; distinguish via PyErr_Occurred.
+     if (value == -1 && PyErr_Occurred())
+       goto cleanup;
+     blockSizeInt[rank - i - 1] = value;
+   }
+
+   shapeFast = PySequence_Fast(shape, "shape must be a sequence");
+   if (!shapeFast)
+     goto cleanup;
+
+   if (rank != PySequence_Fast_GET_SIZE(shapeFast)) {
+     PyErr_SetString(PyExc_RuntimeError, "Rank mismatch");
+     goto cleanup;
+   }
+   for (int i = 0; i < rank; ++i) {
+     PyObject *item = PySequence_Fast_GET_ITEM(shapeFast, i);
+     if (!PyLong_Check(item)) {
+       PyErr_SetString(PyExc_TypeError, "shape must be an int");
+       goto cleanup;
+     }
+     long long value = PyLong_AsLongLong(item);
+     if (value == -1 && PyErr_Occurred())
+       goto cleanup;
+     shapeInt[rank - i - 1] = value;
+   }
+
+   stridesFast = PySequence_Fast(strides, "strides must be a sequence");
+   if (!stridesFast)
+     goto cleanup;
+
+   if (rank != PySequence_Fast_GET_SIZE(stridesFast)) {
+     PyErr_SetString(PyExc_RuntimeError, "Rank mismatch");
+     goto cleanup;
+   }
+   // Only the first rank - 1 input strides are read; the last input stride is
+   // not used, and the final output stride is derived below.
+   for (int i = 0; i + 1 < rank; ++i) {
+     PyObject *item = PySequence_Fast_GET_ITEM(stridesFast, i);
+     if (!PyLong_Check(item)) {
+       PyErr_SetString(PyExc_TypeError, "strides must be an int");
+       goto cleanup;
+     }
+     long long value = PyLong_AsLongLong(item);
+     if (value == -1 && PyErr_Occurred())
+       goto cleanup;
+     stridesLL[rank - i - 2] = elemSize * value;
+   }
+   stridesLL[rank - 1] =
+       shapeInt[rank - 1] * (rank == 1 ? elemSize : stridesLL[rank - 2]);
+   Py_DECREF(blockSizeFast);
+   blockSizeFast = NULL;
+   Py_DECREF(shapeFast);
+   shapeFast = NULL;
+   Py_DECREF(stridesFast);
+   stridesFast = NULL;
+
+   CUtensorMapFloatOOBfill fill =
+       (padding == 1) ? CU_TENSOR_MAP_FLOAT_OOB_FILL_NAN_REQUEST_ZERO_FMA
+                      : CU_TENSOR_MAP_FLOAT_OOB_FILL_NONE;
+
+   uint32_t elementStrides[5] = {1, 1, 1, 1, 1};
+   // Resolved on first use and kept in a function-local static afterwards.
+   static cuTensorMapEncodeTiled_t cuTensorMapEncodeTiled = NULL;
+   INITIALIZE_FUNCTION_POINTER_IF_NULL(cuTensorMapEncodeTiled,
+                                       getCuTensorMapEncodeTiledHandle);
+   CUresult res = cuTensorMapEncodeTiled(
+       &desc->tensorMap, elemType, rank, (void *)global_address, shapeInt,
+       stridesLL, blockSizeInt, elementStrides, CU_TENSOR_MAP_INTERLEAVE_NONE,
+       swizzle, CU_TENSOR_MAP_L2_PROMOTION_L2_128B, fill);
+   if (res != CUDA_SUCCESS) {
+     // Build a diagnostic that echoes every argument given to the driver.
+     const char *str;
+     cuGetErrorString(res, &str);
+     char err[4096] = {0};
+     size_t off = 0;
+     off += snprintf(
+         err + off, sizeof(err) - off,
+         "Triton Error [CUDA]: Failed to create tensor map descriptor: %s\n",
+         str ? str : "Unknown error");
+     off += snprintf(err + off, sizeof(err) - off,
+                     "elemType=%d rank=%d global_address=0x%llx elemSize=%d "
+                     "swizzle=%d padding=%d\n",
+                     elemType, rank, (unsigned long long)global_address,
+                     elemSize, swizzle, padding);
+     off += snprintf(err + off, sizeof(err) - off, "shape=[");
+     for (int i = 0; i < rank; ++i) {
+       off +=
+           snprintf(err + off, sizeof(err) - off, "%llu%s",
+                    (unsigned long long)shapeInt[i], (i + 1 < rank) ? ", " : "");
+     }
+     off += snprintf(err + off, sizeof(err) - off, "]\n");
+     off += snprintf(err + off, sizeof(err) - off, "strides=[");
+     for (int i = 0; i < rank; ++i) {
+       off += snprintf(err + off, sizeof(err) - off, "%llu%s",
+                       (unsigned long long)stridesLL[i],
+                       (i + 1 < rank) ? ", " : "");
+     }
+     off += snprintf(err + off, sizeof(err) - off, "]\n");
+     off += snprintf(err + off, sizeof(err) - off, "blockSize=[");
+     for (int i = 0; i < rank; ++i) {
+       off += snprintf(err + off, sizeof(err) - off, "%u%s",
+                       (unsigned)blockSizeInt[i], (i + 1 < rank) ? ", " : "");
+     }
+     off += snprintf(err + off, sizeof(err) - off, "] elementStrides=[");
+     for (int i = 0; i < rank; ++i) {
+       off += snprintf(err + off, sizeof(err) - off, "%u%s",
+                       (unsigned)elementStrides[i], (i + 1 < rank) ? ", " : "");
+     }
+     off += snprintf(err + off, sizeof(err) - off, "]\n");
+     PyErr_SetString(PyExc_RuntimeError, err);
+
+     goto cleanup;
+   }
+
+   return (PyObject *)desc;
+
+ cleanup:
+   Py_XDECREF(blockSizeFast);
+   Py_XDECREF(shapeFast);
+   Py_XDECREF(stridesFast);
+   Py_XDECREF(desc);
+   return NULL;
+ }
+
+ // Method table of the cuda_utils module: Python-visible name, C
+ // implementation, calling convention and docstring for each function.
+ static PyMethodDef ModuleMethods[] = {
+ {"load_binary", loadBinary, METH_VARARGS,
+ "Load provided cubin into CUDA driver"},
+ {"get_device_properties", getDeviceProperties, METH_VARARGS,
+ "Get the properties for a given device"},
+ {"cuOccupancyMaxActiveClusters", occupancyMaxActiveClusters, METH_VARARGS,
+ "Python interface for cuOccupancyMaxActiveClusters function"},
+ {"set_printf_fifo_size", setPrintfFifoSize, METH_VARARGS,
+ "Python interface for cuCtxSetLimit(CU_LIMIT_PRINTF_FIFO_SIZE, x), which "
+ "controls how many bytes can be streamed from kernels before data starts "
+ "being dropped. This inherits all the limitations of this call; in "
+ "particular it's an error to change this value after launching any kernel "
+ "that calls printf()."},
+ {"fill_tma_descriptor", fillTMADescriptor, METH_VARARGS, "doc"},
+
+ {NULL, NULL, 0, NULL} // sentinel
+ };
+
+ // Module definition for "cuda_utils"; it carries the method table above.
+ static struct PyModuleDef ModuleDef = {PyModuleDef_HEAD_INIT, "cuda_utils",
+ NULL, // documentation
+ -1, // size
+ ModuleMethods};
+
+ // Module initializer for cuda_utils.
+ //
+ // Readies the PyCUtensorMap type, creates the module, and exposes the type
+ // as the module attribute "PyCUtensorMap". Returns the new module, or NULL
+ // with an exception set if any step fails.
+ PyMODINIT_FUNC PyInit_cuda_utils(void) {
+   if (PyType_Ready(&PyCUtensorMapType) < 0) {
+     return NULL;
+   }
+
+   PyObject *m = PyModule_Create(&ModuleDef);
+   if (m == NULL) {
+     return NULL;
+   }
+
+   if (PyModule_AddFunctions(m, ModuleMethods) < 0) {
+     Py_DECREF(m);
+     return NULL;
+   }
+
+   Py_INCREF(&PyCUtensorMapType);
+   if (PyModule_AddObject(m, "PyCUtensorMap", (PyObject *)&PyCUtensorMapType) <
+       0) {
+     // PyModule_AddObject steals the reference only on success, so the
+     // reference taken above must be dropped here.
+     Py_DECREF(&PyCUtensorMapType);
+     Py_DECREF(m);
+     return NULL;
+   }
+
+   return m;
+ }
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.py
new file mode 100644
index 0000000000000000000000000000000000000000..5a2ddb378d32165b53653e4540135fdc0080a1ba
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/driver.py
@@ -0,0 +1,764 @@
+import functools
+import os
+import subprocess
+import triton
+import re
+from pathlib import Path
+from triton import knobs
+from triton.runtime.build import compile_module_from_src
+from triton.runtime import _allocation
+from triton.backends.compiler import GPUTarget
+from triton.backends.driver import GPUDriver
+
+ # Directory containing this file (symlinks resolved); the include and lib
+ # paths below are its "include" and "lib" subdirectories.
+ dirname = os.path.dirname(os.path.realpath(__file__))
+ include_dirs = [os.path.join(dirname, "include")]
+ libdevice_dir = os.path.join(dirname, "lib")
+ # Library list passed to compile_module_from_src by CudaUtils.
+ libraries = ['libcuda.so.1']
+ # Rebound to the extension module's PyCUtensorMap type by CudaUtils.__init__.
+ PyCUtensorMap = None
+
+
+ @functools.lru_cache()
+ def libcuda_dirs():
+ """Return the directories in which ``libcuda.so.1`` was located.
+
+ If ``knobs.nvidia.libcuda_path`` is set, it is returned as the only
+ entry. Otherwise the output of ``/sbin/ldconfig -p`` is scanned for
+ ``libcuda.so.1``; when that yields nothing, the entries of
+ ``LD_LIBRARY_PATH`` that contain the library are used instead. An
+ assertion with a setup hint fails if none of the resulting directories
+ contains ``libcuda.so.1``. The result is memoized by
+ ``functools.lru_cache``.
+ """
+ if env_libcuda_path := knobs.nvidia.libcuda_path:
+ return [env_libcuda_path]
+
+ libs = subprocess.check_output(["/sbin/ldconfig", "-p"]).decode(errors="ignore")
+ # each line looks like the following:
+ # libcuda.so.1 (libc6,x86-64) => /lib/x86_64-linux-gnu/libcuda.so.1
+ locs = [line.split()[-1] for line in libs.splitlines() if "libcuda.so.1" in line]
+ dirs = [os.path.dirname(loc) for loc in locs]
+ # LD_LIBRARY_PATH is consulted only when ldconfig reported no match.
+ env_ld_library_path = os.getenv("LD_LIBRARY_PATH")
+ if env_ld_library_path and not dirs:
+ dirs = [dir for dir in env_ld_library_path.split(":") if os.path.exists(os.path.join(dir, "libcuda.so.1"))]
+ # Message used by the assertion below when no directory qualifies.
+ msg = 'libcuda.so cannot found!\n'
+ if locs:
+ msg += 'Possible files are located at %s.' % str(locs)
+ msg += 'Please create a symlink of libcuda.so to any of the files.'
+ else:
+ msg += 'Please make sure GPU is set up and then run "/sbin/ldconfig"'
+ msg += ' (requires sudo) to refresh the linker cache.'
+ assert any(os.path.exists(os.path.join(path, 'libcuda.so.1')) for path in dirs), msg
+ return dirs
+
+
+ @functools.lru_cache()
+ def library_dirs():
+ """Return the library search directories: ``libdevice_dir`` followed by the
+ directories from ``libcuda_dirs()``. The result is memoized."""
+ return [libdevice_dir, *libcuda_dirs()]
+
+
+# ------------------------
+# Utils
+# ------------------------
+
+
+ class CudaUtils(object):
+ """Process-wide accessor for the functions of the compiled ``driver.c`` module.
+
+ ``__new__`` stores the first instance on the class and returns it for
+ every later construction. ``__init__`` compiles ``driver.c`` through
+ ``compile_module_from_src`` and binds the module's functions as instance
+ attributes; it also publishes the module's ``PyCUtensorMap`` type in the
+ module-level global of the same name.
+ """
+
+ def __new__(cls):
+ if not hasattr(cls, "instance"):
+ cls.instance = super(CudaUtils, cls).__new__(cls)
+ return cls.instance
+
+ def __init__(self):
+ # NOTE(review): __init__ runs on every CudaUtils() call even though the
+ # instance is shared; presumably compile_module_from_src caches its
+ # result -- confirm.
+ mod = compile_module_from_src(
+ src=Path(os.path.join(dirname, "driver.c")).read_text(),
+ name="cuda_utils",
+ library_dirs=library_dirs(),
+ include_dirs=include_dirs,
+ libraries=libraries,
+ )
+ global PyCUtensorMap
+ PyCUtensorMap = mod.PyCUtensorMap
+ self.load_binary = mod.load_binary
+ self.get_device_properties = mod.get_device_properties
+ self.cuOccupancyMaxActiveClusters = mod.cuOccupancyMaxActiveClusters
+ self.set_printf_fifo_size = mod.set_printf_fifo_size
+ self.fill_tma_descriptor = mod.fill_tma_descriptor
+
+
+# ------------------------
+# Launcher
+# ------------------------
+
+
+ def ty_to_cpp(ty):
+ """Map a signature type string to the name of a C type.
+
+ Strings starting with ``*`` map to ``CUdeviceptr``; strings starting with
+ ``tensordesc`` and the name ``nvTmaDesc`` map to ``CUtensorMap``. Integer
+ names map to the fixed-width type of the same width (``i1``/``u1`` use
+ the 8-bit types), and every floating-point name maps to ``double``.
+
+ Raises KeyError for any other type name.
+ """
+ if ty[0] == '*':
+ return "CUdeviceptr"
+ if ty.startswith("tensordesc"):
+ return "CUtensorMap"
+ return {
+ "i1": "int8_t",
+ "i8": "int8_t",
+ "i16": "int16_t",
+ "i32": "int32_t",
+ "i64": "int64_t",
+ "u1": "uint8_t",
+ "u8": "uint8_t",
+ "u16": "uint16_t",
+ "u32": "uint32_t",
+ "u64": "uint64_t",
+ "fp16": "double",
+ "bf16": "double",
+ "fp32": "double",
+ "f32": "double",
+ "fp64": "double",
+ "nvTmaDesc": "CUtensorMap",
+ }[ty]
+
+
+ # Unsigned integer C type, of matching bit width, used to declare an
+ # argument of each floating-point type.
+ FLOAT_STORAGE_TYPE = {
+ "fp16": "uint16_t",
+ "bf16": "uint16_t",
+ "fp32": "uint32_t",
+ "f32": "uint32_t",
+ "fp64": "uint64_t",
+ }
+ # Name of the pack function associated with each floating-point type
+ # (presumably emitted into the generated launcher source -- confirm).
+ FLOAT_PACK_FUNCTION = {
+ "fp16": "pack_fp16",
+ "bf16": "pack_bf16",
+ "fp32": "pack_fp32",
+ "f32": "pack_fp32",
+ "fp64": "pack_fp64",
+ }
+
+ # Argument-format prefix that make_launcher prepends to the per-kernel
+ # argument format, and its length.
+ _BASE_ARGS_FORMAT = "iiiKKppOOOOOO"
+ _BASE_ARGS_FORMAT_LEN = len(_BASE_ARGS_FORMAT)
+
+
+def make_launcher(constants, signature, tensordesc_meta):
+
+ def _expand_signature(signature):
+ output = []
+ tensordesc_idx = 0
+ # Expand tensor descriptor arguments into either nvTmaDesc, shape and
+ # strides, or base pointer, shape and strides depending on whether the
+ # kernel was lowered to use the nvTmaDesc or not.
+ for sig in signature:
+ if isinstance(sig, str) and sig.startswith("tensordesc"):
+ meta = tensordesc_meta[tensordesc_idx] if tensordesc_meta else None
+ tensordesc_idx += 1
+
+ match = re.match("tensordesc<([^[>]*)\\[([^]]*)\\]", sig)
+ dtype = match.group(1)
+ shape = match.group(2)
+ ndim = shape.count(",") + 1
+
+ if meta is None:
+ output.append("*" + dtype)
+ # Currently the host side tensor descriptors get passed in as a
+ # tensor desc, shape, and strides. We have no way to use these
+ # shape and strides when processing tensor descriptors which is
+ # why we provide our own decomposition above. Sadly this means
+ # we have to pass the shape and strides twice.
+ for _ in range(2 * ndim):
+ output.append("i64")
+ output.append("i1")
+ else:
+ output.append("nvTmaDesc")
+
+ for _ in range(ndim):
+ output.append("i32")
+ for _ in range(ndim):
+ output.append("i64")
+ else:
+ output.append(sig)
+
+ assert not tensordesc_meta or tensordesc_idx == len(tensordesc_meta)
+ return output
+
+ def _flatten_signature(sig, output):
+ # Flatten tuples
+ if isinstance(sig, tuple):
+ for x in sig:
+ _flatten_signature(x, output)
+ else:
+ output.append(sig)
+
+ def _extracted_type(ty):
+ if isinstance(ty, tuple):
+ val = ','.join(map(_extracted_type, ty))
+ return f"[{val}]"
+ if ty[0] == '*':
+ return "PyObject*"
+ if ty in ("constexpr", "nvTmaDesc"):
+ return "PyObject*"
+ return ty_to_cpp(ty)
+
+ def format_of(ty):
+ if isinstance(ty, tuple):
+ val = ''.join(map(format_of, ty))
+ return f"({val})"
+ if ty[0] == '*':
+ return "O"
+ if ty in ("constexpr", "nvTmaDesc"):
+ return "O"
+ if ty.startswith("tensordesc"):
+ return "O"
+ return {
+ "double": "d",
+ "long": "l",
+ "int8_t": "b",
+ "int16_t": "h",
+ "int32_t": "i",
+ "int64_t": "L",
+ "uint8_t": "B",
+ "uint16_t": "H",
+ "uint32_t": "I",
+ "uint64_t": "K",
+ }[ty_to_cpp(ty)]
+
+ expand_signature = _expand_signature(signature.values())
+ signature = {i: s for i, s in enumerate(expand_signature)}
+
+ args_format = ''.join([format_of(ty) for ty in signature.values()])
+ format = _BASE_ARGS_FORMAT + args_format
+
+ flat_signature = []
+ for sig in signature.values():
+ _flatten_signature(sig, flat_signature)
+ signature = {i: s for i, s in enumerate(flat_signature)}
+ args_list = ', ' + ', '.join(f"&_arg{i}" for i, ty in signature.items()) if len(signature) > 0 else ''
+ # Record the end of regular arguments;
+ # subsequent arguments are architecture-specific descriptors, such as tensor descriptors for CUDA.
+ arg_decl_list = []
+ for i, ty in signature.items():
+ if ty == "constexpr":
+ continue
+ if ty in FLOAT_STORAGE_TYPE:
+ arg_decl_list.append(f"{FLOAT_STORAGE_TYPE[ty]} arg{i}")
+ else:
+ arg_decl_list.append(f"{ty_to_cpp(ty)} arg{i}")
+ arg_decls = ', '.join(arg_decl_list)
+ internal_args_list = []
+ for i, ty in signature.items():
+ if ty[0] == "*":
+ internal_args_list.append(f"ptr_info{i}.dev_ptr")
+ elif ty in FLOAT_STORAGE_TYPE:
+ internal_args_list.append(f"_arg{i}_storage")
+ elif ty == "nvTmaDesc":
+ # Note: we have to dereference the pointer
+ internal_args_list.append(f"*tma_ptr{i}")
+ elif ty != "constexpr":
+ internal_args_list.append(f"_arg{i}")
+ params = range(len(signature))
+
+ # generate glue code
+ newline = '\n '
+ ptr_decls = [
+ f"DevicePtrInfo ptr_info{i} = getPointer(_arg{i}, {i}); if (!ptr_info{i}.valid) return NULL;"
+ for i, ty in signature.items()
+ if ty[0] == "*"
+ ]
+ tma_decls = [
+ f"CUtensorMap* tma_ptr{i} = getTmaDesc(_arg{i}); if (!tma_ptr{i}) return NULL;" for i, ty in signature.items()
+ if ty == "nvTmaDesc"
+ ]
+ float_storage_decls = [
+ f"{FLOAT_STORAGE_TYPE[ty]} _arg{i}_storage = {FLOAT_PACK_FUNCTION[ty]}(_arg{i});"
+ for i, ty in signature.items()
+ if ty in FLOAT_STORAGE_TYPE
+ ]
+ params = [f"&arg{i}" for i, ty in signature.items() if ty != "constexpr"]
+ params.append("&global_scratch")
+ params.append("&profile_scratch")
+ src = f"""
+#include \"cuda.h\"
+#include
+#include
+#include
+#define PY_SSIZE_T_CLEAN
+#include
+
+typedef struct {{
+ PyObject_HEAD;
+ _Alignas(128) CUtensorMap tensorMap;
+}} PyCUtensorMapObject;
+
+static inline void gpuAssert(CUresult code, const char *file, int line)
+{{
+ if (code != CUDA_SUCCESS)
+ {{
+ const char* prefix = "Triton Error [CUDA]: ";
+ const char* str;
+ cuGetErrorString(code, &str);
+ char err[1024] = {{0}};
+ strcat(err, prefix);
+ strcat(err, str);
+ PyGILState_STATE gil_state;
+ gil_state = PyGILState_Ensure();
+ PyErr_SetString(PyExc_RuntimeError, err);
+ PyGILState_Release(gil_state);
+ }}
+}}
+
+#define CUDA_CHECK(ans) {{ gpuAssert((ans), __FILE__, __LINE__); }}
+
+typedef CUresult (*cuLaunchKernelEx_t)(const CUlaunchConfig* config, CUfunction f, void** kernelParams, void** extra);
+
+static cuLaunchKernelEx_t getLaunchKernelExHandle() {{
+ // Open the shared library
+ void* handle = dlopen("libcuda.so.1", RTLD_LAZY);
+ if (!handle) {{
+ PyErr_SetString(PyExc_RuntimeError, "Failed to open libcuda.so.1");
+ return NULL;
+ }}
+ // Clear any existing error
+ dlerror();
+ cuLaunchKernelEx_t cuLaunchKernelExHandle = (cuLaunchKernelEx_t)dlsym(handle, "cuLaunchKernelEx");
+ // Check for errors
+ const char *dlsym_error = dlerror();
+ if (dlsym_error) {{
+ PyErr_SetString(PyExc_RuntimeError, "Failed to retrieve cuLaunchKernelEx from libcuda.so.1");
+ return NULL;
+ }}
+ return cuLaunchKernelExHandle;
+}}
+
+static void _launch(int gridX, int gridY, int gridZ, int num_warps, int num_ctas, int launch_cooperative_grid, int launch_pdl, int shared_memory, CUstream stream, CUfunction function, CUdeviceptr global_scratch, CUdeviceptr profile_scratch{', ' + arg_decls if len(arg_decls) > 0 else ''}) {{
+ void *params[] = {{ {', '.join(params)} }};
+ if (gridX*gridY*gridZ > 0) {{
+ // 4 attributes that we can currently pass maximum
+ CUlaunchAttribute launchAttr[4];
+ static cuLaunchKernelEx_t cuLaunchKernelExHandle = NULL;
+ if (cuLaunchKernelExHandle == NULL) {{
+ cuLaunchKernelExHandle = getLaunchKernelExHandle();
+ }}
+ CUlaunchConfig config;
+ config.gridDimX = gridX * num_ctas;
+ config.gridDimY = gridY;
+ config.gridDimZ = gridZ;
+
+ config.blockDimX = 32 * num_warps;
+ config.blockDimY = 1;
+ config.blockDimZ = 1;
+ config.sharedMemBytes = shared_memory;
+ config.hStream = stream;
+ config.attrs = launchAttr;
+ int num_attrs = 0;
+
+ if (launch_pdl != 0) {{
+ CUlaunchAttribute pdlAttr = {{ .id = CU_LAUNCH_ATTRIBUTE_PROGRAMMATIC_STREAM_SERIALIZATION, .value = 1}};
+ launchAttr[num_attrs] = pdlAttr;
+ ++num_attrs;
+ }}
+
+ if (launch_cooperative_grid != 0) {{
+ CUlaunchAttribute coopAttr = {{ .id = CU_LAUNCH_ATTRIBUTE_COOPERATIVE, .value = 1}};
+ launchAttr[num_attrs] = coopAttr;
+ ++num_attrs;
+ }}
+
+ if (num_ctas != 1) {{
+ CUlaunchAttribute clusterAttr = {{}};
+ clusterAttr.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_DIMENSION;
+ clusterAttr.value.clusterDim.x = num_ctas;
+ clusterAttr.value.clusterDim.y = 1;
+ clusterAttr.value.clusterDim.z = 1;
+ launchAttr[num_attrs] = clusterAttr;
+ ++num_attrs;
+
+ CUlaunchAttribute clusterSchedulingAttr = {{}};
+ clusterSchedulingAttr.id = CU_LAUNCH_ATTRIBUTE_CLUSTER_SCHEDULING_POLICY_PREFERENCE;
+ clusterSchedulingAttr.value.clusterSchedulingPolicyPreference = CU_CLUSTER_SCHEDULING_POLICY_SPREAD;
+ launchAttr[num_attrs] = clusterSchedulingAttr;
+ ++num_attrs;
+ }}
+
+ // num_ctas == 16 is non-portable. Does work for H100 and B200 tho
+ config.numAttrs = num_attrs;
+ if (num_ctas == 16) {{
+ CUDA_CHECK(cuFuncSetAttribute(
+ function,
+ CU_FUNC_ATTRIBUTE_NON_PORTABLE_CLUSTER_SIZE_ALLOWED,
+ 1
+ ));
+ }}
+
+ CUDA_CHECK(cuLaunchKernelExHandle(&config, function, params, 0));
+ }}
+}}
+
+typedef struct _DevicePtrInfo {{
+ CUdeviceptr dev_ptr;
+ bool valid;
+}} DevicePtrInfo;
+
+static PyObject* data_ptr_str = NULL;
+static PyObject* py_tensor_map_type = NULL;
+
+static inline DevicePtrInfo getPointer(PyObject *obj, int idx) {{
+ DevicePtrInfo ptr_info;
+ ptr_info.dev_ptr = 0;
+ ptr_info.valid = true;
+ if (PyLong_Check(obj)) {{
+ ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(obj);
+ return ptr_info;
+ }}
+ if (obj == Py_None) {{
+ // valid nullptr
+ return ptr_info;
+ }}
+ PyObject *ret = PyObject_CallMethodNoArgs(obj, data_ptr_str);
+ if (!ret) {{
+ PyErr_SetString(PyExc_TypeError, "Pointer argument must be either uint64 or have data_ptr method");
+ ptr_info.valid = false;
+ goto cleanup;
+ }}
+ if (!PyLong_Check(ret)) {{
+ PyErr_SetString(PyExc_TypeError, "data_ptr method of Pointer object must return 64-bit int");
+ ptr_info.valid = false;
+ goto cleanup;
+ }}
+ ptr_info.dev_ptr = PyLong_AsUnsignedLongLong(ret);
+ if(!ptr_info.dev_ptr)
+ return ptr_info;
+ uint64_t dev_ptr;
+ int status = cuPointerGetAttribute(&dev_ptr, CU_POINTER_ATTRIBUTE_DEVICE_POINTER, ptr_info.dev_ptr);
+ if (status == CUDA_ERROR_INVALID_VALUE) {{
+ PyErr_Format(PyExc_ValueError,
+ "Pointer argument (at %d) cannot be accessed from Triton (cpu tensor?)", idx);
+ ptr_info.valid = false;
+ }} else if (status != CUDA_SUCCESS) {{
+ CUDA_CHECK(status); // Catch any other cuda API errors
+ ptr_info.valid = false;
+ }}
+ ptr_info.dev_ptr = dev_ptr;
+cleanup:
+ Py_XDECREF(ret);
+ return ptr_info;
+
+}}
+
+static inline CUtensorMap* getTmaDesc(PyObject *obj) {{
+ if (sizeof(CUtensorMap*) != 8) {{
+ PyErr_SetString(PyExc_SystemError, "getTmaDesc() requires 64-bit compilation");
+ return NULL;
+ }}
+
+if (Py_TYPE(obj) != (PyTypeObject*)py_tensor_map_type) {{
+ PyErr_Format(PyExc_TypeError, "object must be of type PyCUtensorMap, got %s", Py_TYPE(obj)->tp_name);
+ return NULL;
+}}
+
+ CUtensorMap* map = &((PyCUtensorMapObject*)obj)->tensorMap;
+ uintptr_t align_128 = (uintptr_t)map & (128 - 1);
+ if (align_128 != 0) {{
+ PyErr_Format(PyExc_ValueError, "CUtensorMap must be aligned to 128B, but got (&map) mod 128 = %ld", align_128);
+ return NULL;
+ }}
+ return map;
+}}
+
+static void ensureCudaContext() {{
+ CUcontext pctx;
+ CUDA_CHECK(cuCtxGetCurrent(&pctx));
+ if (!pctx) {{
+ // Ensure device context.
+ CUdevice device;
+ CUDA_CHECK(cuDeviceGet(&device, 0));
+ CUDA_CHECK(cuDevicePrimaryCtxRetain(&pctx, device));
+ CUDA_CHECK(cuCtxSetCurrent(pctx));
+ }}
+}}
+
+static uint16_t pack_fp16(double f) {{
+ uint16_t result;
+ // from https://github.com/python/pythoncapi-compat
+#if 0x030600B1 <= PY_VERSION_HEX && PY_VERSION_HEX <= 0x030B00A1 && !defined(PYPY_VERSION)
+ _PyFloat_Pack2(f, (unsigned char*)&result, 1);
+#else
+ PyFloat_Pack2(f, (unsigned char*)&result, 1);
+#endif
+ return result;
+}}
+
+static uint16_t pack_bf16(double f) {{
+ float f32 = (float)f;
+ uint32_t u32 = *(uint32_t*)&f32;
+ return (uint16_t)(u32 >> 16);
+}}
+
+static uint32_t pack_fp32(double f) {{
+ float f32 = (float)f;
+ return *(uint32_t*)&f32;
+}}
+
+static uint64_t pack_fp64(double f) {{
+ return *(uint64_t*)&f;
+}}
+
+static PyObject* launch(PyObject* self, PyObject* args) {{
+ // ensure cuda context is valid before calling any CUDA APIs, e.g. before getPointer calls cuPointerGetAttributes
+ ensureCudaContext();
+
+ int gridX, gridY, gridZ;
+ uint64_t _stream;
+ uint64_t _function;
+ int launch_cooperative_grid;
+ int launch_pdl;
+ PyObject *launch_enter_hook = NULL;
+ PyObject *launch_exit_hook = NULL;
+ PyObject *kernel_metadata = NULL;
+ PyObject *launch_metadata = NULL;
+ PyObject *global_scratch_obj = NULL;
+ PyObject *profile_scratch_obj = NULL;
+ {newline.join([f"{_extracted_type(ty)} _arg{i};" for i, ty in signature.items()])}
+ if(!PyArg_ParseTuple(args, \"{format}\", &gridX, &gridY, &gridZ,
+ &_stream, &_function, &launch_cooperative_grid, &launch_pdl, &global_scratch_obj, &profile_scratch_obj,
+ &kernel_metadata, &launch_metadata,
+ &launch_enter_hook, &launch_exit_hook{args_list})) {{
+ return NULL;
+ }}
+
+ int num_warps, num_ctas, shared_memory;
+ if (!PyArg_ParseTuple(kernel_metadata, \"iii\", &num_warps, &num_ctas, &shared_memory)) {{
+ PyErr_SetString(PyExc_TypeError, "kernel_metadata must be a tuple");
+ return NULL;
+ }}
+
+ // extract launch metadata
+ if (launch_enter_hook != Py_None){{
+ PyObject* ret = PyObject_CallOneArg(launch_enter_hook, launch_metadata);
+ if (!ret)
+ return NULL;
+ Py_DECREF(ret);
+ }}
+
+ CUdeviceptr global_scratch = 0;
+ if (global_scratch_obj != Py_None) {{
+ DevicePtrInfo global_scratch_info = getPointer(global_scratch_obj, -1);
+ if (!global_scratch_info.valid) {{
+ return NULL;
+ }}
+ global_scratch = global_scratch_info.dev_ptr;
+ }}
+
+ CUdeviceptr profile_scratch = 0;
+ if (profile_scratch_obj != Py_None) {{
+ DevicePtrInfo profile_scratch_info = getPointer(profile_scratch_obj, -1);
+ if (!profile_scratch_info.valid) {{
+ return NULL;
+ }}
+ profile_scratch = profile_scratch_info.dev_ptr;
+ }}
+
+ // raise exception asap
+ {newline.join(ptr_decls)}
+ {newline.join(tma_decls)}
+ {newline.join(float_storage_decls)}
+ Py_BEGIN_ALLOW_THREADS;
+ _launch(gridX, gridY, gridZ, num_warps, num_ctas, launch_cooperative_grid, launch_pdl, shared_memory, (CUstream)_stream, (CUfunction)_function, global_scratch, profile_scratch{', ' + ', '.join(internal_args_list) if len(internal_args_list) > 0 else ''});
+ Py_END_ALLOW_THREADS;
+ if (PyErr_Occurred()) {{
+ return NULL;
+ }}
+
+ if(launch_exit_hook != Py_None){{
+ PyObject* ret = PyObject_CallOneArg(launch_exit_hook, launch_metadata);
+ if (!ret)
+ return NULL;
+ Py_DECREF(ret);
+ }}
+
+ Py_RETURN_NONE;
+}}
+
+static PyMethodDef ModuleMethods[] = {{
+ {{"launch", launch, METH_VARARGS, "Entry point for all kernels with this signature"}},
+ {{NULL, NULL, 0, NULL}} // sentinel
+}};
+
+static struct PyModuleDef ModuleDef = {{
+ PyModuleDef_HEAD_INIT,
+ \"__triton_launcher\",
+ NULL, //documentation
+ -1, //size
+ ModuleMethods
+}};
+
+PyMODINIT_FUNC PyInit___triton_launcher(void) {{
+ data_ptr_str = PyUnicode_InternFromString("data_ptr");
+ if(data_ptr_str == NULL) {{
+ return NULL;
+ }}
+ PyObject* driver_mod = PyImport_ImportModule("triton.backends.nvidia.driver");
+ if (driver_mod == NULL) {{
+ return NULL;
+ }}
+ py_tensor_map_type = PyObject_GetAttrString(driver_mod, "PyCUtensorMap");
+ if (py_tensor_map_type == NULL) {{
+ return NULL;
+ }}
+
+ PyObject *m = PyModule_Create(&ModuleDef);
+ if(m == NULL) {{
+ return NULL;
+ }}
+ PyModule_AddFunctions(m, ModuleMethods);
+ return m;
+}}
+"""
+ return src
+
+
+# The TMA dtype enum values are slightly different on host vs device...
+TMA_DTYPE_DEVICE_TO_HOST = dict((i, i) for i in range(16))
+TMA_DTYPE_DEVICE_TO_HOST[8] = 10
+TMA_DTYPE_DEVICE_TO_HOST[9] = 8
+TMA_DTYPE_DEVICE_TO_HOST[10] = 9
+
+
+def make_tensordesc_arg(arg, metadata):
+ if metadata is None:
+ # Currently the host side tensor descriptors get decomposed in
+ # the frontend to tensor desc, shape, and strides. We have no
+ # way to use these shape and strides when processing tensor
+ # descriptors which is why we provide our own decomposition
+ # above. Sadly this means we have to pass the shape and strides
+ # twice.
+ return [arg.base, *arg.shape, *arg.strides, arg.padding == "nan", *arg.shape, *arg.strides]
+
+ swizzle = metadata["swizzle"]
+ elem_size = metadata["elem_size"]
+ elem_type = metadata["elem_type"]
+ block_size = metadata["block_size"]
+ fp4_padded = metadata["fp4_padded"]
+
+ shape = arg.shape
+ strides = arg.strides
+ assert strides[-1] == 1
+ padding = 1 if arg.padding == "nan" else 0
+
+ if fp4_padded:
+ shape = list(shape)
+ shape[-1] *= 2
+
+ cu_tensor_map = triton.runtime.driver.active.utils.fill_tma_descriptor(
+ arg.base.data_ptr(),
+ swizzle,
+ elem_size,
+ TMA_DTYPE_DEVICE_TO_HOST[elem_type],
+ block_size,
+ shape,
+ strides,
+ padding,
+ )
+
+ return [cu_tensor_map, *shape, *strides]
+
+
+def wrap_handle_tensordesc(launcher, signature, tensordesc_meta):
+ has_tensor_desc_arg = any(isinstance(sig, str) and sig.startswith("tensordesc") for sig in signature.values())
+ if not has_tensor_desc_arg:
+ return launcher
+
+ tensordesc_indices = set(
+ [i for i, sig in enumerate(signature.values()) if isinstance(sig, str) and sig.startswith("tensordesc")])
+ assert not tensordesc_meta or len(tensordesc_meta) == len(tensordesc_indices)
+ if not tensordesc_meta:
+ tensordesc_meta = [None] * len(tensordesc_indices)
+
+ def inner(*args):
+ final_args = list(args[:_BASE_ARGS_FORMAT_LEN])
+ tensordesc_idx = 0
+ for i, arg in enumerate(args[_BASE_ARGS_FORMAT_LEN:]):
+ if i in tensordesc_indices:
+ final_args.extend(make_tensordesc_arg(arg, tensordesc_meta[tensordesc_idx]))
+ tensordesc_idx += 1
+ else:
+ final_args.append(arg)
+ return launcher(*final_args)
+
+ return inner
+
+
+class CudaLauncher(object):
+
+ def __init__(self, src, metadata):
+ constants = src.constants if hasattr(src, "constants") else dict()
+ arg_idx = lambda x: (src.fn.arg_names.index(x), ) if isinstance(x, str) else x
+ constants = {arg_idx(idx): value for idx, value in constants.items()}
+ signature = {idx: value for idx, value in src.signature.items()}
+ tensordesc_meta = getattr(metadata, "tensordesc_meta", None)
+ src = make_launcher(constants, signature, tensordesc_meta)
+ mod = compile_module_from_src(
+ src=src,
+ name="__triton_launcher",
+ library_dirs=library_dirs(),
+ include_dirs=include_dirs,
+ libraries=libraries,
+ )
+
+ self.num_ctas = getattr(metadata, "num_ctas", 1)
+ self.launch = wrap_handle_tensordesc(mod.launch, signature, tensordesc_meta)
+ self.global_scratch_size = metadata.global_scratch_size
+ self.global_scratch_align = metadata.global_scratch_align
+ self.profile_scratch_size = metadata.profile_scratch_size
+ self.profile_scratch_align = metadata.profile_scratch_align
+ self.launch_cooperative_grid = metadata.launch_cooperative_grid
+ self.launch_pdl = metadata.launch_pdl
+
+ def __call__(self, gridX, gridY, gridZ, stream, function, *args):
+
+ def allocate_scratch(size, align, allocator):
+ if size > 0:
+ grid_size = gridX * gridY * gridZ
+ alloc_size = grid_size * self.num_ctas * size
+ alloc_fn = allocator.get()
+ return alloc_fn(alloc_size, align, stream)
+ return None
+
+ global_scratch = allocate_scratch(self.global_scratch_size, self.global_scratch_align, _allocation._allocator)
+ profile_scratch = allocate_scratch(self.profile_scratch_size, self.profile_scratch_align,
+ _allocation._profile_allocator)
+ self.launch(gridX, gridY, gridZ, stream, function, self.launch_cooperative_grid, self.launch_pdl,
+ global_scratch, profile_scratch, *args)
+
+
+class CudaDriver(GPUDriver):
+
+ def __init__(self):
+ self.utils = CudaUtils() # TODO: make static
+ self.launcher_cls = CudaLauncher
+ super().__init__()
+
+ def get_current_target(self):
+ device = self.get_current_device()
+ capability = self.get_device_capability(device)
+ capability = capability[0] * 10 + capability[1]
+ warp_size = 32
+ return GPUTarget("cuda", capability, warp_size)
+
+ def get_active_torch_device(self):
+ import torch
+ return torch.device("cuda", self.get_current_device())
+
+ def get_device_interface(self):
+ import torch
+ return torch.cuda
+
+ @staticmethod
+ def is_active():
+ try:
+ import torch
+ return torch.cuda.is_available() and (torch.version.hip is None)
+ except ImportError:
+ return False
+
+ def map_python_to_cpp_type(self, ty: str) -> str:
+ return ty_to_cpp(ty)
+
+ def get_benchmarker(self):
+ from triton.testing import do_bench
+ return do_bench
+
+ def get_empty_cache_for_benchmark(self):
+ import torch
+
+ # We maintain a buffer of 256 MB that we clear
+ # before each kernel call to make sure that the L2 cache
+ # doesn't contain any input data before the run
+ cache_size = 256 * 1024 * 1024
+ return torch.empty(int(cache_size // 4), dtype=torch.int, device='cuda')
+
+ def clear_cache(self, cache):
+ cache.zero_()
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cudaGL.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cudaGL.h
new file mode 100644
index 0000000000000000000000000000000000000000..1a9c70e881774c8f3cf8b6430e7aa53a98d74669
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cudaGL.h
@@ -0,0 +1,608 @@
+/*
+ * Copyright 1993-2014 NVIDIA Corporation. All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee. Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users. These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+#ifndef CUDAGL_H
+#define CUDAGL_H
+
+#include <cuda.h>
+#include <GL/gl.h>
+
+#if defined(__CUDA_API_VERSION_INTERNAL) || defined(__DOXYGEN_ONLY__) || defined(CUDA_ENABLE_DEPRECATED)
+#define __CUDA_DEPRECATED
+#elif defined(_MSC_VER)
+#define __CUDA_DEPRECATED __declspec(deprecated)
+#elif defined(__GNUC__)
+#define __CUDA_DEPRECATED __attribute__((deprecated))
+#else
+#define __CUDA_DEPRECATED
+#endif
+
+#ifdef CUDA_FORCE_API_VERSION
+#error "CUDA_FORCE_API_VERSION is no longer supported."
+#endif
+
+#if defined(__CUDA_API_VERSION_INTERNAL) || defined(CUDA_API_PER_THREAD_DEFAULT_STREAM)
+ #define __CUDA_API_PER_THREAD_DEFAULT_STREAM
+ #define __CUDA_API_PTDS(api) api ## _ptds
+ #define __CUDA_API_PTSZ(api) api ## _ptsz
+#else
+ #define __CUDA_API_PTDS(api) api
+ #define __CUDA_API_PTSZ(api) api
+#endif
+
+#define cuGLCtxCreate cuGLCtxCreate_v2
+#define cuGLMapBufferObject __CUDA_API_PTDS(cuGLMapBufferObject_v2)
+#define cuGLMapBufferObjectAsync __CUDA_API_PTSZ(cuGLMapBufferObjectAsync_v2)
+#define cuGLGetDevices cuGLGetDevices_v2
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * \file cudaGL.h
+ * \brief Header file for the OpenGL interoperability functions of the
+ * low-level CUDA driver application programming interface.
+ */
+
+/**
+ * \defgroup CUDA_GL OpenGL Interoperability
+ * \ingroup CUDA_DRIVER
+ *
+ * ___MANBRIEF___ OpenGL interoperability functions of the low-level CUDA
+ * driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
+ *
+ * This section describes the OpenGL interoperability functions of the
+ * low-level CUDA driver application programming interface. Note that mapping
+ * of OpenGL resources is performed with the graphics API agnostic, resource
+ * mapping interface described in \ref CUDA_GRAPHICS "Graphics Interoperability".
+ *
+ * @{
+ */
+
+#if defined(_WIN32)
+#if !defined(WGL_NV_gpu_affinity)
+typedef void* HGPUNV;
+#endif
+#endif /* _WIN32 */
+
+/**
+ * \brief Registers an OpenGL buffer object
+ *
+ * Registers the buffer object specified by \p buffer for access by
+ * CUDA. A handle to the registered object is returned as \p
+ * pCudaResource. The register flags \p Flags specify the intended usage,
+ * as follows:
+ *
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this
+ * resource will be used. It is therefore assumed that this resource will be
+ * read from and written to by CUDA. This is the default value.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA
+ * will not write to this resource.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that
+ * CUDA will not read from this resource and will write over the
+ * entire contents of the resource, so none of the data previously
+ * stored in the resource will be preserved.
+ *
+ * \param pCudaResource - Pointer to the returned object handle
+ * \param buffer - name of buffer object to be registered
+ * \param Flags - Register flags
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_INVALID_HANDLE,
+ * ::CUDA_ERROR_ALREADY_MAPPED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_OPERATING_SYSTEM
+ * \notefnerr
+ *
+ * \sa
+ * ::cuGraphicsUnregisterResource,
+ * ::cuGraphicsMapResources,
+ * ::cuGraphicsResourceGetMappedPointer,
+ * ::cudaGraphicsGLRegisterBuffer
+ */
+CUresult CUDAAPI cuGraphicsGLRegisterBuffer(CUgraphicsResource *pCudaResource, GLuint buffer, unsigned int Flags);
+
+/**
+ * \brief Register an OpenGL texture or renderbuffer object
+ *
+ * Registers the texture or renderbuffer object specified by \p image for access by CUDA.
+ * A handle to the registered object is returned as \p pCudaResource.
+ *
+ * \p target must match the type of the object, and must be one of ::GL_TEXTURE_2D,
+ * ::GL_TEXTURE_RECTANGLE, ::GL_TEXTURE_CUBE_MAP, ::GL_TEXTURE_3D, ::GL_TEXTURE_2D_ARRAY,
+ * or ::GL_RENDERBUFFER.
+ *
+ * The register flags \p Flags specify the intended usage, as follows:
+ *
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_NONE: Specifies no hints about how this
+ * resource will be used. It is therefore assumed that this resource will be
+ * read from and written to by CUDA. This is the default value.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY: Specifies that CUDA
+ * will not write to this resource.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD: Specifies that
+ * CUDA will not read from this resource and will write over the
+ * entire contents of the resource, so none of the data previously
+ * stored in the resource will be preserved.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_SURFACE_LDST: Specifies that CUDA will
+ * bind this resource to a surface reference.
+ * - ::CU_GRAPHICS_REGISTER_FLAGS_TEXTURE_GATHER: Specifies that CUDA will perform
+ * texture gather operations on this resource.
+ *
+ * The following image formats are supported. For brevity's sake, the list is abbreviated.
+ * For ex., {GL_R, GL_RG} X {8, 16} would expand to the following 4 formats
+ * {GL_R8, GL_R16, GL_RG8, GL_RG16} :
+ * - GL_RED, GL_RG, GL_RGBA, GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY
+ * - {GL_R, GL_RG, GL_RGBA} X {8, 16, 16F, 32F, 8UI, 16UI, 32UI, 8I, 16I, 32I}
+ * - {GL_LUMINANCE, GL_ALPHA, GL_LUMINANCE_ALPHA, GL_INTENSITY} X
+ * {8, 16, 16F_ARB, 32F_ARB, 8UI_EXT, 16UI_EXT, 32UI_EXT, 8I_EXT, 16I_EXT, 32I_EXT}
+ *
+ * The following image classes are currently disallowed:
+ * - Textures with borders
+ * - Multisampled renderbuffers
+ *
+ * \param pCudaResource - Pointer to the returned object handle
+ * \param image - name of texture or renderbuffer object to be registered
+ * \param target - Identifies the type of object specified by \p image
+ * \param Flags - Register flags
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_INVALID_HANDLE,
+ * ::CUDA_ERROR_ALREADY_MAPPED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_OPERATING_SYSTEM
+ * \notefnerr
+ *
+ * \sa
+ * ::cuGraphicsUnregisterResource,
+ * ::cuGraphicsMapResources,
+ * ::cuGraphicsSubResourceGetMappedArray,
+ * ::cudaGraphicsGLRegisterImage
+ */
+CUresult CUDAAPI cuGraphicsGLRegisterImage(CUgraphicsResource *pCudaResource, GLuint image, GLenum target, unsigned int Flags);
+
+#ifdef _WIN32
+/**
+ * \brief Gets the CUDA device associated with hGpu
+ *
+ * Returns in \p *pDevice the CUDA device associated with a \p hGpu, if
+ * applicable.
+ *
+ * \param pDevice - Device associated with hGpu
+ * \param hGpu - Handle to a GPU, as queried via ::WGL_NV_gpu_affinity()
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE
+ * \notefnerr
+ *
+ * \sa ::cuGLMapBufferObject,
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
+ * ::cuGLUnregisterBufferObject, ::cuGLUnmapBufferObjectAsync,
+ * ::cuGLSetBufferObjectMapFlags,
+ * ::cudaWGLGetDevice
+ */
+CUresult CUDAAPI cuWGLGetDevice(CUdevice *pDevice, HGPUNV hGpu);
+#endif /* _WIN32 */
+
+/**
+ * CUDA devices corresponding to an OpenGL device
+ */
+typedef enum CUGLDeviceList_enum {
+ CU_GL_DEVICE_LIST_ALL = 0x01, /**< The CUDA devices for all GPUs used by the current OpenGL context */
+ CU_GL_DEVICE_LIST_CURRENT_FRAME = 0x02, /**< The CUDA devices for the GPUs used by the current OpenGL context in its currently rendering frame */
+ CU_GL_DEVICE_LIST_NEXT_FRAME = 0x03, /**< The CUDA devices for the GPUs to be used by the current OpenGL context in the next frame */
+} CUGLDeviceList;
+
+/**
+ * \brief Gets the CUDA devices associated with the current OpenGL context
+ *
+ * Returns in \p *pCudaDeviceCount the number of CUDA-compatible devices
+ * corresponding to the current OpenGL context. Also returns in \p *pCudaDevices
+ * at most cudaDeviceCount of the CUDA-compatible devices corresponding to
+ * the current OpenGL context. If any of the GPUs being used by the current OpenGL
+ * context are not CUDA capable then the call will return CUDA_ERROR_NO_DEVICE.
+ *
+ * The \p deviceList argument may be any of the following:
+ * - ::CU_GL_DEVICE_LIST_ALL: Query all devices used by the current OpenGL context.
+ * - ::CU_GL_DEVICE_LIST_CURRENT_FRAME: Query the devices used by the current OpenGL context to
+ * render the current frame (in SLI).
+ * - ::CU_GL_DEVICE_LIST_NEXT_FRAME: Query the devices used by the current OpenGL context to
+ * render the next frame (in SLI). Note that this is a prediction, it can't be guaranteed that
+ * this is correct in all cases.
+ *
+ * \param pCudaDeviceCount - Returned number of CUDA devices.
+ * \param pCudaDevices - Returned CUDA devices.
+ * \param cudaDeviceCount - The size of the output device array pCudaDevices.
+ * \param deviceList - The set of devices to return.
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_NO_DEVICE,
+ * ::CUDA_ERROR_INVALID_VALUE,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_GRAPHICS_CONTEXT,
+ * ::CUDA_ERROR_OPERATING_SYSTEM
+ *
+ * \notefnerr
+ *
+ * \sa
+ * ::cuWGLGetDevice,
+ * ::cudaGLGetDevices
+ */
+CUresult CUDAAPI cuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
+
+/**
+ * \defgroup CUDA_GL_DEPRECATED OpenGL Interoperability [DEPRECATED]
+ *
+ * ___MANBRIEF___ deprecated OpenGL interoperability functions of the low-level
+ * CUDA driver API (___CURRENT_FILE___) ___ENDMANBRIEF___
+ *
+ * This section describes deprecated OpenGL interoperability functionality.
+ *
+ * @{
+ */
+
+/** Flags to map or unmap a resource */
+typedef enum CUGLmap_flags_enum {
+ CU_GL_MAP_RESOURCE_FLAGS_NONE = 0x00, /**< No flag bits set */
+ CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY = 0x01, /**< NOTE(review): presumably analogous to ::CU_GRAPHICS_REGISTER_FLAGS_READ_ONLY (CUDA will not write to the resource) -- confirm */
+ CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD = 0x02, /**< NOTE(review): presumably analogous to ::CU_GRAPHICS_REGISTER_FLAGS_WRITE_DISCARD (previous contents not preserved) -- confirm */
+} CUGLmap_flags;
+
+/**
+ * \brief Create a CUDA context for interoperability with OpenGL
+ *
+ * \deprecated This function is deprecated as of Cuda 5.0.
+ *
+ * This function is deprecated and should no longer be used. It is
+ * no longer necessary to associate a CUDA context with an OpenGL
+ * context in order to achieve maximum interoperability performance.
+ *
+ * \param pCtx - Returned CUDA context
+ * \param Flags - Options for CUDA context creation
+ * \param device - Device on which to create the context
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE,
+ * ::CUDA_ERROR_OUT_OF_MEMORY
+ * \notefnerr
+ *
+ * \sa ::cuCtxCreate, ::cuGLInit, ::cuGLMapBufferObject,
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
+ * ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
+ * ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
+ * ::cuWGLGetDevice
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
+
+/**
+ * \brief Initializes OpenGL interoperability
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Initializes OpenGL interoperability. This function is deprecated
+ * and calling it is no longer required. It may fail if the needed
+ * OpenGL driver facilities are not available.
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_UNKNOWN
+ * \notefnerr
+ *
+ * \sa ::cuGLMapBufferObject,
+ * ::cuGLRegisterBufferObject, ::cuGLUnmapBufferObject,
+ * ::cuGLUnregisterBufferObject, ::cuGLMapBufferObjectAsync,
+ * ::cuGLUnmapBufferObjectAsync, ::cuGLSetBufferObjectMapFlags,
+ * ::cuWGLGetDevice
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLInit(void);
+
+/**
+ * \brief Registers an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Registers the buffer object specified by \p buffer for access by
+ * CUDA. This function must be called before CUDA can map the buffer
+ * object. There must be a valid OpenGL context bound to the current
+ * thread when this function is called, and the buffer name is
+ * resolved by that context.
+ *
+ * \param buffer - The name of the buffer object to register.
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_ALREADY_MAPPED
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsGLRegisterBuffer
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLRegisterBufferObject(GLuint buffer);
+
+/**
+ * \brief Maps an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Maps the buffer object specified by \p buffer into the address space of the
+ * current CUDA context and returns in \p *dptr and \p *size the base pointer
+ * and size of the resulting mapping.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * All streams in the current CUDA context are synchronized with the
+ * current GL context.
+ *
+ * \param dptr - Returned mapped base pointer
+ * \param size - Returned size of mapping
+ * \param buffer - The name of the buffer object to map
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE,
+ * ::CUDA_ERROR_MAP_FAILED
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsMapResources
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr *dptr, size_t *size, GLuint buffer);
+
+/**
+ * \brief Unmaps an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Unmaps the buffer object specified by \p buffer for access by CUDA.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * All streams in the current CUDA context are synchronized with the
+ * current GL context.
+ *
+ * \param buffer - Buffer object to unmap
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsUnmapResources
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLUnmapBufferObject(GLuint buffer);
+
+/**
+ * \brief Unregister an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Unregisters the buffer object specified by \p buffer. This
+ * releases any resources associated with the registered buffer.
+ * After this call, the buffer may no longer be mapped for access by
+ * CUDA.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * \param buffer - Name of the buffer object to unregister
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsUnregisterResource
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLUnregisterBufferObject(GLuint buffer);
+
+/**
+ * \brief Set the map flags for an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Sets the map flags for the buffer object specified by \p buffer.
+ *
+ * Changes to \p Flags will take effect the next time \p buffer is mapped.
+ * The \p Flags argument may be any of the following:
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_NONE: Specifies no hints about how this
+ * resource will be used. It is therefore assumed that this resource will be
+ * read from and written to by CUDA kernels. This is the default value.
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_READ_ONLY: Specifies that CUDA kernels which
+ * access this resource will not write to this resource.
+ * - ::CU_GL_MAP_RESOURCE_FLAGS_WRITE_DISCARD: Specifies that CUDA kernels
+ * which access this resource will not read from this resource and will
+ * write over the entire contents of the resource, so none of the data
+ * previously stored in the resource will be preserved.
+ *
+ * If \p buffer has not been registered for use with CUDA, then
+ * ::CUDA_ERROR_INVALID_HANDLE is returned. If \p buffer is presently
+ * mapped for access by CUDA, then ::CUDA_ERROR_ALREADY_MAPPED is returned.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * \param buffer - Buffer object whose map flags are to be set
+ * \param Flags - Map flags
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_HANDLE,
+ * ::CUDA_ERROR_ALREADY_MAPPED,
+ * ::CUDA_ERROR_INVALID_CONTEXT
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsResourceSetMapFlags
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLSetBufferObjectMapFlags(GLuint buffer, unsigned int Flags);
+
+/**
+ * \brief Maps an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Maps the buffer object specified by \p buffer into the address space of the
+ * current CUDA context and returns in \p *dptr and \p *size the base pointer
+ * and size of the resulting mapping.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * Stream \p hStream in the current CUDA context is synchronized with
+ * the current GL context.
+ *
+ * \param dptr - Returned mapped base pointer
+ * \param size - Returned size of mapping
+ * \param buffer - The name of the buffer object to map
+ * \param hStream - Stream to synchronize
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE,
+ * ::CUDA_ERROR_MAP_FAILED
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsMapResources
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
+
+/**
+ * \brief Unmaps an OpenGL buffer object
+ *
+ * \deprecated This function is deprecated as of Cuda 3.0.
+ *
+ * Unmaps the buffer object specified by \p buffer for access by CUDA.
+ *
+ * There must be a valid OpenGL context bound to the current thread
+ * when this function is called. This must be the same context, or a
+ * member of the same shareGroup, as the context that was bound when
+ * the buffer was registered.
+ *
+ * Stream \p hStream in the current CUDA context is synchronized with
+ * the current GL context.
+ *
+ * \param buffer - Name of the buffer object to unmap
+ * \param hStream - Stream to synchronize
+ *
+ * \return
+ * ::CUDA_SUCCESS,
+ * ::CUDA_ERROR_DEINITIALIZED,
+ * ::CUDA_ERROR_NOT_INITIALIZED,
+ * ::CUDA_ERROR_INVALID_CONTEXT,
+ * ::CUDA_ERROR_INVALID_VALUE
+ * \notefnerr
+ *
+ * \sa ::cuGraphicsUnmapResources
+ */
+__CUDA_DEPRECATED CUresult CUDAAPI cuGLUnmapBufferObjectAsync(GLuint buffer, CUstream hStream);
+
+/** @} */ /* END CUDA_GL_DEPRECATED */
+/** @} */ /* END CUDA_GL */
+
+
+#if defined(__CUDA_API_VERSION_INTERNAL)
+ /* Remove any macro definitions of these names so the declarations below use the plain identifiers. */
+ #undef cuGLCtxCreate
+ #undef cuGLMapBufferObject
+ #undef cuGLMapBufferObjectAsync
+ #undef cuGLGetDevices
+
+ CUresult CUDAAPI cuGLGetDevices(unsigned int *pCudaDeviceCount, CUdevice *pCudaDevices, unsigned int cudaDeviceCount, CUGLDeviceList deviceList);
+ CUresult CUDAAPI cuGLMapBufferObject_v2(CUdeviceptr *dptr, size_t *size, GLuint buffer);
+ CUresult CUDAAPI cuGLMapBufferObjectAsync_v2(CUdeviceptr *dptr, size_t *size, GLuint buffer, CUstream hStream);
+ CUresult CUDAAPI cuGLCtxCreate(CUcontext *pCtx, unsigned int Flags, CUdevice device );
+ /* Non-_v2 map entry points: CUdeviceptr_v1 pointer and unsigned int size, versus CUdeviceptr and size_t in the _v2 forms above. */
+ CUresult CUDAAPI cuGLMapBufferObject(CUdeviceptr_v1 *dptr, unsigned int *size, GLuint buffer);
+ CUresult CUDAAPI cuGLMapBufferObjectAsync(CUdeviceptr_v1 *dptr, unsigned int *size, GLuint buffer, CUstream hStream);
+#endif /* __CUDA_API_VERSION_INTERNAL */
+
+#ifdef __cplusplus
+};
+#endif
+
+#undef __CUDA_DEPRECATED
+
+#endif
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_pcsampling_util.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_pcsampling_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..595d6028fbf2ff9a3bbffaafe90ec80f7d512533
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/cupti_pcsampling_util.h
@@ -0,0 +1,402 @@
+#if !defined(_CUPTI_PCSAMPLING_UTIL_H_)
+#define _CUPTI_PCSAMPLING_UTIL_H_
+
+#include
+#include
+
+#include
+
+/* Size in bytes of type_ up to and including lastfield_: the field's offset plus the field's own size. */
+#ifndef CUPTI_UTIL_STRUCT_SIZE
+#define CUPTI_UTIL_STRUCT_SIZE(type_, lastfield_) (offsetof(type_, lastfield_) + sizeof(((type_*)0)->lastfield_))
+#endif
+
+/* Non-zero when member starts at an offset below structSize, i.e. inside a struct of the given size. */
+#ifndef CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS
+#define CHECK_PC_SAMPLING_STRUCT_FIELD_EXISTS(type, member, structSize) \
+ (offsetof(type, member) < structSize)
+#endif
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+#if defined(__GNUC__)
+ #pragma GCC visibility push(default)
+#endif
+
+namespace CUPTI { namespace PcSamplingUtil {
+
+/**
+ * \defgroup CUPTI_PCSAMPLING_UTILITY CUPTI PC Sampling Utility API
+ * Functions, types, and enums that implement the CUPTI PC Sampling Utility API.
+ * @{
+ */
+
+/**
+ * \brief File header record: the file format version and the number of
+ * buffers stored in the file.
+ */
+typedef struct PACKED_ALIGNMENT {
+ /**
+ * Version of file format.
+ */
+ uint32_t version;
+ /**
+ * Total number of buffers present in the file.
+ */
+ uint32_t totalBuffers;
+} Header;
+
+/**
+ * \brief BufferInfo will be stored in the file for every buffer,
+ * i.e. for every call of the \ref CuptiUtilPutPcSampData API.
+ *
+ * NOTE(review): numStallReasons is a size_t, whose width is platform
+ * dependent, so the stored layout presumably differs between 32-bit and
+ * 64-bit builds - confirm before sharing files across platforms.
+ */
+typedef struct PACKED_ALIGNMENT {
+ /**
+ * Total number of PC records.
+ */
+ uint64_t recordCount;
+ /**
+ * Count of all stall reasons supported on the GPU
+ */
+ size_t numStallReasons;
+ /**
+ * Total number of stall reasons in single record.
+ */
+ uint64_t numSelectedStallReasons;
+ /**
+ * Buffer size in Bytes.
+ */
+ uint64_t bufferByteSize;
+} BufferInfo;
+
+/**
+ * \brief Names and indexes of all available stall reasons.
+ *
+ * NOTE(review): stallReasons and stallReasonIndex presumably each hold
+ * numStallReasons entries - confirm against the implementation.
+ */
+typedef struct PACKED_ALIGNMENT {
+ /**
+ * Number of all available stall reasons
+ */
+ size_t numStallReasons;
+ /**
+ * Stall reasons names of all available stall reasons
+ */
+ char **stallReasons;
+ /**
+ * Stall reason index of all available stall reasons
+ */
+ uint32_t *stallReasonIndex;
+} PcSamplingStallReasons;
+
+/**
+ * \brief CUPTI PC sampling buffer types.
+ *
+ */
+typedef enum {
+ /**
+ * Invalid buffer type.
+ */
+ PC_SAMPLING_BUFFER_INVALID = 0,
+ /**
+ * Refers to CUpti_PCSamplingData buffer.
+ */
+ PC_SAMPLING_BUFFER_PC_TO_COUNTER_DATA = 1
+} PcSamplingBufferType;
+
+/**
+ * \brief CUPTI PC sampling utility API result codes.
+ *
+ * Error and result codes returned by CUPTI PC sampling utility API.
+ */
+typedef enum {
+ /**
+ * No error
+ */
+ CUPTI_UTIL_SUCCESS = 0,
+ /**
+ * One or more of the parameters are invalid.
+ */
+ CUPTI_UTIL_ERROR_INVALID_PARAMETER = 1,
+ /**
+ * Unable to create a new file
+ */
+ CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE = 2,
+ /**
+ * Unable to open a file
+ */
+ CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE = 3,
+ /**
+ * Read or write operation failed
+ */
+ CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED = 4,
+ /**
+ * Provided file handle is corrupted.
+ */
+ CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED = 5,
+ /**
+ * seek operation failed.
+ */
+ CUPTI_UTIL_ERROR_SEEK_OPERATION_FAILED = 6,
+ /**
+ * Unable to allocate enough memory to perform the requested
+ * operation.
+ */
+ CUPTI_UTIL_ERROR_OUT_OF_MEMORY = 7,
+ /**
+ * An unknown internal error has occurred.
+ */
+ CUPTI_UTIL_ERROR_UNKNOWN = 999,
+ /**
+ * NOTE(review): looks like a sentinel rather than a real result code,
+ * presumably present so the enum is at least int-sized - confirm.
+ */
+ CUPTI_UTIL_ERROR_FORCE_INT = 0x7fffffff
+} CUptiUtilResult;
+
+/**
+ * \brief Params for \ref CuptiUtilPutPcSampData
+ */
+typedef struct {
+ /**
+ * Size of the data structure i.e. CUptiUtil_PutPcSampDataParamsSize
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+ * available in the structure. Used to preserve backward compatibility.
+ */
+ size_t size;
+ /**
+ * Type of buffer to store in file
+ */
+ PcSamplingBufferType bufferType;
+ /**
+ * PC sampling buffer.
+ */
+ void *pSamplingData;
+ /**
+ * Number of configured attributes
+ */
+ size_t numAttributes;
+ /**
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
+ * It is expected to provide configuration details of at least
+ * CUPTI_PC_SAMPLING_CONFIGURATION_ATTR_TYPE_STALL_REASON attribute.
+ */
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
+ /**
+ * Refer \ref PcSamplingStallReasons.
+ */
+ PcSamplingStallReasons *pPcSamplingStallReasons;
+ /**
+ * Name of the file to store the buffer in.
+ */
+ const char* fileName;
+} CUptiUtil_PutPcSampDataParams;
+#define CUptiUtil_PutPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_PutPcSampDataParams, fileName)
+
+/**
+ * \brief Dump PC sampling data into the file.
+ *
+ * This API can be called multiple times.
+ * It will append buffer in the file.
+ * For every buffer it will store BufferInfo
+ * so that before retrieving data it will help to allocate buffer
+ * to store retrieved data.
+ * This API creates the file if it does not already exist.
+ * If stallReasonIndex or stallReasons pointer of \ref CUptiUtil_PutPcSampDataParams is NULL
+ * then stall reasons data will not be stored in file.
+ * It is expected to store all available stall reason data at least once to refer it during
+ * offline correlation.
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
+ * or if either of pSamplingData, pParams pointer is NULL or stall reason configuration details not provided
+ * or filename is empty.
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_CREATE_FILE
+ * \retval CUPTI_UTIL_ERROR_UNABLE_TO_OPEN_FILE
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilPutPcSampData(CUptiUtil_PutPcSampDataParams *pParams);
+
+/**
+ * \brief Params for \ref CuptiUtilGetHeaderData
+ */
+typedef struct {
+ /**
+ * Size of the data structure i.e. CUptiUtil_GetHeaderDataParamsSize
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+ * available in the structure. Used to preserve backward compatibility.
+ */
+ size_t size;
+ /**
+ * File handle.
+ */
+ std::ifstream *fileHandler;
+ /**
+ * Header Info.
+ */
+ Header headerInfo;
+
+} CUptiUtil_GetHeaderDataParams;
+#define CUptiUtil_GetHeaderDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetHeaderDataParams, headerInfo)
+
+/**
+ * \brief Get header data of file.
+ *
+ * This API must be called once initially while retrieving data from file.
+ * \ref Header structure, it gives info about total number
+ * of buffers present in the file.
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandler is NULL or param struct size is incorrect.
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetHeaderData(CUptiUtil_GetHeaderDataParams *pParams);
+
+/**
+ * \brief Params for \ref CuptiUtilGetBufferInfo
+ */
+typedef struct {
+ /**
+ * Size of the data structure i.e. CUptiUtil_GetBufferInfoParamsSize
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+ * available in the structure. Used to preserve backward compatibility.
+ */
+ size_t size;
+ /**
+ * File handle.
+ */
+ std::ifstream *fileHandler;
+ /**
+ * Buffer Info.
+ */
+ BufferInfo bufferInfoData;
+} CUptiUtil_GetBufferInfoParams;
+#define CUptiUtil_GetBufferInfoParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetBufferInfoParams, bufferInfoData)
+
+/**
+ * \brief Get buffer info data of file.
+ *
+ * This API must be called every time before calling CuptiUtilGetPcSampData API.
+ * The \ref BufferInfo structure gives the record count of the buffer and the number of selected
+ * stall reasons in each record. This helps to allocate a buffer of the exact size needed to retrieve the data.
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if either of pParams or fileHandler is NULL or param struct size is incorrect.
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED failed to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetBufferInfo(CUptiUtil_GetBufferInfoParams *pParams);
+
+/**
+ * \brief Params for \ref CuptiUtilGetPcSampData
+ */
+typedef struct {
+ /**
+ * Size of the data structure i.e. CUptiUtil_GetPcSampDataParamsSize
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+ * available in the structure. Used to preserve backward compatibility.
+ */
+ size_t size;
+ /**
+ * File handle.
+ */
+ std::ifstream *fileHandler;
+ /**
+ * Type of buffer to retrieve from file
+ */
+ PcSamplingBufferType bufferType;
+ /**
+ * Pointer to collected buffer info using \ref CuptiUtilGetBufferInfo
+ */
+ BufferInfo *pBufferInfoData;
+ /**
+ * Pointer to allocated memory to store retrieved data from file.
+ */
+ void *pSamplingData;
+ /**
+ * Number of configuration attributes
+ */
+ size_t numAttributes;
+ /**
+ * Refer \ref CUpti_PCSamplingConfigurationInfo
+ */
+ CUpti_PCSamplingConfigurationInfo *pPCSamplingConfigurationInfo;
+ /**
+ * Refer \ref PcSamplingStallReasons.
+ * For stallReasons field of \ref PcSamplingStallReasons it is expected to
+ * allocate memory for each string element of array.
+ */
+ PcSamplingStallReasons *pPcSamplingStallReasons;
+} CUptiUtil_GetPcSampDataParams;
+#define CUptiUtil_GetPcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_GetPcSampDataParams, pPcSamplingStallReasons)
+
+/**
+ * \brief Retrieve PC sampling data from file into allocated buffer.
+ *
+ * This API must be called after CuptiUtilGetBufferInfo API.
+ * It will retrieve data from file into allocated buffer.
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if buffer type is invalid
+ * or if either of pSampData, pParams is NULL. If pPcSamplingStallReasons is not NULL then
+ * error out if either of stallReasonIndex, stallReasons or stallReasons array element pointer is NULL.
+ * or filename is empty.
+ * \retval CUPTI_UTIL_ERROR_READ_WRITE_OPERATION_FAILED
+ * \retval CUPTI_UTIL_ERROR_FILE_HANDLE_CORRUPTED file handle is not in good state to read data from file.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilGetPcSampData(CUptiUtil_GetPcSampDataParams *pParams);
+
+/**
+ * \brief Params for \ref CuptiUtilMergePcSampData
+ */
+typedef struct
+{
+ /**
+ * Size of the data structure i.e. CUptiUtil_MergePcSampDataParamsSize
+ * CUPTI client should set the size of the structure. It will be used in CUPTI to check what fields are
+ * available in the structure. Used to preserve backward compatibility.
+ */
+ size_t size;
+ /**
+ * Number of buffers to merge.
+ */
+ size_t numberOfBuffers;
+ /**
+ * Pointer to array of buffers to merge
+ */
+ CUpti_PCSamplingData *PcSampDataBuffer;
+ /**
+ * Pointer to array of merged buffers as per the range id.
+ */
+ CUpti_PCSamplingData **MergedPcSampDataBuffers;
+ /**
+ * Number of merged buffers.
+ */
+ size_t *numMergedBuffer;
+} CUptiUtil_MergePcSampDataParams;
+#define CUptiUtil_MergePcSampDataParamsSize CUPTI_UTIL_STRUCT_SIZE(CUptiUtil_MergePcSampDataParams, numMergedBuffer)
+
+/**
+ * \brief Merge PC sampling data by range id.
+ *
+ * This API merges PC sampling data by range id.
+ * It allocates memory for the merged data, fills it in,
+ * and returns the buffer pointers in the MergedPcSampDataBuffers field.
+ * The caller is expected to free the merged data buffers after use.
+ *
+ * \retval CUPTI_UTIL_SUCCESS
+ * \retval CUPTI_UTIL_ERROR_INVALID_PARAMETER error out if param struct size is invalid
+ * or count of buffers to merge is invalid i.e less than 1
+ * or either of PcSampDataBuffer, MergedPcSampDataBuffers, numMergedBuffer is NULL
+ * \retval CUPTI_UTIL_ERROR_OUT_OF_MEMORY Unable to allocate memory for merged buffer.
+ */
+CUptiUtilResult CUPTIUTILAPI CuptiUtilMergePcSampData(CUptiUtil_MergePcSampDataParams *pParams);
+
+/** @} */ /* END CUPTI_PCSAMPLING_UTILITY */
+
+} }
+
+#if defined(__GNUC__)
+ #pragma GCC visibility pop
+#endif
+
+#if defined(__cplusplus)
+}
+#endif
+
+#endif
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/driver_types.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/driver_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..541cbc6eb76dfdf42c407ca7c9537b603714d64c
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/driver_types.h
@@ -0,0 +1,4110 @@
+/*
+ * Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee. Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users. These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item. Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+#if !defined(__DRIVER_TYPES_H__)
+#define __DRIVER_TYPES_H__
+
+#if !defined(__CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__)
+#define __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
+#define __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DRIVER_TYPES_H__
+#endif
+
+#ifndef __DOXYGEN_ONLY__
+#include "crt/host_defines.h"
+#endif
+#include "vector_types.h"
+
+
+
+#ifndef __CUDACC_RTC_MINIMAL__
+/**
+ * \defgroup CUDART_TYPES Data types used by CUDA Runtime
+ * \ingroup CUDART
+ *
+ * @{
+ */
+
+/*******************************************************************************
+* *
+* TYPE DEFINITIONS USED BY RUNTIME API *
+* *
+*******************************************************************************/
+
+#if !defined(__CUDA_INTERNAL_COMPILATION__)
+
+
+#if !defined(__CUDACC_RTC__)
+#include
+#include
+#endif /* !defined(__CUDACC_RTC__) */
+
+#define cudaHostAllocDefault 0x00 /**< Default page-locked allocation flag */
+#define cudaHostAllocPortable 0x01 /**< Pinned memory accessible by all CUDA contexts */
+#define cudaHostAllocMapped 0x02 /**< Map allocation into device space */
+#define cudaHostAllocWriteCombined 0x04 /**< Write-combined memory */
+
+#define cudaHostRegisterDefault 0x00 /**< Default host memory registration flag */
+#define cudaHostRegisterPortable 0x01 /**< Pinned memory accessible by all CUDA contexts */
+#define cudaHostRegisterMapped 0x02 /**< Map registered memory into device space */
+#define cudaHostRegisterIoMemory 0x04 /**< Memory-mapped I/O space */
+#define cudaHostRegisterReadOnly 0x08 /**< Memory-mapped read-only */
+
+#define cudaPeerAccessDefault 0x00 /**< Default peer addressing enable flag */
+
+#define cudaStreamDefault 0x00 /**< Default stream flag */
+#define cudaStreamNonBlocking 0x01 /**< Stream does not synchronize with stream 0 (the NULL stream) */
+
+ /**
+ * Legacy stream handle
+ *
+ * Stream handle that can be passed as a cudaStream_t to use an implicit stream
+ * with legacy synchronization behavior.
+ *
+ * See details of the \link_sync_behavior
+ */
+#define cudaStreamLegacy ((cudaStream_t)0x1)
+
+/**
+ * Per-thread stream handle
+ *
+ * Stream handle that can be passed as a cudaStream_t to use an implicit stream
+ * with per-thread synchronization behavior.
+ *
+ * See details of the \link_sync_behavior
+ */
+#define cudaStreamPerThread ((cudaStream_t)0x2)
+
+#define cudaEventDefault 0x00 /**< Default event flag */
+#define cudaEventBlockingSync 0x01 /**< Event uses blocking synchronization */
+#define cudaEventDisableTiming 0x02 /**< Event will not record timing data */
+#define cudaEventInterprocess 0x04 /**< Event is suitable for interprocess use. cudaEventDisableTiming must be set */
+
+#define cudaEventRecordDefault 0x00 /**< Default event record flag */
+#define cudaEventRecordExternal 0x01 /**< Event is captured in the graph as an external event node when performing stream capture */
+
+#define cudaEventWaitDefault 0x00 /**< Default event wait flag */
+#define cudaEventWaitExternal 0x01 /**< Event is captured in the graph as an external event node when performing stream capture */
+
+#define cudaDeviceScheduleAuto 0x00 /**< Device flag - Automatic scheduling */
+#define cudaDeviceScheduleSpin 0x01 /**< Device flag - Spin default scheduling */
+#define cudaDeviceScheduleYield 0x02 /**< Device flag - Yield default scheduling */
+#define cudaDeviceScheduleBlockingSync 0x04 /**< Device flag - Use blocking synchronization */
+#define cudaDeviceBlockingSync 0x04 /**< Device flag - Use blocking synchronization
+ * \deprecated This flag was deprecated as of CUDA 4.0 and
+ * replaced with ::cudaDeviceScheduleBlockingSync. */
+#define cudaDeviceScheduleMask 0x07 /**< Device schedule flags mask */
+#define cudaDeviceMapHost 0x08 /**< Device flag - Support mapped pinned allocations */
+#define cudaDeviceLmemResizeToMax 0x10 /**< Device flag - Keep local memory allocation after launch */
+#define cudaDeviceSyncMemops 0x80 /**< Device flag - Ensure synchronous memory operations on this context will synchronize */
+#define cudaDeviceMask 0xff /**< Device flags mask */
+
+#define cudaArrayDefault 0x00 /**< Default CUDA array allocation flag */
+#define cudaArrayLayered 0x01 /**< Must be set in cudaMalloc3DArray to create a layered CUDA array */
+#define cudaArraySurfaceLoadStore 0x02 /**< Must be set in cudaMallocArray or cudaMalloc3DArray in order to bind surfaces to the CUDA array */
+#define cudaArrayCubemap 0x04 /**< Must be set in cudaMalloc3DArray to create a cubemap CUDA array */
+#define cudaArrayTextureGather 0x08 /**< Must be set in cudaMallocArray or cudaMalloc3DArray in order to perform texture gather operations on the CUDA array */
+#define cudaArrayColorAttachment 0x20 /**< Must be set in cudaExternalMemoryGetMappedMipmappedArray if the mipmapped array is used as a color target in a graphics API */
+#define cudaArraySparse 0x40 /**< Must be set in cudaMallocArray, cudaMalloc3DArray or cudaMallocMipmappedArray in order to create a sparse CUDA array or CUDA mipmapped array */
+#define cudaArrayDeferredMapping 0x80 /**< Must be set in cudaMallocArray, cudaMalloc3DArray or cudaMallocMipmappedArray in order to create a deferred mapping CUDA array or CUDA mipmapped array */
+
+#define cudaIpcMemLazyEnablePeerAccess 0x01 /**< Automatically enable peer access between remote devices as needed */
+
+#define cudaMemAttachGlobal 0x01 /**< Memory can be accessed by any stream on any device*/
+#define cudaMemAttachHost 0x02 /**< Memory cannot be accessed by any stream on any device */
+#define cudaMemAttachSingle 0x04 /**< Memory can only be accessed by a single stream on the associated device */
+
+#define cudaOccupancyDefault 0x00 /**< Default behavior */
+#define cudaOccupancyDisableCachingOverride 0x01 /**< Assume global caching is enabled and cannot be automatically turned off */
+
+#define cudaCpuDeviceId ((int)-1) /**< Device id that represents the CPU */
+#define cudaInvalidDeviceId ((int)-2) /**< Device id that represents an invalid device */
+#define cudaInitDeviceFlagsAreValid 0x01 /**< Tell the CUDA runtime that DeviceFlags is being set in cudaInitDevice call */
+/**
+ * If set, each kernel launched as part of ::cudaLaunchCooperativeKernelMultiDevice only
+ * waits for prior work in the stream corresponding to that GPU to complete before the
+ * kernel begins execution.
+ */
+#define cudaCooperativeLaunchMultiDeviceNoPreSync 0x01
+
+/**
+ * If set, any subsequent work pushed in a stream that participated in a call to
+ * ::cudaLaunchCooperativeKernelMultiDevice will only wait for the kernel launched on
+ * the GPU corresponding to that stream to complete before it begins execution.
+ */
+#define cudaCooperativeLaunchMultiDeviceNoPostSync 0x02
+
+#endif /* !__CUDA_INTERNAL_COMPILATION__ */
+
+/** \cond impl_private */
+/* __CUDA_DEPRECATED expands to nothing when __DOXYGEN_ONLY__ or
+ * CUDA_ENABLE_DEPRECATED is defined; otherwise to __declspec(deprecated)
+ * under _MSC_VER, to __attribute__((deprecated)) under __GNUC__, and to
+ * nothing for any other compiler. */
+#if defined(__DOXYGEN_ONLY__) || defined(CUDA_ENABLE_DEPRECATED)
+#define __CUDA_DEPRECATED
+#elif defined(_MSC_VER)
+#define __CUDA_DEPRECATED __declspec(deprecated)
+#elif defined(__GNUC__)
+#define __CUDA_DEPRECATED __attribute__((deprecated))
+#else
+#define __CUDA_DEPRECATED
+#endif
+/** \endcond impl_private */
+
+/*******************************************************************************
+* *
+* *
+* *
+*******************************************************************************/
+
+/**
+ * CUDA error types
+ */
+enum __device_builtin__ cudaError
+{
+ /**
+ * The API call returned with no errors. In the case of query calls, this
+ * also means that the operation being queried is complete (see
+ * ::cudaEventQuery() and ::cudaStreamQuery()).
+ */
+ cudaSuccess = 0,
+
+ /**
+ * This indicates that one or more of the parameters passed to the API call
+ * is not within an acceptable range of values.
+ */
+ cudaErrorInvalidValue = 1,
+
+ /**
+ * The API call failed because it was unable to allocate enough memory or
+ * other resources to perform the requested operation.
+ */
+ cudaErrorMemoryAllocation = 2,
+
+ /**
+ * The API call failed because the CUDA driver and runtime could not be
+ * initialized.
+ */
+ cudaErrorInitializationError = 3,
+
+ /**
+ * This indicates that a CUDA Runtime API call cannot be executed because
+ * it is being called during process shut down, at a point in time after
+ * CUDA driver has been unloaded.
+ */
+ cudaErrorCudartUnloading = 4,
+
+ /**
+ * This indicates profiler is not initialized for this run. This can
+ * happen when the application is running with external profiling tools
+ * like visual profiler.
+ */
+ cudaErrorProfilerDisabled = 5,
+
+ /**
+ * \deprecated
+ * This error return is deprecated as of CUDA 5.0. It is no longer an error
+ * to attempt to enable/disable the profiling via ::cudaProfilerStart or
+ * ::cudaProfilerStop without initialization.
+ */
+ cudaErrorProfilerNotInitialized = 6,
+
+ /**
+ * \deprecated
+ * This error return is deprecated as of CUDA 5.0. It is no longer an error
+ * to call cudaProfilerStart() when profiling is already enabled.
+ */
+ cudaErrorProfilerAlreadyStarted = 7,
+
+ /**
+ * \deprecated
+ * This error return is deprecated as of CUDA 5.0. It is no longer an error
+ * to call cudaProfilerStop() when profiling is already disabled.
+ */
+ cudaErrorProfilerAlreadyStopped = 8,
+ /**
+ * This indicates that a kernel launch is requesting resources that can
+ * never be satisfied by the current device. Requesting more shared memory
+ * per block than the device supports will trigger this error, as will
+ * requesting too many threads or blocks. See ::cudaDeviceProp for more
+ * device limitations.
+ */
+ cudaErrorInvalidConfiguration = 9,
+
+ /**
+ * This indicates that one or more of the pitch-related parameters passed
+ * to the API call is not within the acceptable range for pitch.
+ */
+ cudaErrorInvalidPitchValue = 12,
+
+ /**
+ * This indicates that the symbol name/identifier passed to the API call
+ * is not a valid name or identifier.
+ */
+ cudaErrorInvalidSymbol = 13,
+
+ /**
+ * This indicates that at least one host pointer passed to the API call is
+ * not a valid host pointer.
+ * \deprecated
+ * This error return is deprecated as of CUDA 10.1.
+ */
+ cudaErrorInvalidHostPointer = 16,
+
+ /**
+ * This indicates that at least one device pointer passed to the API call is
+ * not a valid device pointer.
+ * \deprecated
+ * This error return is deprecated as of CUDA 10.1.
+ */
+ cudaErrorInvalidDevicePointer = 17,
+ /**
+ * This indicates that the texture passed to the API call is not a valid
+ * texture.
+ */
+ cudaErrorInvalidTexture = 18,
+
+ /**
+ * This indicates that the texture binding is not valid. This occurs if you
+ * call ::cudaGetTextureAlignmentOffset() with an unbound texture.
+ */
+ cudaErrorInvalidTextureBinding = 19,
+
+ /**
+ * This indicates that the channel descriptor passed to the API call is not
+ * valid. This occurs if the format is not one of the formats specified by
+ * ::cudaChannelFormatKind, or if one of the dimensions is invalid.
+ */
+ cudaErrorInvalidChannelDescriptor = 20,
+
+ /**
+ * This indicates that the direction of the memcpy passed to the API call is
+ * not one of the types specified by ::cudaMemcpyKind.
+ */
+ cudaErrorInvalidMemcpyDirection = 21,
+
+ /**
+ * This indicated that the user has taken the address of a constant variable,
+ * which was forbidden up until the CUDA 3.1 release.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Variables in constant
+ * memory may now have their address taken by the runtime via
+ * ::cudaGetSymbolAddress().
+ */
+ cudaErrorAddressOfConstant = 22,
+
+ /**
+ * This indicated that a texture fetch was not able to be performed.
+ * This was previously used for device emulation of texture operations.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorTextureFetchFailed = 23,
+
+ /**
+ * This indicated that a texture was not bound for access.
+ * This was previously used for device emulation of texture operations.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorTextureNotBound = 24,
+
+ /**
+ * This indicated that a synchronization operation had failed.
+ * This was previously used for some device emulation functions.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorSynchronizationError = 25,
+ /**
+ * This indicates that a non-float texture was being accessed with linear
+ * filtering. This is not supported by CUDA.
+ */
+ cudaErrorInvalidFilterSetting = 26,
+
+ /**
+ * This indicates that an attempt was made to read an unsupported data type as a
+ * normalized float. This is not supported by CUDA.
+ */
+ cudaErrorInvalidNormSetting = 27,
+
+ /**
+ * Mixing of device and device emulation code was not allowed.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorMixedDeviceExecution = 28,
+
+ /**
+ * This indicates that the API call is not yet implemented. Production
+ * releases of CUDA will never return this error.
+ * \deprecated
+ * This error return is deprecated as of CUDA 4.1.
+ */
+ cudaErrorNotYetImplemented = 31,
+
+ /**
+ * This indicated that an emulated device pointer exceeded the 32-bit address
+ * range.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorMemoryValueTooLarge = 32,
+ /**
+ * This indicates that the CUDA driver that the application has loaded is a
+ * stub library. Applications that run with the stub rather than a real
+ * driver loaded will result in CUDA API returning this error.
+ */
+ cudaErrorStubLibrary = 34,
+
+ /**
+ * This indicates that the installed NVIDIA CUDA driver is older than the
+ * CUDA runtime library. This is not a supported configuration. Users should
+ * install an updated NVIDIA display driver to allow the application to run.
+ */
+ cudaErrorInsufficientDriver = 35,
+
+ /**
+ * This indicates that the API call requires a newer CUDA driver than the one
+ * currently installed. Users should install an updated NVIDIA CUDA driver
+ * to allow the API call to succeed.
+ */
+ cudaErrorCallRequiresNewerDriver = 36,
+
+ /**
+ * This indicates that the surface passed to the API call is not a valid
+ * surface.
+ */
+ cudaErrorInvalidSurface = 37,
+
+ /**
+ * This indicates that multiple global or constant variables (across separate
+ * CUDA source files in the application) share the same string name.
+ */
+ cudaErrorDuplicateVariableName = 43,
+
+ /**
+ * This indicates that multiple textures (across separate CUDA source
+ * files in the application) share the same string name.
+ */
+ cudaErrorDuplicateTextureName = 44,
+
+ /**
+ * This indicates that multiple surfaces (across separate CUDA source
+ * files in the application) share the same string name.
+ */
+ cudaErrorDuplicateSurfaceName = 45,
+
+ /**
+ * This indicates that all CUDA devices are busy or unavailable at the current
+ * time. Devices are often busy/unavailable due to use of
+ * ::cudaComputeModeProhibited, ::cudaComputeModeExclusiveProcess, or when long
+ * running CUDA kernels have filled up the GPU and are blocking new work
+ * from starting. They can also be unavailable due to memory constraints
+ * on a device that already has active CUDA work being performed.
+ */
+ cudaErrorDevicesUnavailable = 46,
+
+ /**
+ * This indicates that the current context is not compatible with
+ * the CUDA Runtime. This can only occur if you are using CUDA
+ * Runtime/Driver interoperability and have created an existing Driver
+ * context using the driver API. The Driver context may be incompatible
+ * either because the Driver context was created using an older version
+ * of the API, because the Runtime API call expects a primary driver
+ * context and the Driver context is not primary, or because the Driver
+ * context has been destroyed. Please see \ref CUDART_DRIVER "Interactions
+ * with the CUDA Driver API" for more information.
+ */
+ cudaErrorIncompatibleDriverContext = 49,
+
+ /**
+ * The device function being invoked (usually via ::cudaLaunchKernel()) was not
+ * previously configured via the ::cudaConfigureCall() function.
+ */
+ cudaErrorMissingConfiguration = 52,
+
+ /**
+ * This indicated that a previous kernel launch failed. This was previously
+ * used for device emulation of kernel launches.
+ * \deprecated
+ * This error return is deprecated as of CUDA 3.1. Device emulation mode was
+ * removed with the CUDA 3.1 release.
+ */
+ cudaErrorPriorLaunchFailure = 53,
+ /**
+ * This error indicates that a device runtime grid launch did not occur
+ * because the depth of the child grid would exceed the maximum supported
+ * number of nested grid launches.
+ */
+ cudaErrorLaunchMaxDepthExceeded = 65,
+
+ /**
+ * This error indicates that a grid launch did not occur because the kernel
+ * uses file-scoped textures which are unsupported by the device runtime.
+ * Kernels launched via the device runtime only support textures created with
+ * the Texture Object APIs.
+ */
+ cudaErrorLaunchFileScopedTex = 66,
+
+ /**
+ * This error indicates that a grid launch did not occur because the kernel
+ * uses file-scoped surfaces which are unsupported by the device runtime.
+ * Kernels launched via the device runtime only support surfaces created with
+ * the Surface Object APIs.
+ */
+ cudaErrorLaunchFileScopedSurf = 67,
+
+ /**
+ * This error indicates that a call to ::cudaDeviceSynchronize made from
+ * the device runtime failed because the call was made at grid depth greater
+ * than either the default (2 levels of grids) or user specified device
+ * limit ::cudaLimitDevRuntimeSyncDepth. To be able to synchronize on
+ * launched grids at a greater depth successfully, the maximum nested
+ * depth at which ::cudaDeviceSynchronize will be called must be specified
+ * with the ::cudaLimitDevRuntimeSyncDepth limit to the ::cudaDeviceSetLimit
+ * api before the host-side launch of a kernel using the device runtime.
+ * Keep in mind that additional levels of sync depth require the runtime
+ * to reserve large amounts of device memory that cannot be used for
+ * user allocations. Note that ::cudaDeviceSynchronize made from device
+ * runtime is only supported on devices of compute capability < 9.0.
+ */
+ cudaErrorSyncDepthExceeded = 68,
+
+ /**
+ * This error indicates that a device runtime grid launch failed because
+ * the launch would exceed the limit ::cudaLimitDevRuntimePendingLaunchCount.
+ * For this launch to proceed successfully, ::cudaDeviceSetLimit must be
+ * called to set the ::cudaLimitDevRuntimePendingLaunchCount to be higher
+ * than the upper bound of outstanding launches that can be issued to the
+ * device runtime. Keep in mind that raising the limit of pending device
+ * runtime launches will require the runtime to reserve device memory that
+ * cannot be used for user allocations.
+ */
+ cudaErrorLaunchPendingCountExceeded = 69,
+
+ /**
+ * The requested device function does not exist or is not compiled for the
+ * proper device architecture.
+ */
+ cudaErrorInvalidDeviceFunction = 98,
+
+ /**
+ * This indicates that no CUDA-capable devices were detected by the installed
+ * CUDA driver.
+ */
+ cudaErrorNoDevice = 100,
+
+ /**
+ * This indicates that the device ordinal supplied by the user does not
+ * correspond to a valid CUDA device or that the action requested is
+ * invalid for the specified device.
+ */
+ cudaErrorInvalidDevice = 101,
+
+ /**
+ * This indicates that the device doesn't have a valid Grid License.
+ */
+ cudaErrorDeviceNotLicensed = 102,
+
+ /**
+ * By default, the CUDA runtime may perform a minimal set of self-tests,
+ * as well as CUDA driver tests, to establish the validity of both.
+ * Introduced in CUDA 11.2, this error return indicates that at least one
+ * of these tests has failed and the validity of either the runtime
+ * or the driver could not be established.
+ */
+ cudaErrorSoftwareValidityNotEstablished = 103,
+
+ /**
+ * This indicates an internal startup failure in the CUDA runtime.
+ */
+ cudaErrorStartupFailure = 127,
+
+ /**
+ * This indicates that the device kernel image is invalid.
+ */
+ cudaErrorInvalidKernelImage = 200,
+
+ /**
+ * This most frequently indicates that there is no context bound to the
+ * current thread. This can also be returned if the context passed to an
+ * API call is not a valid handle (such as a context that has had
+ * ::cuCtxDestroy() invoked on it). This can also be returned if a user
+ * mixes different API versions (i.e. 3010 context with 3020 API calls).
+ * See ::cuCtxGetApiVersion() for more details.
+ */
+ cudaErrorDeviceUninitialized = 201,
+
+ /**
+ * This indicates that the buffer object could not be mapped.
+ */
+ cudaErrorMapBufferObjectFailed = 205,
+
+ /**
+ * This indicates that the buffer object could not be unmapped.
+ */
+ cudaErrorUnmapBufferObjectFailed = 206,
+
+ /**
+ * This indicates that the specified array is currently mapped and thus
+ * cannot be destroyed.
+ */
+ cudaErrorArrayIsMapped = 207,
+
+ /**
+ * This indicates that the resource is already mapped.
+ */
+ cudaErrorAlreadyMapped = 208,
+
+ /**
+ * This indicates that there is no kernel image available that is suitable
+ * for the device. This can occur when a user specifies code generation
+ * options for a particular CUDA source file that do not include the
+ * corresponding device configuration.
+ */
+ cudaErrorNoKernelImageForDevice = 209,
+
+ /**
+ * This indicates that a resource has already been acquired.
+ */
+ cudaErrorAlreadyAcquired = 210,
+
+ /**
+ * This indicates that a resource is not mapped.
+ */
+ cudaErrorNotMapped = 211,
+
+ /**
+ * This indicates that a mapped resource is not available for access as an
+ * array.
+ */
+ cudaErrorNotMappedAsArray = 212,
+
+ /**
+ * This indicates that a mapped resource is not available for access as a
+ * pointer.
+ */
+ cudaErrorNotMappedAsPointer = 213,
+
+ /**
+ * This indicates that an uncorrectable ECC error was detected during
+ * execution.
+ */
+ cudaErrorECCUncorrectable = 214,
+
+ /**
+ * This indicates that the ::cudaLimit passed to the API call is not
+ * supported by the active device.
+ */
+ cudaErrorUnsupportedLimit = 215,
+
+ /**
+ * This indicates that a call tried to access an exclusive-thread device that
+ * is already in use by a different thread.
+ */
+ cudaErrorDeviceAlreadyInUse = 216,
+
+ /**
+ * This error indicates that P2P access is not supported across the given
+ * devices.
+ */
+ cudaErrorPeerAccessUnsupported = 217,
+
+ /**
+ * A PTX compilation failed. The runtime may fall back to compiling PTX if
+ * an application does not contain a suitable binary for the current device.
+ */
+ cudaErrorInvalidPtx = 218,
+
+ /**
+ * This indicates an error with the OpenGL or DirectX context.
+ */
+ cudaErrorInvalidGraphicsContext = 219,
+
+ /**
+ * This indicates that an uncorrectable NVLink error was detected during the
+ * execution.
+ */
+ cudaErrorNvlinkUncorrectable = 220,
+
+ /**
+ * This indicates that the PTX JIT compiler library was not found. The JIT Compiler
+ * library is used for PTX compilation. The runtime may fall back to compiling PTX
+ * if an application does not contain a suitable binary for the current device.
+ */
+ cudaErrorJitCompilerNotFound = 221,
+
+ /**
+ * This indicates that the provided PTX was compiled with an unsupported toolchain.
+ * The most common reason for this is that the PTX was generated by a compiler newer
+ * than what is supported by the CUDA driver and PTX JIT compiler.
+ */
+ cudaErrorUnsupportedPtxVersion = 222,
+
+ /**
+ * This indicates that the JIT compilation was disabled. The JIT compilation compiles
+ * PTX. The runtime may fall back to compiling PTX if an application does not contain
+ * a suitable binary for the current device.
+ */
+ cudaErrorJitCompilationDisabled = 223,
+
+ /**
+ * This indicates that the provided execution affinity is not supported by the device.
+ */
+ cudaErrorUnsupportedExecAffinity = 224,
+
+ /**
+ * This indicates that the code to be compiled by the PTX JIT contains
+ * unsupported call to cudaDeviceSynchronize.
+ */
+ cudaErrorUnsupportedDevSideSync = 225,
+
+ /**
+ * This indicates that an exception occurred on the device that is now
+ * contained by the GPU's error containment capability. Common causes are -
+ * a. Certain types of invalid accesses of peer GPU memory over nvlink
+ * b. Certain classes of hardware errors
+ * This leaves the process in an inconsistent state and any further CUDA
+ * work will return the same error. To continue using CUDA, the process must
+ * be terminated and relaunched.
+ */
+ cudaErrorContained = 226,
+
+ /**
+ * This indicates that the device kernel source is invalid.
+ */
+ cudaErrorInvalidSource = 300,
+
+ /**
+ * This indicates that the file specified was not found.
+ */
+ cudaErrorFileNotFound = 301,
+
+ /**
+ * This indicates that a link to a shared object failed to resolve.
+ */
+ cudaErrorSharedObjectSymbolNotFound = 302,
+
+ /**
+ * This indicates that initialization of a shared object failed.
+ */
+ cudaErrorSharedObjectInitFailed = 303,
+
+ /**
+ * This error indicates that an OS call failed.
+ */
+ cudaErrorOperatingSystem = 304,
+
+ /**
+ * This indicates that a resource handle passed to the API call was not
+ * valid. Resource handles are opaque types like ::cudaStream_t and
+ * ::cudaEvent_t.
+ */
+ cudaErrorInvalidResourceHandle = 400,
+
+ /**
+ * This indicates that a resource required by the API call is not in a
+ * valid state to perform the requested operation.
+ */
+ cudaErrorIllegalState = 401,
+
+ /**
+ * This indicates an attempt was made to introspect an object in a way that
+ * would discard semantically important information. This is either due to
+ * the object using functionality newer than the API version used to
+ * introspect it or omission of optional return arguments.
+ */
+ cudaErrorLossyQuery = 402,
+
+ /**
+ * This indicates that a named symbol was not found. Examples of symbols
+ * are global/constant variable names, driver function names, texture names,
+ * and surface names.
+ */
+ cudaErrorSymbolNotFound = 500,
+
+ /**
+ * This indicates that asynchronous operations issued previously have not
+ * completed yet. This result is not actually an error, but must be indicated
+ * differently than ::cudaSuccess (which indicates completion). Calls that
+ * may return this value include ::cudaEventQuery() and ::cudaStreamQuery().
+ */
+ cudaErrorNotReady = 600,
+
+ /**
+ * The device encountered a load or store instruction on an invalid memory address.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorIllegalAddress = 700,
+
+ /**
+ * This indicates that a launch did not occur because it did not have
+ * appropriate resources. Although this error is similar to
+ * ::cudaErrorInvalidConfiguration, this error usually indicates that the
+ * user has attempted to pass too many arguments to the device kernel, or the
+ * kernel launch specifies too many threads for the kernel's register count.
+ */
+ cudaErrorLaunchOutOfResources = 701,
+
+ /**
+ * This indicates that the device kernel took too long to execute. This can
+ * only occur if timeouts are enabled - see the device property
+ * \ref ::cudaDeviceProp::kernelExecTimeoutEnabled "kernelExecTimeoutEnabled"
+ * for more information.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorLaunchTimeout = 702,
+
+ /**
+ * This error indicates a kernel launch that uses an incompatible texturing
+ * mode.
+ */
+ cudaErrorLaunchIncompatibleTexturing = 703,
+
+ /**
+ * This error indicates that a call to ::cudaDeviceEnablePeerAccess() is
+ * trying to re-enable peer addressing from a context which has already
+ * had peer addressing enabled.
+ */
+ cudaErrorPeerAccessAlreadyEnabled = 704,
+
+ /**
+ * This error indicates that ::cudaDeviceDisablePeerAccess() is trying to
+ * disable peer addressing which has not been enabled yet via
+ * ::cudaDeviceEnablePeerAccess().
+ */
+ cudaErrorPeerAccessNotEnabled = 705,
+
+ /**
+ * This indicates that the user has called ::cudaSetValidDevices(),
+ * ::cudaSetDeviceFlags(), ::cudaD3D9SetDirect3DDevice(),
+ * ::cudaD3D10SetDirect3DDevice, ::cudaD3D11SetDirect3DDevice(), or
+ * ::cudaVDPAUSetVDPAUDevice() after initializing the CUDA runtime by
+ * calling non-device management operations (allocating memory and
+ * launching kernels are examples of non-device management operations).
+ * This error can also be returned if using runtime/driver
+ * interoperability and there is an existing ::CUcontext active on the
+ * host thread.
+ */
+ cudaErrorSetOnActiveProcess = 708,
+
+ /**
+ * This error indicates that the context current to the calling thread
+ * has been destroyed using ::cuCtxDestroy, or is a primary context which
+ * has not yet been initialized.
+ */
+ cudaErrorContextIsDestroyed = 709,
+
+ /**
+ * An assert triggered in device code during kernel execution. The device
+ * cannot be used again. All existing allocations are invalid. To continue
+ * using CUDA, the process must be terminated and relaunched.
+ */
+ cudaErrorAssert = 710,
+
+ /**
+ * This error indicates that the hardware resources required to enable
+ * peer access have been exhausted for one or more of the devices
+ * passed to ::cudaDeviceEnablePeerAccess().
+ */
+ cudaErrorTooManyPeers = 711,
+
+ /**
+ * This error indicates that the memory range passed to ::cudaHostRegister()
+ * has already been registered.
+ */
+ cudaErrorHostMemoryAlreadyRegistered = 712,
+
+ /**
+ * This error indicates that the pointer passed to ::cudaHostUnregister()
+ * does not correspond to any currently registered memory region.
+ */
+ cudaErrorHostMemoryNotRegistered = 713,
+
+ /**
+ * Device encountered an error in the call stack during kernel execution,
+ * possibly due to stack corruption or exceeding the stack size limit.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorHardwareStackError = 714,
+
+ /**
+ * The device encountered an illegal instruction during kernel execution.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorIllegalInstruction = 715,
+
+ /**
+ * The device encountered a load or store instruction
+ * on a memory address which is not aligned.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorMisalignedAddress = 716,
+
+ /**
+ * While executing a kernel, the device encountered an instruction
+ * which can only operate on memory locations in certain address spaces
+ * (global, shared, or local), but was supplied a memory address not
+ * belonging to an allowed address space.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorInvalidAddressSpace = 717,
+
+ /**
+ * The device encountered an invalid program counter.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorInvalidPc = 718,
+
+ /**
+ * An exception occurred on the device while executing a kernel. Common
+ * causes include dereferencing an invalid device pointer and accessing
+ * out of bounds shared memory. Less common cases can be system specific - more
+ * information about these cases can be found in the system specific user guide.
+ * This leaves the process in an inconsistent state and any further CUDA work
+ * will return the same error. To continue using CUDA, the process must be terminated
+ * and relaunched.
+ */
+ cudaErrorLaunchFailure = 719,
+
+ /**
+ * This error indicates that the number of blocks launched per grid for a kernel that was
+ * launched via either ::cudaLaunchCooperativeKernel or ::cudaLaunchCooperativeKernelMultiDevice
+ * exceeds the maximum number of blocks as allowed by ::cudaOccupancyMaxActiveBlocksPerMultiprocessor
+ * or ::cudaOccupancyMaxActiveBlocksPerMultiprocessorWithFlags times the number of multiprocessors
+ * as specified by the device attribute ::cudaDevAttrMultiProcessorCount.
+ */
+ cudaErrorCooperativeLaunchTooLarge = 720,
+
+ /**
+ * An exception occurred on the device while exiting a kernel using tensor memory: the
+ * tensor memory was not completely deallocated. This leaves the process in an inconsistent
+ * state and any further CUDA work will return the same error. To continue using CUDA, the
+ * process must be terminated and relaunched.
+ */
+ cudaErrorTensorMemoryLeak = 721,
+
+ /**
+ * This error indicates the attempted operation is not permitted.
+ */
+ cudaErrorNotPermitted = 800,
+
+ /**
+ * This error indicates the attempted operation is not supported
+ * on the current system or device.
+ */
+ cudaErrorNotSupported = 801,
+
+ /**
+ * This error indicates that the system is not yet ready to start any CUDA
+ * work. To continue using CUDA, verify the system configuration is in a
+ * valid state and all required driver daemons are actively running.
+ * More information about this error can be found in the system specific
+ * user guide.
+ */
+ cudaErrorSystemNotReady = 802,
+
+ /**
+ * This error indicates that there is a mismatch between the versions of
+ * the display driver and the CUDA driver. Refer to the compatibility documentation
+ * for supported versions.
+ */
+ cudaErrorSystemDriverMismatch = 803,
+
+ /**
+ * This error indicates that the system was upgraded to run with forward compatibility
+ * but the visible hardware detected by CUDA does not support this configuration.
+ * Refer to the compatibility documentation for the supported hardware matrix or ensure
+ * that only supported hardware is visible during initialization via the CUDA_VISIBLE_DEVICES
+ * environment variable.
+ */
+ cudaErrorCompatNotSupportedOnDevice = 804,
+
+ /**
+ * This error indicates that the MPS client failed to connect to the MPS control daemon or the MPS server.
+ */
+ cudaErrorMpsConnectionFailed = 805,
+
+ /**
+ * This error indicates that the remote procedural call between the MPS server and the MPS client failed.
+ */
+ cudaErrorMpsRpcFailure = 806,
+
+ /**
+ * This error indicates that the MPS server is not ready to accept new MPS client requests.
+ * This error can be returned when the MPS server is in the process of recovering from a fatal failure.
+ */
+ cudaErrorMpsServerNotReady = 807,
+
+ /**
+ * This error indicates that the hardware resources required to create MPS client have been exhausted.
+ */
+ cudaErrorMpsMaxClientsReached = 808,
+
+ /**
+ * This error indicates that the hardware resources required to support device connections have been exhausted.
+ */
+ cudaErrorMpsMaxConnectionsReached = 809,
+
+ /**
+ * This error indicates that the MPS client has been terminated by the server. To continue using CUDA, the process must be terminated and relaunched.
+ */
+ cudaErrorMpsClientTerminated = 810,
+
+ /**
+ * This error indicates that the program is using CUDA Dynamic Parallelism, but the current configuration, like MPS, does not support it.
+ */
+ cudaErrorCdpNotSupported = 811,
+
+ /**
+ * This error indicates that the program contains an unsupported interaction between different versions of CUDA Dynamic Parallelism.
+ */
+ cudaErrorCdpVersionMismatch = 812,
+
+ /**
+ * The operation is not permitted when the stream is capturing.
+ */
+ cudaErrorStreamCaptureUnsupported = 900,
+
+ /**
+ * The current capture sequence on the stream has been invalidated due to
+ * a previous error.
+ */
+ cudaErrorStreamCaptureInvalidated = 901,
+
+ /**
+ * The operation would have resulted in a merge of two independent capture
+ * sequences.
+ */
+ cudaErrorStreamCaptureMerge = 902,
+
+ /**
+ * The capture was not initiated in this stream.
+ */
+ cudaErrorStreamCaptureUnmatched = 903,
+
+ /**
+ * The capture sequence contains a fork that was not joined to the primary
+ * stream.
+ */
+ cudaErrorStreamCaptureUnjoined = 904,
+
+ /**
+ * A dependency would have been created which crosses the capture sequence
+ * boundary. Only implicit in-stream ordering dependencies are allowed to
+ * cross the boundary.
+ */
+ cudaErrorStreamCaptureIsolation = 905,
+
+ /**
+ * The operation would have resulted in a disallowed implicit dependency on
+ * a current capture sequence from cudaStreamLegacy.
+ */
+ cudaErrorStreamCaptureImplicit = 906,
+
+ /**
+ * The operation is not permitted on an event which was last recorded in a
+ * capturing stream.
+ */
+ cudaErrorCapturedEvent = 907,
+
+ /**
+ * A stream capture sequence not initiated with the ::cudaStreamCaptureModeRelaxed
+ * argument to ::cudaStreamBeginCapture was passed to ::cudaStreamEndCapture in a
+ * different thread.
+ */
+ cudaErrorStreamCaptureWrongThread = 908,
+
+ /**
+ * This indicates that the wait operation has timed out.
+ */
+ cudaErrorTimeout = 909,
+
+ /**
+ * This error indicates that the graph update was not performed because it included
+ * changes which violated constraints specific to instantiated graph update.
+ */
+ cudaErrorGraphExecUpdateFailure = 910,
+
+ /**
+ * This indicates that an async error has occurred in a device outside of CUDA.
+ * If CUDA was waiting for an external device's signal before consuming shared data,
+ * the external device signaled an error indicating that the data is not valid for
+ * consumption. This leaves the process in an inconsistent state and any further CUDA
+ * work will return the same error. To continue using CUDA, the process must be
+ * terminated and relaunched.
+ */
+ cudaErrorExternalDevice = 911,
+
+ /**
+ * This indicates that a kernel launch error has occurred due to cluster
+ * misconfiguration.
+ */
+ cudaErrorInvalidClusterSize = 912,
+
+ /**
+ * Indicates a function handle is not loaded when calling an API that requires
+ * a loaded function.
+ */
+ cudaErrorFunctionNotLoaded = 913,
+
+ /**
+ * This error indicates one or more resources passed in are not valid resource
+ * types for the operation.
+ */
+ cudaErrorInvalidResourceType = 914,
+
+ /**
+ * This error indicates one or more resources are insufficient or non-applicable for
+ * the operation.
+ */
+ cudaErrorInvalidResourceConfiguration = 915,
+
+ /**
+ * This indicates that an unknown internal error has occurred.
+ */
+ cudaErrorUnknown = 999
+
+ /**
+ * Any unhandled CUDA driver error is added to this value and returned via
+ * the runtime. Production releases of CUDA should not return such errors.
+ * \deprecated
+ * This error return is deprecated as of CUDA 4.1.
+ */
+ , cudaErrorApiFailureBase = 10000
+};
+
+/**
+ * Channel format kind
+ */
+enum __device_builtin__ cudaChannelFormatKind
+{
+ cudaChannelFormatKindSigned = 0, /**< Signed channel format */
+ cudaChannelFormatKindUnsigned = 1, /**< Unsigned channel format */
+ cudaChannelFormatKindFloat = 2, /**< Float channel format */
+ cudaChannelFormatKindNone = 3, /**< No channel format */
+ cudaChannelFormatKindNV12 = 4, /**< Unsigned 8-bit integers, planar 4:2:0 YUV format */
+ cudaChannelFormatKindUnsignedNormalized8X1 = 5, /**< 1 channel unsigned 8-bit normalized integer */
+ cudaChannelFormatKindUnsignedNormalized8X2 = 6, /**< 2 channel unsigned 8-bit normalized integer */
+ cudaChannelFormatKindUnsignedNormalized8X4 = 7, /**< 4 channel unsigned 8-bit normalized integer */
+ cudaChannelFormatKindUnsignedNormalized16X1 = 8, /**< 1 channel unsigned 16-bit normalized integer */
+ cudaChannelFormatKindUnsignedNormalized16X2 = 9, /**< 2 channel unsigned 16-bit normalized integer */
+ cudaChannelFormatKindUnsignedNormalized16X4 = 10, /**< 4 channel unsigned 16-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized8X1 = 11, /**< 1 channel signed 8-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized8X2 = 12, /**< 2 channel signed 8-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized8X4 = 13, /**< 4 channel signed 8-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized16X1 = 14, /**< 1 channel signed 16-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized16X2 = 15, /**< 2 channel signed 16-bit normalized integer */
+ cudaChannelFormatKindSignedNormalized16X4 = 16, /**< 4 channel signed 16-bit normalized integer */
+ cudaChannelFormatKindUnsignedBlockCompressed1 = 17, /**< 4 channel unsigned normalized block-compressed (BC1 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed1SRGB = 18, /**< 4 channel unsigned normalized block-compressed (BC1 compression) format with sRGB encoding */
+ cudaChannelFormatKindUnsignedBlockCompressed2 = 19, /**< 4 channel unsigned normalized block-compressed (BC2 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed2SRGB = 20, /**< 4 channel unsigned normalized block-compressed (BC2 compression) format with sRGB encoding */
+ cudaChannelFormatKindUnsignedBlockCompressed3 = 21, /**< 4 channel unsigned normalized block-compressed (BC3 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed3SRGB = 22, /**< 4 channel unsigned normalized block-compressed (BC3 compression) format with sRGB encoding */
+ cudaChannelFormatKindUnsignedBlockCompressed4 = 23, /**< 1 channel unsigned normalized block-compressed (BC4 compression) format */
+ cudaChannelFormatKindSignedBlockCompressed4 = 24, /**< 1 channel signed normalized block-compressed (BC4 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed5 = 25, /**< 2 channel unsigned normalized block-compressed (BC5 compression) format */
+ cudaChannelFormatKindSignedBlockCompressed5 = 26, /**< 2 channel signed normalized block-compressed (BC5 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed6H = 27, /**< 3 channel unsigned half-float block-compressed (BC6H compression) format */
+ cudaChannelFormatKindSignedBlockCompressed6H = 28, /**< 3 channel signed half-float block-compressed (BC6H compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed7 = 29, /**< 4 channel unsigned normalized block-compressed (BC7 compression) format */
+ cudaChannelFormatKindUnsignedBlockCompressed7SRGB = 30, /**< 4 channel unsigned normalized block-compressed (BC7 compression) format with sRGB encoding */
+ cudaChannelFormatKindUnsignedNormalized1010102 = 31 /**< 4 channel unsigned normalized (10-bit, 10-bit, 10-bit, 2-bit) format */
+
+};
+
+/**
+ * CUDA channel format descriptor (NOTE(review): x, y, z, w are presumably the per-component bit widths -- confirm against the CUDA Runtime API reference)
+ */
+struct __device_builtin__ cudaChannelFormatDesc
+{
+ int x; /**< x */
+ int y; /**< y */
+ int z; /**< z */
+ int w; /**< w */
+ enum cudaChannelFormatKind f; /**< Channel format kind */
+};
+
+/**
+ * CUDA array handle (pointer to struct cudaArray)
+ */
+typedef struct cudaArray *cudaArray_t;
+
+/**
+ * CUDA array handle, pointer-to-const form (as source copy argument)
+ */
+typedef const struct cudaArray *cudaArray_const_t;
+
+struct cudaArray; /* forward declaration only; no definition appears in this part of the header */
+
+/**
+ * CUDA mipmapped array handle (pointer to struct cudaMipmappedArray)
+ */
+typedef struct cudaMipmappedArray *cudaMipmappedArray_t;
+
+/**
+ * CUDA mipmapped array handle, pointer-to-const form (as source argument)
+ */
+typedef const struct cudaMipmappedArray *cudaMipmappedArray_const_t;
+
+struct cudaMipmappedArray; /* forward declaration only; no definition appears in this part of the header */
+
+/**
+ * Indicates that the layered sparse CUDA array or CUDA mipmapped array has a single mip tail region for all layers
+ */
+#define cudaArraySparsePropertiesSingleMipTail 0x1
+
+/**
+ * Sparse CUDA array and CUDA mipmapped array properties
+ */
+struct __device_builtin__ cudaArraySparseProperties {
+ struct {
+ unsigned int width; /**< Tile width in elements */
+ unsigned int height; /**< Tile height in elements */
+ unsigned int depth; /**< Tile depth in elements */
+ } tileExtent; /**< Tile extent: width, height and depth, each in elements */
+ unsigned int miptailFirstLevel; /**< First mip level at which the mip tail begins */
+ unsigned long long miptailSize; /**< Total size of the mip tail. */
+ unsigned int flags; /**< Flags will either be zero or ::cudaArraySparsePropertiesSingleMipTail */
+ unsigned int reserved[4];
+};
+
+/**
+ * CUDA array and CUDA mipmapped array memory requirements
+ */
+struct __device_builtin__ cudaArrayMemoryRequirements {
+ size_t size; /**< Total size of the array. */
+ size_t alignment; /**< Alignment necessary for mapping the array. */
+ unsigned int reserved[4];
+};
+
+/**
+ * CUDA memory types
+ */
+enum __device_builtin__ cudaMemoryType
+{
+ cudaMemoryTypeUnregistered = 0, /**< Unregistered memory */
+ cudaMemoryTypeHost = 1, /**< Host memory */
+ cudaMemoryTypeDevice = 2, /**< Device memory */
+ cudaMemoryTypeManaged = 3 /**< Managed memory */
+};
+
+/**
+ * CUDA memory copy types
+ */
+enum __device_builtin__ cudaMemcpyKind
+{
+ cudaMemcpyHostToHost = 0, /**< Host -> Host */
+ cudaMemcpyHostToDevice = 1, /**< Host -> Device */
+ cudaMemcpyDeviceToHost = 2, /**< Device -> Host */
+ cudaMemcpyDeviceToDevice = 3, /**< Device -> Device */
+ cudaMemcpyDefault = 4 /**< Direction of the transfer is inferred from the pointer values. Requires unified virtual addressing */
+};
+
+/**
+ * CUDA Pitched memory pointer
+ *
+ * \sa ::make_cudaPitchedPtr
+ */
+struct __device_builtin__ cudaPitchedPtr
+{
+ void *ptr; /**< Pointer to allocated memory */
+ size_t pitch; /**< Pitch of allocated memory in bytes */
+ size_t xsize; /**< Logical width of allocation in elements */
+ size_t ysize; /**< Logical height of allocation in elements */
+};
+
+/**
+ * CUDA extent
+ *
+ * \sa ::make_cudaExtent
+ */
+struct __device_builtin__ cudaExtent
+{
+ size_t width; /**< Width in elements when referring to array memory, in bytes when referring to linear memory */
+ size_t height; /**< Height in elements */
+ size_t depth; /**< Depth in elements */
+};
+
+/**
+ * CUDA 3D position
+ *
+ * \sa ::make_cudaPos
+ */
+struct __device_builtin__ cudaPos
+{
+ size_t x; /**< x */
+ size_t y; /**< y */
+ size_t z; /**< z */
+};
+
+/**
+ * CUDA 3D memory copying parameters
+ */
+struct __device_builtin__ cudaMemcpy3DParms
+{
+ cudaArray_t srcArray; /**< Source CUDA array handle */
+ struct cudaPos srcPos; /**< Source position offset */
+ struct cudaPitchedPtr srcPtr; /**< Pitched source memory address */
+
+ cudaArray_t dstArray; /**< Destination CUDA array handle */
+ struct cudaPos dstPos; /**< Destination position offset */
+ struct cudaPitchedPtr dstPtr; /**< Pitched destination memory address */
+
+ struct cudaExtent extent; /**< Requested memory copy size */
+ enum cudaMemcpyKind kind; /**< Type of transfer */
+};
+
+/**
+ * Memcpy node parameters
+ */
+struct __device_builtin__ cudaMemcpyNodeParams {
+ int flags; /**< Must be zero */
+ int reserved[3]; /**< Must be zero */
+ struct cudaMemcpy3DParms copyParams; /**< Parameters for the memory copy */
+};
+
+/**
+ * CUDA 3D cross-device memory copying parameters
+ */
+struct __device_builtin__ cudaMemcpy3DPeerParms
+{
+ cudaArray_t srcArray; /**< Source CUDA array handle */
+ struct cudaPos srcPos; /**< Source position offset */
+ struct cudaPitchedPtr srcPtr; /**< Pitched source memory address */
+ int srcDevice; /**< Source device */
+
+ cudaArray_t dstArray; /**< Destination CUDA array handle */
+ struct cudaPos dstPos; /**< Destination position offset */
+ struct cudaPitchedPtr dstPtr; /**< Pitched destination memory address */
+ int dstDevice; /**< Destination device */
+
+ struct cudaExtent extent; /**< Requested memory copy size */
+};
+
+/**
+ * CUDA Memset node parameters
+ */
+struct __device_builtin__ cudaMemsetParams {
+ void *dst; /**< Destination device pointer */
+ size_t pitch; /**< Pitch of destination device pointer. Unused if height is 1 */
+ unsigned int value; /**< Value to be set */
+ unsigned int elementSize; /**< Size of each element in bytes. Must be 1, 2, or 4. */
+ size_t width; /**< Width of the row in elements */
+ size_t height; /**< Number of rows */
+};
+
+/**
+ * CUDA Memset node parameters (V2 struct; the fields shown here match ::cudaMemsetParams)
+ */
+struct __device_builtin__ cudaMemsetParamsV2 {
+ void *dst; /**< Destination device pointer */
+ size_t pitch; /**< Pitch of destination device pointer. Unused if height is 1 */
+ unsigned int value; /**< Value to be set */
+ unsigned int elementSize; /**< Size of each element in bytes. Must be 1, 2, or 4. */
+ size_t width; /**< Width of the row in elements */
+ size_t height; /**< Number of rows */
+};
+
+/**
+ * Specifies performance hint with ::cudaAccessPolicyWindow for hitProp and missProp members.
+ */
+enum __device_builtin__ cudaAccessProperty {
+ cudaAccessPropertyNormal = 0, /**< Normal cache persistence. */
+ cudaAccessPropertyStreaming = 1, /**< Streaming access is less likely to persist in cache. */
+ cudaAccessPropertyPersisting = 2 /**< Persisting access is more likely to persist in cache. */
+};
+
+/**
+ * Specifies an access policy for a window, a contiguous extent of memory
+ * beginning at base_ptr and ending at base_ptr + num_bytes.
+ * The window is partitioned into many segments, which are assigned such that:
+ * sum of "hit segments" / window == approx. hitRatio,
+ * sum of "miss segments" / window == approx. 1 - hitRatio.
+ * Segments and ratio specifications are fitted to the capabilities of
+ * the architecture.
+ * Accesses in a hit segment apply the hitProp access policy.
+ * Accesses in a miss segment apply the missProp access policy.
+ */
+struct __device_builtin__ cudaAccessPolicyWindow {
+ void *base_ptr; /**< Starting address of the access policy window. CUDA driver may align it. */
+ size_t num_bytes; /**< Size in bytes of the window policy. CUDA driver may restrict the maximum size and alignment. */
+ float hitRatio; /**< hitRatio specifies percentage of lines assigned hitProp, rest are assigned missProp. */
+ enum cudaAccessProperty hitProp; /**< ::cudaAccessProperty set for hit. */
+ enum cudaAccessProperty missProp; /**< ::cudaAccessProperty set for miss. Must be either ::cudaAccessPropertyNormal or ::cudaAccessPropertyStreaming. */
+};
+
+#ifdef _WIN32 /* CUDART_CB: calling-convention qualifier used in callback typedefs such as cudaHostFn_t */
+#define CUDART_CB __stdcall
+#else /* not _WIN32: CUDART_CB expands to nothing */
+#define CUDART_CB
+#endif /* _WIN32 */
+
+/**
+ * CUDA host function pointer type (returns void; declared with the ::CUDART_CB calling convention)
+ * \param userData Argument value passed to the function
+ */
+typedef void (CUDART_CB *cudaHostFn_t)(void *userData);
+
+/**
+ * CUDA host node parameters
+ */
+struct __device_builtin__ cudaHostNodeParams {
+ cudaHostFn_t fn; /**< The function to call when the node executes */
+ void* userData; /**< Argument to pass to the function */
+};
+
+/**
+ * CUDA host node parameters (V2 struct; the fields shown here match ::cudaHostNodeParams)
+ */
+struct __device_builtin__ cudaHostNodeParamsV2 {
+ cudaHostFn_t fn; /**< The function to call when the node executes */
+ void* userData; /**< Argument to pass to the function */
+};
+
+/**
+ * Possible stream capture statuses returned by ::cudaStreamIsCapturing
+ */
+enum __device_builtin__ cudaStreamCaptureStatus {
+ cudaStreamCaptureStatusNone = 0, /**< Stream is not capturing */
+ cudaStreamCaptureStatusActive = 1, /**< Stream is actively capturing */
+ cudaStreamCaptureStatusInvalidated = 2 /**< Stream is part of a capture sequence that
+ has been invalidated, but not terminated */
+};
+
+/**
+ * Possible modes for stream capture thread interactions. For more details see
+ * ::cudaStreamBeginCapture and ::cudaThreadExchangeStreamCaptureMode
+ */
+enum __device_builtin__ cudaStreamCaptureMode {
+ cudaStreamCaptureModeGlobal = 0,
+ cudaStreamCaptureModeThreadLocal = 1,
+ cudaStreamCaptureModeRelaxed = 2
+};
+
+enum __device_builtin__ cudaSynchronizationPolicy { /* Synchronization policy values; note that numbering starts at 1, not 0 */
+ cudaSyncPolicyAuto = 1,
+ cudaSyncPolicySpin = 2,
+ cudaSyncPolicyYield = 3,
+ cudaSyncPolicyBlockingSync = 4
+};
+
+/**
+ * Cluster scheduling policies. These may be passed to ::cudaFuncSetAttribute
+ */
+enum __device_builtin__ cudaClusterSchedulingPolicy {
+ cudaClusterSchedulingPolicyDefault = 0, /**< the default policy */
+ cudaClusterSchedulingPolicySpread = 1, /**< spread the blocks within a cluster to the SMs */
+ cudaClusterSchedulingPolicyLoadBalancing = 2 /**< allow the hardware to load-balance the blocks in a cluster to the SMs */
+};
+
+/**
+ * Flags for ::cudaStreamUpdateCaptureDependencies
+ */
+enum __device_builtin__ cudaStreamUpdateCaptureDependenciesFlags {
+ cudaStreamAddCaptureDependencies = 0x0, /**< Add new nodes to the dependency set */
+ cudaStreamSetCaptureDependencies = 0x1 /**< Replace the dependency set with the new nodes */
+};
+
+/**
+ * Flags for user objects for graphs
+ */
+enum __device_builtin__ cudaUserObjectFlags {
+ cudaUserObjectNoDestructorSync = 0x1 /**< Indicates the destructor execution is not synchronized by any CUDA handle. */
+};
+
+/**
+ * Flags for retaining user object references for graphs
+ */
+enum __device_builtin__ cudaUserObjectRetainFlags {
+ cudaGraphUserObjectMove = 0x1 /**< Transfer references from the caller rather than creating new references. */
+};
+
+/**
+ * CUDA graphics interop resource
+ */
+struct cudaGraphicsResource;
+
+/**
+ * CUDA graphics interop register flags
+ */
+enum __device_builtin__ cudaGraphicsRegisterFlags
+{
+ cudaGraphicsRegisterFlagsNone = 0, /**< Default */
+ cudaGraphicsRegisterFlagsReadOnly = 1, /**< CUDA will not write to this resource */
+ cudaGraphicsRegisterFlagsWriteDiscard = 2, /**< CUDA will only write to and will not read from this resource */
+ cudaGraphicsRegisterFlagsSurfaceLoadStore = 4, /**< CUDA will bind this resource to a surface reference */
+ cudaGraphicsRegisterFlagsTextureGather = 8 /**< CUDA will perform texture gather operations on this resource */
+};
+
+/**
+ * CUDA graphics interop map flags
+ */
+enum __device_builtin__ cudaGraphicsMapFlags
+{
+ cudaGraphicsMapFlagsNone = 0, /**< Default; Assume resource can be read/written */
+ cudaGraphicsMapFlagsReadOnly = 1, /**< CUDA will not write to this resource */
+ cudaGraphicsMapFlagsWriteDiscard = 2 /**< CUDA will only write to and will not read from this resource */
+};
+
+/**
+ * CUDA graphics interop array indices for cube maps
+ */
+enum __device_builtin__ cudaGraphicsCubeFace
+{
+ cudaGraphicsCubeFacePositiveX = 0x00, /**< Positive X face of cubemap */
+ cudaGraphicsCubeFaceNegativeX = 0x01, /**< Negative X face of cubemap */
+ cudaGraphicsCubeFacePositiveY = 0x02, /**< Positive Y face of cubemap */
+ cudaGraphicsCubeFaceNegativeY = 0x03, /**< Negative Y face of cubemap */
+ cudaGraphicsCubeFacePositiveZ = 0x04, /**< Positive Z face of cubemap */
+ cudaGraphicsCubeFaceNegativeZ = 0x05 /**< Negative Z face of cubemap */
+};
+
+/**
+ * CUDA resource types
+ */
+enum __device_builtin__ cudaResourceType
+{
+ cudaResourceTypeArray = 0x00, /**< Array resource */
+ cudaResourceTypeMipmappedArray = 0x01, /**< Mipmapped array resource */
+ cudaResourceTypeLinear = 0x02, /**< Linear resource */
+ cudaResourceTypePitch2D = 0x03 /**< Pitch 2D resource */
+};
+
+/**
+ * CUDA texture resource view formats
+ */
+enum __device_builtin__ cudaResourceViewFormat
+{
+ cudaResViewFormatNone = 0x00, /**< No resource view format (use underlying resource format) */
+ cudaResViewFormatUnsignedChar1 = 0x01, /**< 1 channel unsigned 8-bit integers */
+ cudaResViewFormatUnsignedChar2 = 0x02, /**< 2 channel unsigned 8-bit integers */
+ cudaResViewFormatUnsignedChar4 = 0x03, /**< 4 channel unsigned 8-bit integers */
+ cudaResViewFormatSignedChar1 = 0x04, /**< 1 channel signed 8-bit integers */
+ cudaResViewFormatSignedChar2 = 0x05, /**< 2 channel signed 8-bit integers */
+ cudaResViewFormatSignedChar4 = 0x06, /**< 4 channel signed 8-bit integers */
+ cudaResViewFormatUnsignedShort1 = 0x07, /**< 1 channel unsigned 16-bit integers */
+ cudaResViewFormatUnsignedShort2 = 0x08, /**< 2 channel unsigned 16-bit integers */
+ cudaResViewFormatUnsignedShort4 = 0x09, /**< 4 channel unsigned 16-bit integers */
+ cudaResViewFormatSignedShort1 = 0x0a, /**< 1 channel signed 16-bit integers */
+ cudaResViewFormatSignedShort2 = 0x0b, /**< 2 channel signed 16-bit integers */
+ cudaResViewFormatSignedShort4 = 0x0c, /**< 4 channel signed 16-bit integers */
+ cudaResViewFormatUnsignedInt1 = 0x0d, /**< 1 channel unsigned 32-bit integers */
+ cudaResViewFormatUnsignedInt2 = 0x0e, /**< 2 channel unsigned 32-bit integers */
+ cudaResViewFormatUnsignedInt4 = 0x0f, /**< 4 channel unsigned 32-bit integers */
+ cudaResViewFormatSignedInt1 = 0x10, /**< 1 channel signed 32-bit integers */
+ cudaResViewFormatSignedInt2 = 0x11, /**< 2 channel signed 32-bit integers */
+ cudaResViewFormatSignedInt4 = 0x12, /**< 4 channel signed 32-bit integers */
+ cudaResViewFormatHalf1 = 0x13, /**< 1 channel 16-bit floating point */
+ cudaResViewFormatHalf2 = 0x14, /**< 2 channel 16-bit floating point */
+ cudaResViewFormatHalf4 = 0x15, /**< 4 channel 16-bit floating point */
+ cudaResViewFormatFloat1 = 0x16, /**< 1 channel 32-bit floating point */
+ cudaResViewFormatFloat2 = 0x17, /**< 2 channel 32-bit floating point */
+ cudaResViewFormatFloat4 = 0x18, /**< 4 channel 32-bit floating point */
+ cudaResViewFormatUnsignedBlockCompressed1 = 0x19, /**< Block compressed 1 */
+ cudaResViewFormatUnsignedBlockCompressed2 = 0x1a, /**< Block compressed 2 */
+ cudaResViewFormatUnsignedBlockCompressed3 = 0x1b, /**< Block compressed 3 */
+ cudaResViewFormatUnsignedBlockCompressed4 = 0x1c, /**< Block compressed 4 unsigned */
+ cudaResViewFormatSignedBlockCompressed4 = 0x1d, /**< Block compressed 4 signed */
+ cudaResViewFormatUnsignedBlockCompressed5 = 0x1e, /**< Block compressed 5 unsigned */
+ cudaResViewFormatSignedBlockCompressed5 = 0x1f, /**< Block compressed 5 signed */
+ cudaResViewFormatUnsignedBlockCompressed6H = 0x20, /**< Block compressed 6 unsigned half-float */
+ cudaResViewFormatSignedBlockCompressed6H = 0x21, /**< Block compressed 6 signed half-float */
+ cudaResViewFormatUnsignedBlockCompressed7 = 0x22 /**< Block compressed 7 */
+};
+
+/**
+ * CUDA resource descriptor
+ */
+struct __device_builtin__ cudaResourceDesc {
+ enum cudaResourceType resType; /**< Resource type */
+
+ union {
+ struct {
+ cudaArray_t array; /**< CUDA array */
+ } array;
+ struct {
+ cudaMipmappedArray_t mipmap; /**< CUDA mipmapped array */
+ } mipmap;
+ struct {
+ void *devPtr; /**< Device pointer */
+ struct cudaChannelFormatDesc desc; /**< Channel descriptor */
+ size_t sizeInBytes; /**< Size in bytes */
+ } linear;
+ struct {
+ void *devPtr; /**< Device pointer */
+ struct cudaChannelFormatDesc desc; /**< Channel descriptor */
+ size_t width; /**< Width of the array in elements */
+ size_t height; /**< Height of the array in elements */
+ size_t pitchInBytes; /**< Pitch between two rows in bytes */
+ } pitch2D;
+ } res;
+};
+
+/**
+ * CUDA resource view descriptor
+ */
+struct __device_builtin__ cudaResourceViewDesc
+{
+ enum cudaResourceViewFormat format; /**< Resource view format */
+ size_t width; /**< Width of the resource view */
+ size_t height; /**< Height of the resource view */
+ size_t depth; /**< Depth of the resource view */
+ unsigned int firstMipmapLevel; /**< First defined mipmap level */
+ unsigned int lastMipmapLevel; /**< Last defined mipmap level */
+ unsigned int firstLayer; /**< First layer index */
+ unsigned int lastLayer; /**< Last layer index */
+};
+
+/**
+ * CUDA pointer attributes
+ */
+struct __device_builtin__ cudaPointerAttributes
+{
+ /**
+ * The type of memory - ::cudaMemoryTypeUnregistered, ::cudaMemoryTypeHost,
+ * ::cudaMemoryTypeDevice or ::cudaMemoryTypeManaged.
+ */
+ enum cudaMemoryType type;
+
+ /**
+ * The device against which the memory was allocated or registered.
+ * If the memory type is ::cudaMemoryTypeDevice then this identifies
+ * the device on which the referenced memory physically resides. If
+ * the memory type is ::cudaMemoryTypeHost or ::cudaMemoryTypeManaged then
+ * this identifies the device which was current when the memory was allocated
+ * or registered (and if that device is deinitialized then this allocation
+ * will vanish with that device's state).
+ */
+ int device;
+
+ /**
+ * The address which may be dereferenced on the current device to access
+ * the memory or NULL if no such address exists.
+ */
+ void *devicePointer;
+
+ /**
+ * The address which may be dereferenced on the host to access the
+ * memory or NULL if no such address exists.
+ *
+ * \note CUDA doesn't check if unregistered memory is allocated so this field
+ * may contain an invalid pointer if an invalid pointer has been passed to CUDA.
+ */
+ void *hostPointer;
+};
+
+/**
+ * CUDA function attributes
+ */
+struct __device_builtin__ cudaFuncAttributes
+{
+ /**
+ * The size in bytes of statically-allocated shared memory per block
+ * required by this function. This does not include dynamically-allocated
+ * shared memory requested by the user at runtime.
+ */
+ size_t sharedSizeBytes;
+
+ /**
+ * The size in bytes of user-allocated constant memory required by this
+ * function.
+ */
+ size_t constSizeBytes;
+
+ /**
+ * The size in bytes of local memory used by each thread of this function.
+ */
+ size_t localSizeBytes;
+
+ /**
+ * The maximum number of threads per block, beyond which a launch of the
+ * function would fail. This number depends on both the function and the
+ * device on which the function is currently loaded.
+ */
+ int maxThreadsPerBlock;
+
+ /**
+ * The number of registers used by each thread of this function.
+ */
+ int numRegs;
+
+ /**
+ * The PTX virtual architecture version for which the function was
+ * compiled. This value is the major PTX version * 10 + the minor PTX
+ * version, so a PTX version 1.3 function would return the value 13.
+ */
+ int ptxVersion;
+
+ /**
+ * The binary architecture version for which the function was compiled.
+ * This value is the major binary version * 10 + the minor binary version,
+ * so a binary version 1.3 function would return the value 13.
+ */
+ int binaryVersion;
+
+ /**
+ * The attribute to indicate whether the function has been compiled with
+ * user specified option "-Xptxas --dlcm=ca" set.
+ */
+ int cacheModeCA;
+
+ /**
+ * The maximum size in bytes of dynamic shared memory per block for
+ * this function. Any launch must have a dynamic shared memory size
+ * smaller than this value.
+ */
+ int maxDynamicSharedSizeBytes;
+
+ /**
+ * On devices where the L1 cache and shared memory use the same hardware resources,
+ * this sets the shared memory carveout preference, in percent of the maximum shared memory.
+ * Refer to ::cudaDevAttrMaxSharedMemoryPerMultiprocessor.
+ * This is only a hint, and the driver can choose a different ratio if required to execute the function.
+ * See ::cudaFuncSetAttribute
+ */
+ int preferredShmemCarveout;
+
+ /**
+ * If this attribute is set, the kernel must launch with a valid cluster dimension
+ * specified.
+ */
+ int clusterDimMustBeSet;
+
+ /**
+ * The required cluster width/height/depth in blocks. The values must either
+ * all be 0 or all be positive. The validity of the cluster dimensions is
+ * otherwise checked at launch time.
+ *
+ * If the value is set during compile time, it cannot be set at runtime.
+ * Setting it at runtime should return cudaErrorNotPermitted.
+ * See ::cudaFuncSetAttribute
+ */
+ int requiredClusterWidth;
+ int requiredClusterHeight;
+ int requiredClusterDepth;
+
+ /**
+ * The block scheduling policy of a function.
+ * See ::cudaFuncSetAttribute
+ */
+ int clusterSchedulingPolicyPreference;
+
+ /**
+ * Whether the function can be launched with non-portable cluster size. 1 is
+ * allowed, 0 is disallowed. A non-portable cluster size may only function
+ * on the specific SKUs the program is tested on. The launch might fail if
+ * the program is run on a different hardware platform.
+ *
+ * CUDA API provides ::cudaOccupancyMaxActiveClusters to assist with checking
+ * whether the desired size can be launched on the current device.
+ *
+ * Portable Cluster Size
+ *
+ * A portable cluster size is guaranteed to be functional on all compute
+ * capabilities higher than the target compute capability. The portable
+ * cluster size for sm_90 is 8 blocks per cluster. This value may increase
+ * for future compute capabilities.
+ *
+ * The specific hardware unit may support higher cluster sizes that are not
+ * guaranteed to be portable.
+ * See ::cudaFuncSetAttribute
+ */
+ int nonPortableClusterSizeAllowed;
+
+ /**
+ * Reserved for future use.
+ */
+ int reserved[16];
+};
+
+/**
+ * CUDA function attributes that can be set using ::cudaFuncSetAttribute
+ */
+enum __device_builtin__ cudaFuncAttribute
+{
+ cudaFuncAttributeMaxDynamicSharedMemorySize = 8, /**< Maximum dynamic shared memory size */
+ cudaFuncAttributePreferredSharedMemoryCarveout = 9, /**< Preferred shared memory-L1 cache split */
+ cudaFuncAttributeClusterDimMustBeSet = 10, /**< Indicator to enforce valid cluster dimension specification on kernel launch */
+ cudaFuncAttributeRequiredClusterWidth = 11, /**< Required cluster width */
+ cudaFuncAttributeRequiredClusterHeight = 12, /**< Required cluster height */
+ cudaFuncAttributeRequiredClusterDepth = 13, /**< Required cluster depth */
+ cudaFuncAttributeNonPortableClusterSizeAllowed = 14, /**< Whether a non-portable cluster size is allowed */
+ cudaFuncAttributeClusterSchedulingPolicyPreference = 15, /**< Required cluster scheduling policy preference */
+ cudaFuncAttributeMax /**< No explicit value; implicitly one more than the preceding enumerator (16) */
+};
+
+/**
+ * CUDA function cache configurations
+ */
+enum __device_builtin__ cudaFuncCache
+{
+ cudaFuncCachePreferNone = 0, /**< Default function cache configuration, no preference */
+ cudaFuncCachePreferShared = 1, /**< Prefer larger shared memory and smaller L1 cache */
+ cudaFuncCachePreferL1 = 2, /**< Prefer larger L1 cache and smaller shared memory */
+ cudaFuncCachePreferEqual = 3 /**< Prefer equal size L1 cache and shared memory */
+};
+
+/**
+ * CUDA shared memory configuration
+ * \deprecated
+ */
+enum __device_builtin__ cudaSharedMemConfig
+{
+ cudaSharedMemBankSizeDefault = 0,
+ cudaSharedMemBankSizeFourByte = 1,
+ cudaSharedMemBankSizeEightByte = 2
+};
+
+/**
+ * Shared memory carveout configurations. These may be passed to ::cudaFuncSetAttribute
+ */
+enum __device_builtin__ cudaSharedCarveout {
+ cudaSharedmemCarveoutDefault = -1, /**< No preference for shared memory or L1 (default) */
+ cudaSharedmemCarveoutMaxShared = 100, /**< Prefer maximum available shared memory, minimum L1 cache */
+ cudaSharedmemCarveoutMaxL1 = 0 /**< Prefer maximum available L1 cache, minimum shared memory */
+};
+
+/**
+ * CUDA device compute modes
+ */
+enum __device_builtin__ cudaComputeMode
+{
+ cudaComputeModeDefault = 0, /**< Default compute mode (Multiple threads can use ::cudaSetDevice() with this device) */
+ cudaComputeModeExclusive = 1, /**< Compute-exclusive-thread mode (Only one thread in one process will be able to use ::cudaSetDevice() with this device) */
+ cudaComputeModeProhibited = 2, /**< Compute-prohibited mode (No threads can use ::cudaSetDevice() with this device) */
+ cudaComputeModeExclusiveProcess = 3 /**< Compute-exclusive-process mode (Many threads in one process will be able to use ::cudaSetDevice() with this device) */
+};
+
+/**
+ * CUDA Limits
+ */
+enum __device_builtin__ cudaLimit
+{
+ cudaLimitStackSize = 0x00, /**< GPU thread stack size */
+ cudaLimitPrintfFifoSize = 0x01, /**< GPU printf FIFO size */
+ cudaLimitMallocHeapSize = 0x02, /**< GPU malloc heap size */
+ cudaLimitDevRuntimeSyncDepth = 0x03, /**< GPU device runtime synchronize depth */
+ cudaLimitDevRuntimePendingLaunchCount = 0x04, /**< GPU device runtime pending launch count */
+ cudaLimitMaxL2FetchGranularity = 0x05, /**< A value between 0 and 128 that indicates the maximum fetch granularity of L2 (in Bytes). This is a hint */
+ cudaLimitPersistingL2CacheSize = 0x06 /**< A size in bytes for L2 persisting lines cache size */
+};
+
+/**
+ * CUDA Memory Advise values
+ */
+enum __device_builtin__ cudaMemoryAdvise
+{
+ cudaMemAdviseSetReadMostly = 1, /**< Data will mostly be read and only occasionally be written to */
+ cudaMemAdviseUnsetReadMostly = 2, /**< Undo the effect of ::cudaMemAdviseSetReadMostly */
+ cudaMemAdviseSetPreferredLocation = 3, /**< Set the preferred location for the data as the specified device */
+ cudaMemAdviseUnsetPreferredLocation = 4, /**< Clear the preferred location for the data */
+ cudaMemAdviseSetAccessedBy = 5, /**< Data will be accessed by the specified device, so prevent page faults as much as possible */
+ cudaMemAdviseUnsetAccessedBy = 6 /**< Let the Unified Memory subsystem decide on the page faulting policy for the specified device */
+};
+
+/**
+ * CUDA range attributes
+ */
+enum __device_builtin__ cudaMemRangeAttribute
+{
+ cudaMemRangeAttributeReadMostly = 1, /**< Whether the range will mostly be read and only occasionally be written to */
+ cudaMemRangeAttributePreferredLocation = 2, /**< The preferred location of the range */
+ cudaMemRangeAttributeAccessedBy = 3, /**< Memory range has ::cudaMemAdviseSetAccessedBy set for specified device */
+ cudaMemRangeAttributeLastPrefetchLocation = 4 /**< The last location to which the range was prefetched */
+ , cudaMemRangeAttributePreferredLocationType = 5 /**< The preferred location type of the range */
+ , cudaMemRangeAttributePreferredLocationId = 6 /**< The preferred location id of the range */
+ , cudaMemRangeAttributeLastPrefetchLocationType = 7 /**< The last location type to which the range was prefetched */
+ , cudaMemRangeAttributeLastPrefetchLocationId = 8 /**< The last location id to which the range was prefetched */
+};
+
+/**
+ * CUDA GPUDirect RDMA flush writes APIs supported on the device
+ */
+enum __device_builtin__ cudaFlushGPUDirectRDMAWritesOptions {
+ cudaFlushGPUDirectRDMAWritesOptionHost = 1<<0, /**< ::cudaDeviceFlushGPUDirectRDMAWrites() and its CUDA Driver API counterpart are supported on the device. */
+ cudaFlushGPUDirectRDMAWritesOptionMemOps = 1<<1 /**< The ::CU_STREAM_WAIT_VALUE_FLUSH flag and the ::CU_STREAM_MEM_OP_FLUSH_REMOTE_WRITES MemOp are supported on the CUDA device. */
+};
+
+/**
+ * CUDA GPUDirect RDMA flush writes ordering features of the device
+ */
+enum __device_builtin__ cudaGPUDirectRDMAWritesOrdering {
+ cudaGPUDirectRDMAWritesOrderingNone = 0, /**< The device does not natively support ordering of GPUDirect RDMA writes. ::cudaDeviceFlushGPUDirectRDMAWrites() can be leveraged if supported. */
+ cudaGPUDirectRDMAWritesOrderingOwner = 100, /**< Natively, the device can consistently consume GPUDirect RDMA writes, although other CUDA devices may not. */
+ cudaGPUDirectRDMAWritesOrderingAllDevices = 200 /**< Any CUDA device in the system can consistently consume GPUDirect RDMA writes to this device. */
+};
+
+/**
+ * CUDA GPUDirect RDMA flush writes scopes. The values (100, 200) equal the Owner/AllDevices values of ::cudaGPUDirectRDMAWritesOrdering.
+ */
+enum __device_builtin__ cudaFlushGPUDirectRDMAWritesScope {
+ cudaFlushGPUDirectRDMAWritesToOwner = 100, /**< Blocks until remote writes are visible to the CUDA device context owning the data. */
+ cudaFlushGPUDirectRDMAWritesToAllDevices = 200 /**< Blocks until remote writes are visible to all CUDA device contexts. */
+};
+
+/**
+ * CUDA GPUDirect RDMA flush writes targets. Only one target is declared here.
+ */
+enum __device_builtin__ cudaFlushGPUDirectRDMAWritesTarget {
+ cudaFlushGPUDirectRDMAWritesTargetCurrentDevice /**< Sets the target for ::cudaDeviceFlushGPUDirectRDMAWrites() to the currently active CUDA device context. Implicitly 0 (no initializer). */
+};
+
+
+/**
+ * CUDA device attributes. Enumerator values are explicit and non-contiguous: 44, 102-105, 107, 110, 136-138 and 141-142 are unassigned in this listing.
+ */
+enum __device_builtin__ cudaDeviceAttr
+{
+ cudaDevAttrMaxThreadsPerBlock = 1, /**< Maximum number of threads per block */
+ cudaDevAttrMaxBlockDimX = 2, /**< Maximum block dimension X */
+ cudaDevAttrMaxBlockDimY = 3, /**< Maximum block dimension Y */
+ cudaDevAttrMaxBlockDimZ = 4, /**< Maximum block dimension Z */
+ cudaDevAttrMaxGridDimX = 5, /**< Maximum grid dimension X */
+ cudaDevAttrMaxGridDimY = 6, /**< Maximum grid dimension Y */
+ cudaDevAttrMaxGridDimZ = 7, /**< Maximum grid dimension Z */
+ cudaDevAttrMaxSharedMemoryPerBlock = 8, /**< Maximum shared memory available per block in bytes */
+ cudaDevAttrTotalConstantMemory = 9, /**< Memory available on device for __constant__ variables in a CUDA C kernel in bytes */
+ cudaDevAttrWarpSize = 10, /**< Warp size in threads */
+ cudaDevAttrMaxPitch = 11, /**< Maximum pitch in bytes allowed by memory copies */
+ cudaDevAttrMaxRegistersPerBlock = 12, /**< Maximum number of 32-bit registers available per block */
+ cudaDevAttrClockRate = 13, /**< Peak clock frequency in kilohertz */
+ cudaDevAttrTextureAlignment = 14, /**< Alignment requirement for textures */
+ cudaDevAttrGpuOverlap = 15, /**< Device can possibly copy memory and execute a kernel concurrently */
+ cudaDevAttrMultiProcessorCount = 16, /**< Number of multiprocessors on device */
+ cudaDevAttrKernelExecTimeout = 17, /**< Specifies whether there is a run time limit on kernels */
+ cudaDevAttrIntegrated = 18, /**< Device is integrated with host memory */
+ cudaDevAttrCanMapHostMemory = 19, /**< Device can map host memory into CUDA address space */
+ cudaDevAttrComputeMode = 20, /**< Compute mode (See ::cudaComputeMode for details) */
+ cudaDevAttrMaxTexture1DWidth = 21, /**< Maximum 1D texture width */
+ cudaDevAttrMaxTexture2DWidth = 22, /**< Maximum 2D texture width */
+ cudaDevAttrMaxTexture2DHeight = 23, /**< Maximum 2D texture height */
+ cudaDevAttrMaxTexture3DWidth = 24, /**< Maximum 3D texture width */
+ cudaDevAttrMaxTexture3DHeight = 25, /**< Maximum 3D texture height */
+ cudaDevAttrMaxTexture3DDepth = 26, /**< Maximum 3D texture depth */
+ cudaDevAttrMaxTexture2DLayeredWidth = 27, /**< Maximum 2D layered texture width */
+ cudaDevAttrMaxTexture2DLayeredHeight = 28, /**< Maximum 2D layered texture height */
+ cudaDevAttrMaxTexture2DLayeredLayers = 29, /**< Maximum layers in a 2D layered texture */
+ cudaDevAttrSurfaceAlignment = 30, /**< Alignment requirement for surfaces */
+ cudaDevAttrConcurrentKernels = 31, /**< Device can possibly execute multiple kernels concurrently */
+ cudaDevAttrEccEnabled = 32, /**< Device has ECC support enabled */
+ cudaDevAttrPciBusId = 33, /**< PCI bus ID of the device */
+ cudaDevAttrPciDeviceId = 34, /**< PCI device ID of the device */
+ cudaDevAttrTccDriver = 35, /**< Device is using TCC driver model */
+ cudaDevAttrMemoryClockRate = 36, /**< Peak memory clock frequency in kilohertz */
+ cudaDevAttrGlobalMemoryBusWidth = 37, /**< Global memory bus width in bits */
+ cudaDevAttrL2CacheSize = 38, /**< Size of L2 cache in bytes */
+ cudaDevAttrMaxThreadsPerMultiProcessor = 39, /**< Maximum resident threads per multiprocessor */
+ cudaDevAttrAsyncEngineCount = 40, /**< Number of asynchronous engines */
+ cudaDevAttrUnifiedAddressing = 41, /**< Device shares a unified address space with the host */
+ cudaDevAttrMaxTexture1DLayeredWidth = 42, /**< Maximum 1D layered texture width */
+ cudaDevAttrMaxTexture1DLayeredLayers = 43, /**< Maximum layers in a 1D layered texture */
+ cudaDevAttrMaxTexture2DGatherWidth = 45, /**< Maximum 2D texture width if cudaArrayTextureGather is set */
+ cudaDevAttrMaxTexture2DGatherHeight = 46, /**< Maximum 2D texture height if cudaArrayTextureGather is set */
+ cudaDevAttrMaxTexture3DWidthAlt = 47, /**< Alternate maximum 3D texture width */
+ cudaDevAttrMaxTexture3DHeightAlt = 48, /**< Alternate maximum 3D texture height */
+ cudaDevAttrMaxTexture3DDepthAlt = 49, /**< Alternate maximum 3D texture depth */
+ cudaDevAttrPciDomainId = 50, /**< PCI domain ID of the device */
+ cudaDevAttrTexturePitchAlignment = 51, /**< Pitch alignment requirement for textures */
+ cudaDevAttrMaxTextureCubemapWidth = 52, /**< Maximum cubemap texture width/height */
+ cudaDevAttrMaxTextureCubemapLayeredWidth = 53, /**< Maximum cubemap layered texture width/height */
+ cudaDevAttrMaxTextureCubemapLayeredLayers = 54, /**< Maximum layers in a cubemap layered texture */
+ cudaDevAttrMaxSurface1DWidth = 55, /**< Maximum 1D surface width */
+ cudaDevAttrMaxSurface2DWidth = 56, /**< Maximum 2D surface width */
+ cudaDevAttrMaxSurface2DHeight = 57, /**< Maximum 2D surface height */
+ cudaDevAttrMaxSurface3DWidth = 58, /**< Maximum 3D surface width */
+ cudaDevAttrMaxSurface3DHeight = 59, /**< Maximum 3D surface height */
+ cudaDevAttrMaxSurface3DDepth = 60, /**< Maximum 3D surface depth */
+ cudaDevAttrMaxSurface1DLayeredWidth = 61, /**< Maximum 1D layered surface width */
+ cudaDevAttrMaxSurface1DLayeredLayers = 62, /**< Maximum layers in a 1D layered surface */
+ cudaDevAttrMaxSurface2DLayeredWidth = 63, /**< Maximum 2D layered surface width */
+ cudaDevAttrMaxSurface2DLayeredHeight = 64, /**< Maximum 2D layered surface height */
+ cudaDevAttrMaxSurface2DLayeredLayers = 65, /**< Maximum layers in a 2D layered surface */
+ cudaDevAttrMaxSurfaceCubemapWidth = 66, /**< Maximum cubemap surface width */
+ cudaDevAttrMaxSurfaceCubemapLayeredWidth = 67, /**< Maximum cubemap layered surface width */
+ cudaDevAttrMaxSurfaceCubemapLayeredLayers = 68, /**< Maximum layers in a cubemap layered surface */
+ cudaDevAttrMaxTexture1DLinearWidth = 69, /**< Maximum 1D linear texture width */
+ cudaDevAttrMaxTexture2DLinearWidth = 70, /**< Maximum 2D linear texture width */
+ cudaDevAttrMaxTexture2DLinearHeight = 71, /**< Maximum 2D linear texture height */
+ cudaDevAttrMaxTexture2DLinearPitch = 72, /**< Maximum 2D linear texture pitch in bytes */
+ cudaDevAttrMaxTexture2DMipmappedWidth = 73, /**< Maximum mipmapped 2D texture width */
+ cudaDevAttrMaxTexture2DMipmappedHeight = 74, /**< Maximum mipmapped 2D texture height */
+ cudaDevAttrComputeCapabilityMajor = 75, /**< Major compute capability version number */
+ cudaDevAttrComputeCapabilityMinor = 76, /**< Minor compute capability version number */
+ cudaDevAttrMaxTexture1DMipmappedWidth = 77, /**< Maximum mipmapped 1D texture width */
+ cudaDevAttrStreamPrioritiesSupported = 78, /**< Device supports stream priorities */
+ cudaDevAttrGlobalL1CacheSupported = 79, /**< Device supports caching globals in L1 */
+ cudaDevAttrLocalL1CacheSupported = 80, /**< Device supports caching locals in L1 */
+ cudaDevAttrMaxSharedMemoryPerMultiprocessor = 81, /**< Maximum shared memory available per multiprocessor in bytes */
+ cudaDevAttrMaxRegistersPerMultiprocessor = 82, /**< Maximum number of 32-bit registers available per multiprocessor */
+ cudaDevAttrManagedMemory = 83, /**< Device can allocate managed memory on this system */
+ cudaDevAttrIsMultiGpuBoard = 84, /**< Device is on a multi-GPU board */
+ cudaDevAttrMultiGpuBoardGroupID = 85, /**< Unique identifier for a group of devices on the same multi-GPU board */
+ cudaDevAttrHostNativeAtomicSupported = 86, /**< Link between the device and the host supports native atomic operations */
+ cudaDevAttrSingleToDoublePrecisionPerfRatio = 87, /**< Ratio of single precision performance (in floating-point operations per second) to double precision performance */
+ cudaDevAttrPageableMemoryAccess = 88, /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
+ cudaDevAttrConcurrentManagedAccess = 89, /**< Device can coherently access managed memory concurrently with the CPU */
+ cudaDevAttrComputePreemptionSupported = 90, /**< Device supports Compute Preemption */
+ cudaDevAttrCanUseHostPointerForRegisteredMem = 91, /**< Device can access host registered memory at the same virtual address as the CPU */
+ cudaDevAttrReserved92 = 92,
+ cudaDevAttrReserved93 = 93,
+ cudaDevAttrReserved94 = 94,
+ cudaDevAttrCooperativeLaunch = 95, /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
+ cudaDevAttrCooperativeMultiDeviceLaunch = 96, /**< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated. */
+ cudaDevAttrMaxSharedMemoryPerBlockOptin = 97, /**< The maximum opt-in shared memory per block. This value may vary by chip. See ::cudaFuncSetAttribute */
+ cudaDevAttrCanFlushRemoteWrites = 98, /**< Device supports flushing of outstanding remote writes. */
+ cudaDevAttrHostRegisterSupported = 99, /**< Device supports host memory registration via ::cudaHostRegister. */
+ cudaDevAttrPageableMemoryAccessUsesHostPageTables = 100, /**< Device accesses pageable memory via the host's page tables. */
+ cudaDevAttrDirectManagedMemAccessFromHost = 101, /**< Host can directly access managed memory on the device without migration. */
+ cudaDevAttrMaxBlocksPerMultiprocessor = 106, /**< Maximum number of blocks per multiprocessor */
+ cudaDevAttrMaxPersistingL2CacheSize = 108, /**< Maximum L2 persisting lines capacity setting in bytes. */
+ cudaDevAttrMaxAccessPolicyWindowSize = 109, /**< Maximum value of cudaAccessPolicyWindow::num_bytes. */
+ cudaDevAttrReservedSharedMemoryPerBlock = 111, /**< Shared memory reserved by CUDA driver per block in bytes */
+ cudaDevAttrSparseCudaArraySupported = 112, /**< Device supports sparse CUDA arrays and sparse CUDA mipmapped arrays */
+ cudaDevAttrHostRegisterReadOnlySupported = 113, /**< Device supports using the ::cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU */
+ cudaDevAttrTimelineSemaphoreInteropSupported = 114, /**< External timeline semaphore interop is supported on the device */
+ cudaDevAttrMaxTimelineSemaphoreInteropSupported = 114, /**< Deprecated alias of ::cudaDevAttrTimelineSemaphoreInteropSupported (same value, 114): external timeline semaphore interop is supported on the device */
+ cudaDevAttrMemoryPoolsSupported = 115, /**< Device supports using the ::cudaMallocAsync and ::cudaMemPool family of APIs */
+ cudaDevAttrGPUDirectRDMASupported = 116, /**< Device supports GPUDirect RDMA APIs, like nvidia_p2p_get_pages (see https://docs.nvidia.com/cuda/gpudirect-rdma for more information) */
+ cudaDevAttrGPUDirectRDMAFlushWritesOptions = 117, /**< The returned attribute shall be interpreted as a bitmask, where the individual bits are listed in the ::cudaFlushGPUDirectRDMAWritesOptions enum */
+ cudaDevAttrGPUDirectRDMAWritesOrdering = 118, /**< GPUDirect RDMA writes to the device do not need to be flushed for consumers within the scope indicated by the returned attribute. See ::cudaGPUDirectRDMAWritesOrdering for the numerical values returned here. */
+ cudaDevAttrMemoryPoolSupportedHandleTypes = 119, /**< Handle types supported with mempool based IPC */
+ cudaDevAttrClusterLaunch = 120, /**< Indicates device supports cluster launch */
+ cudaDevAttrDeferredMappingCudaArraySupported = 121, /**< Device supports deferred mapping CUDA arrays and CUDA mipmapped arrays */
+ cudaDevAttrReserved122 = 122,
+ cudaDevAttrReserved123 = 123,
+ cudaDevAttrReserved124 = 124,
+ cudaDevAttrIpcEventSupport = 125, /**< Device supports IPC Events. */
+ cudaDevAttrMemSyncDomainCount = 126, /**< Number of memory synchronization domains the device supports. */
+ cudaDevAttrReserved127 = 127,
+ cudaDevAttrReserved128 = 128,
+ cudaDevAttrReserved129 = 129,
+ cudaDevAttrNumaConfig = 130, /**< NUMA configuration of a device: value is of type ::cudaDeviceNumaConfig enum */
+ cudaDevAttrNumaId = 131, /**< NUMA node ID of the GPU memory */
+ cudaDevAttrReserved132 = 132,
+ cudaDevAttrMpsEnabled = 133, /**< Contexts created on this device will be shared via MPS */
+ cudaDevAttrHostNumaId = 134, /**< NUMA ID of the host node closest to the device or -1 when system does not support NUMA */
+ cudaDevAttrD3D12CigSupported = 135, /**< Device supports CIG with D3D12. */
+ cudaDevAttrGpuPciDeviceId = 139, /**< The combined 16-bit PCI device ID and 16-bit PCI vendor ID. */
+ cudaDevAttrGpuPciSubsystemId = 140, /**< The combined 16-bit PCI subsystem ID and 16-bit PCI subsystem vendor ID. */
+ cudaDevAttrHostNumaMultinodeIpcSupported = 143, /**< Device supports HostNuma location IPC between nodes in a multi-node system. */
+ cudaDevAttrMax /**< No initializer, so implicitly 144 (previous enumerator + 1). NOTE(review): presumably an end-of-list marker rather than a queryable attribute; confirm. */
+};
+
+/**
+ * CUDA memory pool attributes
+ */
+enum __device_builtin__ cudaMemPoolAttr
+{
+ /**
+ * (value type = int)
+ * Allow ::cudaMallocAsync to use memory asynchronously freed
+ * in another stream as long as a stream ordering dependency
+ * of the allocating stream on the free action exists.
+ * CUDA events and null stream interactions can create the required
+ * stream ordered dependencies. (default enabled)
+ */
+ cudaMemPoolReuseFollowEventDependencies = 0x1,
+
+ /**
+ * (value type = int)
+ * Allow reuse of already completed frees when there is no dependency
+ * between the free and allocation. (default enabled)
+ */
+ cudaMemPoolReuseAllowOpportunistic = 0x2,
+
+ /**
+ * (value type = int)
+ * Allow ::cudaMallocAsync to insert new stream dependencies
+ * in order to establish the stream ordering required to reuse
+ * a piece of memory released by ::cudaFreeAsync (default enabled).
+ */
+ cudaMemPoolReuseAllowInternalDependencies = 0x3,
+
+
+ /**
+ * (value type = cuuint64_t)
+ * Amount of reserved memory in bytes to hold onto before trying
+ * to release memory back to the OS. When more than the release
+ * threshold bytes of memory are held by the memory pool, the
+ * allocator will try to release memory back to the OS on the
+ * next call to stream, event or context synchronize. (default 0)
+ */
+ cudaMemPoolAttrReleaseThreshold = 0x4,
+
+ /**
+ * (value type = cuuint64_t)
+ * Amount of backing memory currently allocated for the mempool.
+ */
+ cudaMemPoolAttrReservedMemCurrent = 0x5,
+
+ /**
+ * (value type = cuuint64_t)
+ * High watermark of backing memory allocated for the mempool since the
+ * last time it was reset. High watermark can only be reset to zero.
+ */
+ cudaMemPoolAttrReservedMemHigh = 0x6,
+
+ /**
+ * (value type = cuuint64_t)
+ * Amount of memory from the pool that is currently in use by the application.
+ */
+ cudaMemPoolAttrUsedMemCurrent = 0x7,
+
+ /**
+ * (value type = cuuint64_t)
+ * High watermark of the amount of memory from the pool that was in use by the application since
+ * the last time it was reset. High watermark can only be reset to zero.
+ */
+ cudaMemPoolAttrUsedMemHigh = 0x8
+};
+
+/**
+ * Kinds of memory location; the kind determines how an accompanying id is interpreted.
+ */
+enum __device_builtin__ cudaMemLocationType {
+ cudaMemLocationTypeInvalid = 0, /**< Invalid location type (zero value) */
+ cudaMemLocationTypeDevice = 1, /**< Device location: id is a device ordinal */
+ cudaMemLocationTypeHost = 2, /**< Host location: id is ignored */
+ cudaMemLocationTypeHostNuma = 3, /**< Host NUMA node: id is a host NUMA node id */
+ cudaMemLocationTypeHostNumaCurrent = 4 /**< Host NUMA node closest to the current thread's CPU: id is ignored */
+};
+
+/**
+ * Specifies a memory location.
+ *
+ * To specify a gpu, set type = ::cudaMemLocationTypeDevice and set id = the gpu's device ordinal.
+ * To specify a cpu NUMA node, set type = ::cudaMemLocationTypeHostNuma and set id = host NUMA node id.
+ */
+struct __device_builtin__ cudaMemLocation {
+ enum cudaMemLocationType type; /**< Specifies the location type, which modifies the meaning of id. */
+ int id; /**< Identifier whose meaning depends on this location's ::cudaMemLocationType (see type). */
+};
+
+/**
+ * Specifies the memory protection flags for mapping.
+ */
+enum __device_builtin__ cudaMemAccessFlags {
+ cudaMemAccessFlagsProtNone = 0, /**< Default, make the address range not accessible */
+ cudaMemAccessFlagsProtRead = 1, /**< Make the address range read accessible */
+ cudaMemAccessFlagsProtReadWrite = 3 /**< Make the address range read-write accessible (3 = 0x1 | 0x2, so the ProtRead bit is included) */
+};
+
+/**
+ * Memory access descriptor: pairs a location with the access flags requested for it
+ */
+struct __device_builtin__ cudaMemAccessDesc {
+ struct cudaMemLocation location; /**< Location on which the request is to change its accessibility */
+ enum cudaMemAccessFlags flags; /**< ::cudaMemAccessFlags accessibility flags to set on the request */
+};
+
+/**
+ * Defines the allocation types available
+ */
+enum __device_builtin__ cudaMemAllocationType {
+ cudaMemAllocationTypeInvalid = 0x0, /**< Invalid allocation type (zero value) */
+ /** This allocation type is 'pinned', i.e. cannot migrate from its current
+ * location while the application is actively using it
+ */
+ cudaMemAllocationTypePinned = 0x1,
+ cudaMemAllocationTypeMax = 0x7FFFFFFF /**< Largest positive 32-bit signed value. NOTE(review): presumably an upper-bound marker, not a selectable allocation type; confirm. */
+};
+
+/**
+ * Flags for specifying particular handle types
+ */
+enum __device_builtin__ cudaMemAllocationHandleType {
+ cudaMemHandleTypeNone = 0x0, /**< Does not allow any export mechanism. */
+ cudaMemHandleTypePosixFileDescriptor = 0x1, /**< Allows a file descriptor to be used for exporting. Permitted only on POSIX systems. (int) */
+ cudaMemHandleTypeWin32 = 0x2, /**< Allows a Win32 NT handle to be used for exporting. (HANDLE) */
+ cudaMemHandleTypeWin32Kmt = 0x4, /**< Allows a Win32 KMT handle to be used for exporting. (D3DKMT_HANDLE) */
+ cudaMemHandleTypeFabric = 0x8 /**< Allows a fabric handle to be used for exporting. (cudaMemFabricHandle_t) */
+};
+
+/**
+ * This flag, if set, indicates that the memory will be used as a buffer for
+ * hardware accelerated decompression. NOTE(review): presumably set in ::cudaMemPoolProps::usage; confirm.
+ */
+#define cudaMemPoolCreateUsageHwDecompress 0x2
+
+/**
+ * Specifies the properties of allocations made from the pool.
+ */
+struct __device_builtin__ cudaMemPoolProps {
+ enum cudaMemAllocationType allocType; /**< Allocation type. Currently must be specified as cudaMemAllocationTypePinned */
+ enum cudaMemAllocationHandleType handleTypes; /**< Handle types that will be supported by allocations from the pool. */
+ struct cudaMemLocation location; /**< Location where allocations should reside. */
+ /**
+ * Windows-specific LPSECURITYATTRIBUTES required when
+ * ::cudaMemHandleTypeWin32 is specified. This security attribute defines
+ * the scope of which exported allocations may be transferred to other
+ * processes. In all other cases, this field is required to be zero.
+ */
+ void *win32SecurityAttributes;
+ size_t maxSize; /**< Maximum pool size. When set to 0, defaults to a system dependent value. */
+ unsigned short usage; /**< Bitmask indicating intended usage for the pool. */
+ unsigned char reserved[54]; /**< Reserved for future use, must be 0 */
+};
+
+/**
+ * Opaque data for exporting a pool allocation
+ */
+struct __device_builtin__ cudaMemPoolPtrExportData {
+ unsigned char reserved[64]; /**< 64 opaque bytes; their contents are not described in this header */
+};
+
+/**
+ * Memory allocation node parameters
+ */
+struct __device_builtin__ cudaMemAllocNodeParams {
+ /**
+ * in: location where the allocation should reside (specified in ::location).
+ * ::handleTypes must be ::cudaMemHandleTypeNone. IPC is not supported.
+ */
+ struct cudaMemPoolProps poolProps; /**< in: pool properties for the allocation; constraints are given in the comment above */
+ const struct cudaMemAccessDesc *accessDescs; /**< in: array of memory access descriptors. Used to describe peer GPU access */
+ size_t accessDescCount; /**< in: number of memory access descriptors in `accessDescs`. Must not exceed the number of GPUs. */
+ size_t bytesize; /**< in: size in bytes of the requested allocation */
+ void *dptr; /**< out: address of the allocation returned by CUDA */
+};
+
+/**
+ * Memory allocation node parameters (V2; the fields declared here are the same as in ::cudaMemAllocNodeParams)
+ */
+struct __device_builtin__ cudaMemAllocNodeParamsV2 {
+ /**
+ * in: location where the allocation should reside (specified in ::location).
+ * ::handleTypes must be ::cudaMemHandleTypeNone. IPC is not supported.
+ */
+ struct cudaMemPoolProps poolProps; /**< in: pool properties for the allocation; constraints are given in the comment above */
+ const struct cudaMemAccessDesc *accessDescs; /**< in: array of memory access descriptors. Used to describe peer GPU access */
+ size_t accessDescCount; /**< in: number of memory access descriptors in `accessDescs`. Must not exceed the number of GPUs. */
+ size_t bytesize; /**< in: size in bytes of the requested allocation */
+ void *dptr; /**< out: address of the allocation returned by CUDA */
+};
+
+/**
+ * Memory free node parameters: a single input pointer identifying what to free
+ */
+struct __device_builtin__ cudaMemFreeNodeParams {
+ void *dptr; /**< in: the pointer to free */
+};
+
+/**
+ * Graph memory attributes. Every attribute listed here is documented with value type cuuint64_t.
+ */
+enum __device_builtin__ cudaGraphMemAttributeType {
+ /**
+ * (value type = cuuint64_t)
+ * Amount of memory, in bytes, currently associated with graphs.
+ */
+ cudaGraphMemAttrUsedMemCurrent = 0x0,
+
+ /**
+ * (value type = cuuint64_t)
+ * High watermark of memory, in bytes, associated with graphs since the
+ * last time it was reset. High watermark can only be reset to zero.
+ */
+ cudaGraphMemAttrUsedMemHigh = 0x1,
+
+ /**
+ * (value type = cuuint64_t)
+ * Amount of memory, in bytes, currently allocated for use by
+ * the CUDA graphs asynchronous allocator.
+ */
+ cudaGraphMemAttrReservedMemCurrent = 0x2,
+
+ /**
+ * (value type = cuuint64_t)
+ * High watermark of memory, in bytes, currently allocated for use by
+ * the CUDA graphs asynchronous allocator.
+ */
+ cudaGraphMemAttrReservedMemHigh = 0x3
+};
+
+/**
+ * Flags to specify for copies within a batch. For more details see ::cudaMemcpyBatchAsync.
+ */
+enum __device_builtin__ cudaMemcpyFlags {
+ cudaMemcpyFlagDefault = 0x0, /**< No flag bits set */
+
+ /**
+ * Hint to the driver to try and overlap the copy with compute work on the SMs.
+ */
+ cudaMemcpyFlagPreferOverlapWithCompute = 0x1
+};
+
+enum __device_builtin__ cudaMemcpySrcAccessOrder { /* Source-access ordering choices; used as a field type by ::cudaMemcpyAttributes and ::cudaMemcpy3DBatchOp */
+ /**
+ * Default invalid.
+ */
+ cudaMemcpySrcAccessOrderInvalid = 0x0,
+
+ /**
+ * Indicates that access to the source pointer must be in stream order.
+ */
+ cudaMemcpySrcAccessOrderStream = 0x1,
+
+ /**
+ * Indicates that access to the source pointer can be out of stream order and all
+ * accesses must be complete before the API call returns. This flag is suited for
+ * ephemeral sources (e.g., stack variables) when it's known that no prior operations
+ * in the stream can be accessing the memory and also that the lifetime of the memory
+ * is limited to the scope that the source variable was declared in. Specifying
+ * this flag allows the driver to optimize the copy and removes the need for the user
+ * to synchronize the stream after the API call.
+ */
+ cudaMemcpySrcAccessOrderDuringApiCall = 0x2,
+
+ /**
+ * Indicates that access to the source pointer can be out of stream order and the accesses
+ * can happen even after the API call returns. This flag is suited for host pointers
+ * allocated outside CUDA (e.g., via malloc) when it's known that no prior operations
+ * in the stream can be accessing the memory. Specifying this flag allows the driver
+ * to optimize the copy on certain platforms.
+ */
+ cudaMemcpySrcAccessOrderAny = 0x3,
+
+ cudaMemcpySrcAccessOrderMax = 0x7FFFFFFF /**< Largest positive 32-bit signed value. NOTE(review): presumably an upper-bound marker, not a selectable ordering; confirm. */
+};
+
+/**
+ * Attributes specific to copies within a batch: source access ordering, source/destination location hints, and flags. For more details on usage see ::cudaMemcpyBatchAsync.
+ */
+struct __device_builtin__ cudaMemcpyAttributes {
+ enum cudaMemcpySrcAccessOrder srcAccessOrder; /**< Source access ordering to be observed for copies with this attribute. */
+ struct cudaMemLocation srcLocHint; /**< Hint location for the source operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
+ struct cudaMemLocation dstLocHint; /**< Hint location for the destination operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
+ unsigned int flags; /**< Additional flags for copies with this attribute. See ::cudaMemcpyFlags. */
+};
+
+/**
+ * These flags allow applications to convey the operand type for individual copies specified in ::cudaMemcpy3DBatchAsync.
+ */
+enum __device_builtin__ cudaMemcpy3DOperandType {
+ cudaMemcpyOperandTypePointer = 0x1, /**< Memcpy operand is a valid pointer. */
+ cudaMemcpyOperandTypeArray = 0x2, /**< Memcpy operand is a ::cudaArray_t. */
+ cudaMemcpyOperandTypeMax = 0x7FFFFFFF /**< Largest positive 32-bit signed value. NOTE(review): presumably an upper-bound marker, not a real operand type; confirm. */
+};
+
+/**
+ * Struct representing offset into a ::cudaArray_t in elements
+ */
+struct __device_builtin__ cudaOffset3D {
+ size_t x; /**< Offset along x, in elements */
+ size_t y; /**< Offset along y, in elements */
+ size_t z; /**< Offset along z, in elements */
+};
+
+/**
+ * Struct representing an operand for copy with ::cudaMemcpy3DBatchAsync
+ */
+struct __device_builtin__ cudaMemcpy3DOperand {
+ enum cudaMemcpy3DOperandType type; /**< Selects which member of op describes this operand. */
+ union {
+ /**
+ * Struct representing an operand when ::cudaMemcpy3DOperand::type is ::cudaMemcpyOperandTypePointer
+ */
+ struct {
+ void *ptr; /**< The pointer operand. */
+ size_t rowLength; /**< Length of each row in elements. */
+ size_t layerHeight; /**< Height of each layer in elements. */
+ struct cudaMemLocation locHint; /**< Hint location for the operand. Ignored when the pointers are not managed memory or memory allocated outside CUDA. */
+ } ptr;
+
+ /**
+ * Struct representing an operand when ::cudaMemcpy3DOperand::type is ::cudaMemcpyOperandTypeArray
+ */
+ struct {
+ cudaArray_t array; /**< The array operand. */
+ struct cudaOffset3D offset; /**< Offset into the array, in elements (see ::cudaOffset3D). */
+ } array;
+ } op;
+};
+
+struct __device_builtin__ cudaMemcpy3DBatchOp { /* One 3D copy description: source and destination operands, extent, source access ordering, and flags */
+ struct cudaMemcpy3DOperand src; /**< Source memcpy operand. */
+ struct cudaMemcpy3DOperand dst; /**< Destination memcpy operand. */
+ struct cudaExtent extent; /**< Extents of the memcpy between src and dst. The width, height and depth components must not be 0. */
+ enum cudaMemcpySrcAccessOrder srcAccessOrder; /**< Source access ordering to be observed for copy from src to dst. */
+ unsigned int flags; /**< Additional flags for copy from src to dst. See ::cudaMemcpyFlags. */
+};
+
+/**
+ * CUDA device P2P attributes (properties of the link between two devices)
+ */
+
+enum __device_builtin__ cudaDeviceP2PAttr {
+ cudaDevP2PAttrPerformanceRank = 1, /**< A relative value indicating the performance of the link between two devices */
+ cudaDevP2PAttrAccessSupported = 2, /**< Peer access is enabled */
+ cudaDevP2PAttrNativeAtomicSupported = 3, /**< Native atomic operations over the link are supported */
+ cudaDevP2PAttrCudaArrayAccessSupported = 4 /**< Accessing CUDA arrays over the link is supported */
+};
+
+/**
+ * CUDA UUID types
+ */
+#ifndef CU_UUID_HAS_BEEN_DEFINED /* define CUuuid_st and CUuuid only if this guard macro is not already set */
+#define CU_UUID_HAS_BEEN_DEFINED
+struct __device_builtin__ CUuuid_st { /**< CUDA definition of UUID */
+ char bytes[16]; /**< The 16 raw bytes of the UUID */
+};
+typedef __device_builtin__ struct CUuuid_st CUuuid;
+#endif /* CU_UUID_HAS_BEEN_DEFINED */
+typedef __device_builtin__ struct CUuuid_st cudaUUID_t; /**< Alias of the same struct; declared outside the guard, so it is emitted whether or not the guarded section was compiled */
+
+/**
+ * CUDA device properties
+ */
+struct __device_builtin__ cudaDeviceProp
+{
+ char name[256]; /**< ASCII string identifying device */
+ cudaUUID_t uuid; /**< 16-byte unique identifier */
+ char luid[8]; /**< 8-byte locally unique identifier. Value is undefined on TCC and non-Windows platforms */
+ unsigned int luidDeviceNodeMask; /**< LUID device node mask. Value is undefined on TCC and non-Windows platforms */
+ size_t totalGlobalMem; /**< Global memory available on device in bytes */
+ size_t sharedMemPerBlock; /**< Shared memory available per block in bytes */
+ int regsPerBlock; /**< 32-bit registers available per block */
+ int warpSize; /**< Warp size in threads */
+ size_t memPitch; /**< Maximum pitch in bytes allowed by memory copies */
+ int maxThreadsPerBlock; /**< Maximum number of threads per block */
+ int maxThreadsDim[3]; /**< Maximum size of each dimension of a block */
+ int maxGridSize[3]; /**< Maximum size of each dimension of a grid */
+ int clockRate; /**< Deprecated, Clock frequency in kilohertz */
+ size_t totalConstMem; /**< Constant memory available on device in bytes */
+ int major; /**< Major compute capability */
+ int minor; /**< Minor compute capability */
+ size_t textureAlignment; /**< Alignment requirement for textures */
+ size_t texturePitchAlignment; /**< Pitch alignment requirement for texture references bound to pitched memory */
+ int deviceOverlap; /**< Device can concurrently copy memory and execute a kernel. Deprecated. Use instead asyncEngineCount. */
+ int multiProcessorCount; /**< Number of multiprocessors on device */
+ int kernelExecTimeoutEnabled; /**< Deprecated, Specified whether there is a run time limit on kernels */
+ int integrated; /**< Device is integrated as opposed to discrete */
+ int canMapHostMemory; /**< Device can map host memory with cudaHostAlloc/cudaHostGetDevicePointer */
+ int computeMode; /**< Deprecated, Compute mode (See ::cudaComputeMode) */
+ int maxTexture1D; /**< Maximum 1D texture size */
+ int maxTexture1DMipmap; /**< Maximum 1D mipmapped texture size */
+ int maxTexture1DLinear; /**< Deprecated, do not use. Use cudaDeviceGetTexture1DLinearMaxWidth() or cuDeviceGetTexture1DLinearMaxWidth() instead. */
+ int maxTexture2D[2]; /**< Maximum 2D texture dimensions */
+ int maxTexture2DMipmap[2]; /**< Maximum 2D mipmapped texture dimensions */
+ int maxTexture2DLinear[3]; /**< Maximum dimensions (width, height, pitch) for 2D textures bound to pitched memory */
+ int maxTexture2DGather[2]; /**< Maximum 2D texture dimensions if texture gather operations have to be performed */
+ int maxTexture3D[3]; /**< Maximum 3D texture dimensions */
+ int maxTexture3DAlt[3]; /**< Maximum alternate 3D texture dimensions */
+ int maxTextureCubemap; /**< Maximum Cubemap texture dimensions */
+ int maxTexture1DLayered[2]; /**< Maximum 1D layered texture dimensions */
+ int maxTexture2DLayered[3]; /**< Maximum 2D layered texture dimensions */
+ int maxTextureCubemapLayered[2];/**< Maximum Cubemap layered texture dimensions */
+ int maxSurface1D; /**< Maximum 1D surface size */
+ int maxSurface2D[2]; /**< Maximum 2D surface dimensions */
+ int maxSurface3D[3]; /**< Maximum 3D surface dimensions */
+ int maxSurface1DLayered[2]; /**< Maximum 1D layered surface dimensions */
+ int maxSurface2DLayered[3]; /**< Maximum 2D layered surface dimensions */
+ int maxSurfaceCubemap; /**< Maximum Cubemap surface dimensions */
+ int maxSurfaceCubemapLayered[2];/**< Maximum Cubemap layered surface dimensions */
+ size_t surfaceAlignment; /**< Alignment requirements for surfaces */
+ int concurrentKernels; /**< Device can possibly execute multiple kernels concurrently */
+ int ECCEnabled; /**< Device has ECC support enabled */
+ int pciBusID; /**< PCI bus ID of the device */
+ int pciDeviceID; /**< PCI device ID of the device */
+ int pciDomainID; /**< PCI domain ID of the device */
+ int tccDriver; /**< 1 if device is a Tesla device using TCC driver, 0 otherwise */
+ int asyncEngineCount; /**< Number of asynchronous engines */
+ int unifiedAddressing; /**< Device shares a unified address space with the host */
+ int memoryClockRate; /**< Deprecated, Peak memory clock frequency in kilohertz */
+ int memoryBusWidth; /**< Global memory bus width in bits */
+ int l2CacheSize; /**< Size of L2 cache in bytes */
+ int persistingL2CacheMaxSize; /**< Device's maximum l2 persisting lines capacity setting in bytes */
+ int maxThreadsPerMultiProcessor;/**< Maximum resident threads per multiprocessor */
+ int streamPrioritiesSupported; /**< Device supports stream priorities */
+ int globalL1CacheSupported; /**< Device supports caching globals in L1 */
+ int localL1CacheSupported; /**< Device supports caching locals in L1 */
+ size_t sharedMemPerMultiprocessor; /**< Shared memory available per multiprocessor in bytes */
+ int regsPerMultiprocessor; /**< 32-bit registers available per multiprocessor */
+ int managedMemory; /**< Device supports allocating managed memory on this system */
+ int isMultiGpuBoard; /**< Device is on a multi-GPU board */
+ int multiGpuBoardGroupID; /**< Unique identifier for a group of devices on the same multi-GPU board */
+ int hostNativeAtomicSupported; /**< Link between the device and the host supports native atomic operations */
+ int singleToDoublePrecisionPerfRatio; /**< Deprecated, Ratio of single precision performance (in floating-point operations per second) to double precision performance */
+ int pageableMemoryAccess; /**< Device supports coherently accessing pageable memory without calling cudaHostRegister on it */
+ int concurrentManagedAccess; /**< Device can coherently access managed memory concurrently with the CPU */
+ int computePreemptionSupported; /**< Device supports Compute Preemption */
+ int canUseHostPointerForRegisteredMem; /**< Device can access host registered memory at the same virtual address as the CPU */
+ int cooperativeLaunch; /**< Device supports launching cooperative kernels via ::cudaLaunchCooperativeKernel */
+ int cooperativeMultiDeviceLaunch; /**< Deprecated, cudaLaunchCooperativeKernelMultiDevice is deprecated. */
+ size_t sharedMemPerBlockOptin; /**< Per device maximum shared memory per block usable by special opt in */
+ int pageableMemoryAccessUsesHostPageTables; /**< Device accesses pageable memory via the host's page tables */
+ int directManagedMemAccessFromHost; /**< Host can directly access managed memory on the device without migration. */
+ int maxBlocksPerMultiProcessor; /**< Maximum number of resident blocks per multiprocessor */
+ int accessPolicyMaxWindowSize; /**< The maximum value of ::cudaAccessPolicyWindow::num_bytes. */
+ size_t reservedSharedMemPerBlock; /**< Shared memory reserved by CUDA driver per block in bytes */
+ int hostRegisterSupported; /**< Device supports host memory registration via ::cudaHostRegister. */
+ int sparseCudaArraySupported; /**< 1 if the device supports sparse CUDA arrays and sparse CUDA mipmapped arrays, 0 otherwise */
+ int hostRegisterReadOnlySupported; /**< Device supports using the ::cudaHostRegister flag cudaHostRegisterReadOnly to register memory that must be mapped as read-only to the GPU */
+ int timelineSemaphoreInteropSupported; /**< External timeline semaphore interop is supported on the device */
+ int memoryPoolsSupported; /**< 1 if the device supports using the cudaMallocAsync and cudaMemPool family of APIs, 0 otherwise */
+ int gpuDirectRDMASupported; /**< 1 if the device supports GPUDirect RDMA APIs, 0 otherwise */
+ unsigned int gpuDirectRDMAFlushWritesOptions; /**< Bitmask to be interpreted according to the ::cudaFlushGPUDirectRDMAWritesOptions enum */
+ int gpuDirectRDMAWritesOrdering;/**< See the ::cudaGPUDirectRDMAWritesOrdering enum for numerical values */
+ unsigned int memoryPoolSupportedHandleTypes; /**< Bitmask of handle types supported with mempool-based IPC */
+ int deferredMappingCudaArraySupported; /**< 1 if the device supports deferred mapping CUDA arrays and CUDA mipmapped arrays */
+ int ipcEventSupported; /**< Device supports IPC Events. */
+ int clusterLaunch; /**< Indicates device supports cluster launch */
+ int unifiedFunctionPointers; /**< Indicates device supports unified pointers */
+ int reserved[63]; /**< Reserved for future use */
+};
+
+/**
+ * CUDA IPC Handle Size: byte length of the opaque reserved[] payload in the IPC and fabric handle structs
+ */
+#define CUDA_IPC_HANDLE_SIZE 64
+
+/**
+ * CUDA IPC event handle (opaque; holds only a ::CUDA_IPC_HANDLE_SIZE byte reserved buffer)
+ */
+typedef __device_builtin__ struct __device_builtin__ cudaIpcEventHandle_st
+{
+ char reserved[CUDA_IPC_HANDLE_SIZE];
+}cudaIpcEventHandle_t;
+
+/**
+ * CUDA IPC memory handle (opaque; holds only a ::CUDA_IPC_HANDLE_SIZE byte reserved buffer)
+ */
+typedef __device_builtin__ struct __device_builtin__ cudaIpcMemHandle_st
+{
+ char reserved[CUDA_IPC_HANDLE_SIZE];
+}cudaIpcMemHandle_t;
+
+/**
+ * CUDA memory fabric handle (opaque; holds only a ::CUDA_IPC_HANDLE_SIZE byte reserved buffer)
+ */
+typedef __device_builtin__ struct __device_builtin__ cudaMemFabricHandle_st
+{
+ char reserved[CUDA_IPC_HANDLE_SIZE];
+}cudaMemFabricHandle_t;
+
+/**
+ * External memory handle types (selects which member of ::cudaExternalMemoryHandleDesc::handle is valid)
+ */
+enum __device_builtin__ cudaExternalMemoryHandleType {
+ /**
+ * Handle is an opaque file descriptor
+ */
+ cudaExternalMemoryHandleTypeOpaqueFd = 1,
+ /**
+ * Handle is an opaque shared NT handle
+ */
+ cudaExternalMemoryHandleTypeOpaqueWin32 = 2,
+ /**
+ * Handle is an opaque, globally shared handle
+ */
+ cudaExternalMemoryHandleTypeOpaqueWin32Kmt = 3,
+ /**
+ * Handle is a D3D12 heap object
+ */
+ cudaExternalMemoryHandleTypeD3D12Heap = 4,
+ /**
+ * Handle is a D3D12 committed resource
+ */
+ cudaExternalMemoryHandleTypeD3D12Resource = 5,
+ /**
+ * Handle is a shared NT handle to a D3D11 resource
+ */
+ cudaExternalMemoryHandleTypeD3D11Resource = 6,
+ /**
+ * Handle is a globally shared handle to a D3D11 resource
+ */
+ cudaExternalMemoryHandleTypeD3D11ResourceKmt = 7,
+ /**
+ * Handle is an NvSciBuf object
+ */
+ cudaExternalMemoryHandleTypeNvSciBuf = 8
+};
+
+/**
+ * Indicates that the external memory object is a dedicated resource (the only non-zero value accepted by ::cudaExternalMemoryHandleDesc::flags)
+ */
+#define cudaExternalMemoryDedicated 0x1
+
+/** When the \p flags parameter of ::cudaExternalSemaphoreSignalParams
+ * contains this flag, it indicates that signaling an external semaphore object
+ * should skip performing appropriate memory synchronization operations over all
+ * the external memory objects that are imported as ::cudaExternalMemoryHandleTypeNvSciBuf,
+ * which otherwise are performed by default to ensure data coherency with other
+ * importers of the same NvSciBuf memory objects.
+ */
+#define cudaExternalSemaphoreSignalSkipNvSciBufMemSync 0x01
+
+/** When the \p flags parameter of ::cudaExternalSemaphoreWaitParams
+ * contains this flag, it indicates that waiting on an external semaphore object
+ * should skip performing appropriate memory synchronization operations over all
+ * the external memory objects that are imported as ::cudaExternalMemoryHandleTypeNvSciBuf,
+ * which otherwise are performed by default to ensure data coherency with other
+ * importers of the same NvSciBuf memory objects.
+ */
+#define cudaExternalSemaphoreWaitSkipNvSciBufMemSync 0x02
+
+/**
+ * When \p flags of ::cudaDeviceGetNvSciSyncAttributes is set to this,
+ * it indicates that the application needs signaler-specific NvSciSyncAttr
+ * to be filled by ::cudaDeviceGetNvSciSyncAttributes.
+ */
+#define cudaNvSciSyncAttrSignal 0x1
+
+/**
+ * When \p flags of ::cudaDeviceGetNvSciSyncAttributes is set to this,
+ * it indicates that the application needs waiter-specific NvSciSyncAttr
+ * to be filled by ::cudaDeviceGetNvSciSyncAttributes.
+ */
+#define cudaNvSciSyncAttrWait 0x2
+
+/**
+ * External memory handle descriptor
+ */
+struct __device_builtin__ cudaExternalMemoryHandleDesc {
+ /**
+ * Type of the handle; selects which member of the \p handle union is valid
+ */
+ enum cudaExternalMemoryHandleType type;
+ union {
+ /**
+ * File descriptor referencing the memory object. Valid
+ * when type is
+ * ::cudaExternalMemoryHandleTypeOpaqueFd
+ */
+ int fd;
+ /**
+ * Win32 handle referencing the memory object. Valid when
+ * type is one of the following:
+ * - ::cudaExternalMemoryHandleTypeOpaqueWin32
+ * - ::cudaExternalMemoryHandleTypeOpaqueWin32Kmt
+ * - ::cudaExternalMemoryHandleTypeD3D12Heap
+ * - ::cudaExternalMemoryHandleTypeD3D12Resource
+ * - ::cudaExternalMemoryHandleTypeD3D11Resource
+ * - ::cudaExternalMemoryHandleTypeD3D11ResourceKmt
+ * Exactly one of 'handle' and 'name' must be non-NULL. If
+ * type is one of the following:
+ * - ::cudaExternalMemoryHandleTypeOpaqueWin32Kmt
+ * - ::cudaExternalMemoryHandleTypeD3D11ResourceKmt
+ * then 'name' must be NULL.
+ */
+ struct {
+ /**
+ * Valid NT handle. Must be NULL if 'name' is non-NULL
+ */
+ void *handle;
+ /**
+ * Name of a valid memory object.
+ * Must be NULL if 'handle' is non-NULL.
+ */
+ const void *name;
+ } win32;
+ /**
+ * A handle representing an NvSciBuf object. Valid when type
+ * is ::cudaExternalMemoryHandleTypeNvSciBuf
+ */
+ const void *nvSciBufObject;
+ } handle;
+ /**
+ * Size of the memory allocation (NOTE(review): presumably in bytes - confirm)
+ */
+ unsigned long long size;
+ /**
+ * Flags must either be zero or ::cudaExternalMemoryDedicated
+ */
+ unsigned int flags;
+};
+
+/**
+ * External memory buffer descriptor
+ */
+struct __device_builtin__ cudaExternalMemoryBufferDesc {
+ /**
+ * Offset into the memory object where the buffer's base is (NOTE(review): presumably in bytes - confirm)
+ */
+ unsigned long long offset;
+ /**
+ * Size of the buffer (NOTE(review): presumably in bytes - confirm)
+ */
+ unsigned long long size;
+ /**
+ * Flags reserved for future use. Must be zero.
+ */
+ unsigned int flags;
+};
+
+/**
+ * External memory mipmapped array descriptor
+ */
+struct __device_builtin__ cudaExternalMemoryMipmappedArrayDesc {
+ /**
+ * Offset into the memory object where the base level of the
+ * mipmap chain is (NOTE(review): presumably in bytes - confirm).
+ */
+ unsigned long long offset;
+ /**
+ * Format of the base level of the mipmap chain
+ */
+ struct cudaChannelFormatDesc formatDesc;
+ /**
+ * Dimensions of the base level of the mipmap chain
+ */
+ struct cudaExtent extent;
+ /**
+ * Flags associated with CUDA mipmapped arrays.
+ * See ::cudaMallocMipmappedArray
+ */
+ unsigned int flags;
+ /**
+ * Total number of levels in the mipmap chain
+ */
+ unsigned int numLevels;
+};
+
+/**
+ * External semaphore handle types (selects which member of ::cudaExternalSemaphoreHandleDesc::handle is valid)
+ */
+enum __device_builtin__ cudaExternalSemaphoreHandleType {
+ /**
+ * Handle is an opaque file descriptor
+ */
+ cudaExternalSemaphoreHandleTypeOpaqueFd = 1,
+ /**
+ * Handle is an opaque shared NT handle
+ */
+ cudaExternalSemaphoreHandleTypeOpaqueWin32 = 2,
+ /**
+ * Handle is an opaque, globally shared handle
+ */
+ cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt = 3,
+ /**
+ * Handle is a shared NT handle referencing a D3D12 fence object
+ */
+ cudaExternalSemaphoreHandleTypeD3D12Fence = 4,
+ /**
+ * Handle is a shared NT handle referencing a D3D11 fence object
+ */
+ cudaExternalSemaphoreHandleTypeD3D11Fence = 5,
+ /**
+ * Opaque handle to NvSciSync Object
+ */
+ cudaExternalSemaphoreHandleTypeNvSciSync = 6,
+ /**
+ * Handle is a shared NT handle referencing a D3D11 keyed mutex object
+ */
+ cudaExternalSemaphoreHandleTypeKeyedMutex = 7,
+ /**
+ * Handle is a shared KMT handle referencing a D3D11 keyed mutex object
+ */
+ cudaExternalSemaphoreHandleTypeKeyedMutexKmt = 8,
+ /**
+ * Handle is an opaque file descriptor referencing a timeline semaphore
+ */
+ cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd = 9,
+ /**
+ * Handle is an opaque shared NT handle referencing a timeline semaphore
+ */
+ cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32 = 10
+};
+
+/**
+ * External semaphore handle descriptor
+ */
+struct __device_builtin__ cudaExternalSemaphoreHandleDesc {
+ /**
+ * Type of the handle; selects which member of the \p handle union is valid
+ */
+ enum cudaExternalSemaphoreHandleType type;
+ union {
+ /**
+ * File descriptor referencing the semaphore object. Valid when
+ * type is one of the following:
+ * - ::cudaExternalSemaphoreHandleTypeOpaqueFd
+ * - ::cudaExternalSemaphoreHandleTypeTimelineSemaphoreFd
+ */
+ int fd;
+ /**
+ * Win32 handle referencing the semaphore object. Valid when
+ * type is one of the following:
+ * - ::cudaExternalSemaphoreHandleTypeOpaqueWin32
+ * - ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
+ * - ::cudaExternalSemaphoreHandleTypeD3D12Fence
+ * - ::cudaExternalSemaphoreHandleTypeD3D11Fence
+ * - ::cudaExternalSemaphoreHandleTypeKeyedMutex
+ * - ::cudaExternalSemaphoreHandleTypeTimelineSemaphoreWin32
+ * Exactly one of 'handle' and 'name' must be non-NULL. If
+ * type is one of the following:
+ * - ::cudaExternalSemaphoreHandleTypeOpaqueWin32Kmt
+ * - ::cudaExternalSemaphoreHandleTypeKeyedMutexKmt
+ * then 'name' must be NULL. NOTE(review): KeyedMutexKmt is missing from the "valid when" list above - confirm.
+ */
+ struct {
+ /**
+ * Valid NT handle. Must be NULL if 'name' is non-NULL
+ */
+ void *handle;
+ /**
+ * Name of a valid synchronization primitive.
+ * Must be NULL if 'handle' is non-NULL.
+ */
+ const void *name;
+ } win32;
+ /**
+ * Valid NvSciSyncObj. Must be non-NULL (NOTE(review): presumably when type is ::cudaExternalSemaphoreHandleTypeNvSciSync - confirm)
+ */
+ const void* nvSciSyncObj;
+ } handle;
+ /**
+ * Flags reserved for the future. Must be zero.
+ */
+ unsigned int flags;
+};
+
+/**
+ * External semaphore signal parameters (deprecated)
+ */
+struct __device_builtin__ cudaExternalSemaphoreSignalParams_v1 {
+ struct {
+ /**
+ * Parameters for fence objects
+ */
+ struct {
+ /**
+ * Value of fence to be signaled
+ */
+ unsigned long long value;
+ } fence;
+ union {
+ /**
+ * Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType
+ * is of type ::cudaExternalSemaphoreHandleTypeNvSciSync.
+ */
+ void *fence;
+ unsigned long long reserved;
+ } nvSciSync;
+ /**
+ * Parameters for keyed mutex objects
+ */
+ struct {
+ /**
+ * Value of key to release the mutex with
+ */
+ unsigned long long key;
+ } keyedMutex;
+ } params;
+ /**
+ * Only when ::cudaExternalSemaphoreSignalParams is used to
+ * signal a ::cudaExternalSemaphore_t of type
+ * ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
+ * ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync, which indicates
+ * that while signaling the ::cudaExternalSemaphore_t, no memory
+ * synchronization operations should be performed for any external memory
+ * object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.
+ * For all other types of ::cudaExternalSemaphore_t, flags must be zero.
+ */
+ unsigned int flags;
+};
+
+/**
+* External semaphore wait parameters (deprecated)
+*/
+struct __device_builtin__ cudaExternalSemaphoreWaitParams_v1 {
+ struct {
+ /**
+ * Parameters for fence objects
+ */
+ struct {
+ /**
+ * Value of fence to be waited on
+ */
+ unsigned long long value;
+ } fence;
+ union {
+ /**
+ * Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType
+ * is of type ::cudaExternalSemaphoreHandleTypeNvSciSync.
+ */
+ void *fence;
+ unsigned long long reserved;
+ } nvSciSync;
+ /**
+ * Parameters for keyed mutex objects
+ */
+ struct {
+ /**
+ * Value of key to acquire the mutex with
+ */
+ unsigned long long key;
+ /**
+ * Timeout in milliseconds to wait to acquire the mutex
+ */
+ unsigned int timeoutMs;
+ } keyedMutex;
+ } params;
+ /**
+ * Only when ::cudaExternalSemaphoreWaitParams is used to
+ * wait on a ::cudaExternalSemaphore_t of type
+ * ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
+ * ::cudaExternalSemaphoreWaitSkipNvSciBufMemSync, which indicates
+ * that while waiting for the ::cudaExternalSemaphore_t, no memory
+ * synchronization operations should be performed for any external memory
+ * object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.
+ * For all other types of ::cudaExternalSemaphore_t, flags must be zero.
+ */
+ unsigned int flags;
+};
+
+/**
+ * External semaphore signal parameters, compatible with driver type
+ */
+struct __device_builtin__ cudaExternalSemaphoreSignalParams{
+ struct {
+ /**
+ * Parameters for fence objects
+ */
+ struct {
+ /**
+ * Value of fence to be signaled
+ */
+ unsigned long long value;
+ } fence;
+ union {
+ /**
+ * Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType
+ * is of type ::cudaExternalSemaphoreHandleTypeNvSciSync.
+ */
+ void *fence;
+ unsigned long long reserved;
+ } nvSciSync;
+ /**
+ * Parameters for keyed mutex objects
+ */
+ struct {
+ /**
+ * Value of key to release the mutex with
+ */
+ unsigned long long key;
+ } keyedMutex;
+ unsigned int reserved[12]; /**< Reserved padding. NOTE(review): presumably must be zero - confirm */
+ } params;
+ /**
+ * Only when ::cudaExternalSemaphoreSignalParams is used to
+ * signal a ::cudaExternalSemaphore_t of type
+ * ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
+ * ::cudaExternalSemaphoreSignalSkipNvSciBufMemSync, which indicates
+ * that while signaling the ::cudaExternalSemaphore_t, no memory
+ * synchronization operations should be performed for any external memory
+ * object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.
+ * For all other types of ::cudaExternalSemaphore_t, flags must be zero.
+ */
+ unsigned int flags;
+ unsigned int reserved[16]; /**< Reserved padding. NOTE(review): presumably must be zero - confirm */
+};
+
+/**
+ * External semaphore wait parameters, compatible with driver type
+ */
+struct __device_builtin__ cudaExternalSemaphoreWaitParams {
+ struct {
+ /**
+ * Parameters for fence objects
+ */
+ struct {
+ /**
+ * Value of fence to be waited on
+ */
+ unsigned long long value;
+ } fence;
+ union {
+ /**
+ * Pointer to NvSciSyncFence. Valid if ::cudaExternalSemaphoreHandleType
+ * is of type ::cudaExternalSemaphoreHandleTypeNvSciSync.
+ */
+ void *fence;
+ unsigned long long reserved;
+ } nvSciSync;
+ /**
+ * Parameters for keyed mutex objects
+ */
+ struct {
+ /**
+ * Value of key to acquire the mutex with
+ */
+ unsigned long long key;
+ /**
+ * Timeout in milliseconds to wait to acquire the mutex
+ */
+ unsigned int timeoutMs;
+ } keyedMutex;
+ unsigned int reserved[10]; /**< Reserved padding. NOTE(review): presumably must be zero - confirm */
+ } params;
+ /**
+ * Only when ::cudaExternalSemaphoreWaitParams is used to
+ * wait on a ::cudaExternalSemaphore_t of type
+ * ::cudaExternalSemaphoreHandleTypeNvSciSync, the valid flag is
+ * ::cudaExternalSemaphoreWaitSkipNvSciBufMemSync, which indicates
+ * that while waiting for the ::cudaExternalSemaphore_t, no memory
+ * synchronization operations should be performed for any external memory
+ * object imported as ::cudaExternalMemoryHandleTypeNvSciBuf.
+ * For all other types of ::cudaExternalSemaphore_t, flags must be zero.
+ */
+ unsigned int flags;
+ unsigned int reserved[16]; /**< Reserved padding. NOTE(review): presumably must be zero - confirm */
+};
+
+/*******************************************************************************
+* *
+* SHORTHAND TYPE DEFINITION USED BY RUNTIME API *
+* *
+*******************************************************************************/
+
+/**
+ * CUDA Error types (shorthand for enum cudaError)
+ */
+typedef __device_builtin__ enum cudaError cudaError_t;
+
+/**
+ * CUDA stream (pointer to struct CUstream_st)
+ */
+typedef __device_builtin__ struct CUstream_st *cudaStream_t;
+
+/**
+ * CUDA event types (pointer to struct CUevent_st)
+ */
+typedef __device_builtin__ struct CUevent_st *cudaEvent_t;
+
+/**
+ * CUDA graphics resource types (pointer to struct cudaGraphicsResource)
+ */
+typedef __device_builtin__ struct cudaGraphicsResource *cudaGraphicsResource_t;
+
+/**
+ * CUDA external memory (pointer to struct CUexternalMemory_st)
+ */
+typedef __device_builtin__ struct CUexternalMemory_st *cudaExternalMemory_t;
+
+/**
+ * CUDA external semaphore (pointer to struct CUexternalSemaphore_st)
+ */
+typedef __device_builtin__ struct CUexternalSemaphore_st *cudaExternalSemaphore_t;
+
+/**
+ * CUDA graph (pointer to struct CUgraph_st)
+ */
+typedef __device_builtin__ struct CUgraph_st *cudaGraph_t;
+
+/**
+ * CUDA graph node (pointer to struct CUgraphNode_st)
+ */
+typedef __device_builtin__ struct CUgraphNode_st *cudaGraphNode_t;
+
+/**
+ * CUDA user object for graphs (pointer to struct CUuserObject_st)
+ */
+typedef __device_builtin__ struct CUuserObject_st *cudaUserObject_t;
+
+/**
+ * CUDA handle for conditional graph nodes (an unsigned long long value, not a pointer type)
+ */
+typedef __device_builtin__ unsigned long long cudaGraphConditionalHandle;
+
+/**
+ * CUDA function (pointer to struct CUfunc_st)
+ */
+typedef __device_builtin__ struct CUfunc_st *cudaFunction_t;
+
+/**
+ * CUDA kernel (pointer to struct CUkern_st)
+ */
+typedef __device_builtin__ struct CUkern_st *cudaKernel_t;
+
+/**
+ * Online compiler and linker options (values are explicit; 8-9 and 15-29 are not assigned in this enum)
+ */
+enum __device_builtin__ cudaJitOption
+{
+ /**
+ * Max number of registers that a thread may use.\n
+ * Option type: unsigned int\n
+ * Applies to: compiler only
+ */
+ cudaJitMaxRegisters = 0,
+
+ /**
+ * IN: Specifies minimum number of threads per block to target compilation
+ * for\n
+ * OUT: Returns the number of threads the compiler actually targeted.
+ * This restricts the resource utilization of the compiler (e.g. max
+ * registers) such that a block with the given number of threads should be
+ * able to launch based on register limitations. Note, this option does not
+ * currently take into account any other resource limitations, such as
+ * shared memory utilization.\n
+ * Option type: unsigned int\n
+ * Applies to: compiler only
+ */
+ cudaJitThreadsPerBlock = 1,
+
+ /**
+ * Overwrites the option value with the total wall clock time, in
+ * milliseconds, spent in the compiler and linker\n
+ * Option type: float\n
+ * Applies to: compiler and linker
+ */
+ cudaJitWallTime = 2,
+
+ /**
+ * Pointer to a buffer in which to print any log messages
+ * that are informational in nature (the buffer size is specified via
+ * option ::cudaJitInfoLogBufferSizeBytes)\n
+ * Option type: char *\n
+ * Applies to: compiler and linker
+ */
+ cudaJitInfoLogBuffer = 3,
+
+ /**
+ * IN: Log buffer size in bytes. Log messages will be capped at this size
+ * (including null terminator)\n
+ * OUT: Amount of log buffer filled with messages\n
+ * Option type: unsigned int\n
+ * Applies to: compiler and linker
+ */
+ cudaJitInfoLogBufferSizeBytes = 4,
+
+ /**
+ * Pointer to a buffer in which to print any log messages that
+ * reflect errors (the buffer size is specified via option
+ * ::cudaJitErrorLogBufferSizeBytes)\n
+ * Option type: char *\n
+ * Applies to: compiler and linker
+ */
+ cudaJitErrorLogBuffer = 5,
+
+ /**
+ * IN: Log buffer size in bytes. Log messages will be capped at this size
+ * (including null terminator)\n
+ * OUT: Amount of log buffer filled with messages\n
+ * Option type: unsigned int\n
+ * Applies to: compiler and linker
+ */
+ cudaJitErrorLogBufferSizeBytes = 6,
+
+ /**
+ * Level of optimizations to apply to generated code (0 - 4), with 4
+ * being the default and highest level of optimizations.\n
+ * Option type: unsigned int\n
+ * Applies to: compiler only
+ */
+ cudaJitOptimizationLevel = 7,
+
+ /**
+ * Specifies choice of fallback strategy if matching cubin is not found.
+ * Choice is based on supplied ::cudaJit_Fallback.\n
+ * Option type: unsigned int for enumerated type ::cudaJit_Fallback\n
+ * Applies to: compiler only
+ */
+ cudaJitFallbackStrategy = 10,
+
+ /**
+ * Specifies whether to create debug information in output (-g)
+ * (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler and linker
+ */
+ cudaJitGenerateDebugInfo = 11,
+
+ /**
+ * Generate verbose log messages (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler and linker
+ */
+ cudaJitLogVerbose = 12,
+
+ /**
+ * Generate line number information (-lineinfo) (0: false, default)\n
+ * Option type: int\n
+ * Applies to: compiler only
+ */
+ cudaJitGenerateLineInfo = 13,
+
+ /**
+ * Specifies whether to enable caching explicitly (-dlcm) \n
+ * Choice is based on supplied ::cudaJit_CacheMode.\n
+ * Option type: unsigned int for enumerated type ::cudaJit_CacheMode\n
+ * Applies to: compiler only
+ */
+ cudaJitCacheMode = 14,
+
+ /**
+ * Generate position independent code (0: false)\n
+ * Option type: int\n
+ * Applies to: compiler only
+ */
+ cudaJitPositionIndependentCode = 30,
+
+ /**
+ * This option hints to the JIT compiler the minimum number of CTAs from the
+ * kernel's grid to be mapped to an SM. This option is ignored when used together
+ * with ::cudaJitMaxRegisters or ::cudaJitThreadsPerBlock.
+ * Optimizations based on this option need ::cudaJitMaxThreadsPerBlock to
+ * be specified as well. For kernels already using PTX directive .minnctapersm,
+ * this option will be ignored by default. Use ::cudaJitOverrideDirectiveValues
+ * to let this option take precedence over the PTX directive.\n
+ * Option type: unsigned int\n
+ * Applies to: compiler only
+ */
+ cudaJitMinCtaPerSm = 31,
+
+ /**
+ * Maximum number of threads in a thread block, computed as the product of
+ * the maximum extent specified for each dimension of the block. This limit
+ * is guaranteed not to be exceeded in any invocation of the kernel. Exceeding
+ * the maximum number of threads results in runtime error or kernel launch
+ * failure. For kernels already using PTX directive .maxntid, this option will
+ * be ignored by default. Use ::cudaJitOverrideDirectiveValues to let this
+ * option take precedence over the PTX directive.\n
+ * Option type: int\n
+ * Applies to: compiler only
+ */
+ cudaJitMaxThreadsPerBlock = 32,
+
+ /**
+ * This option lets the values specified using ::cudaJitMaxRegisters,
+ * ::cudaJitThreadsPerBlock, ::cudaJitMaxThreadsPerBlock and
+ * ::cudaJitMinCtaPerSm take precedence over any PTX directives.
+ * (0: Disable, default; 1: Enable)\n
+ * Option type: int\n
+ * Applies to: compiler only
+ */
+ cudaJitOverrideDirectiveValues = 33,
+};
+
+
+/**
+ * Library options to be specified with ::cudaLibraryLoadData() or ::cudaLibraryLoadFromFile()
+ */
+enum __device_builtin__ cudaLibraryOption
+{
+ cudaLibraryHostUniversalFunctionAndDataTable = 0, /**< NOTE(review): undocumented here; presumably takes a ::cudalibraryHostUniversalFunctionAndDataTable value - confirm */
+
+ /**
+ * Specifies that the argument \p code passed to ::cudaLibraryLoadData() will be preserved.
+ * Specifying this option will let the driver know that \p code can be accessed at any point
+ * until ::cudaLibraryUnload(). The default behavior is for the driver to allocate and
+ * maintain its own copy of \p code. Note that this is only a memory usage optimization
+ * hint and the driver can choose to ignore it if required.
+ * Specifying this option with ::cudaLibraryLoadFromFile() is invalid and
+ * will return ::cudaErrorInvalidValue.
+ */
+ cudaLibraryBinaryIsPreserved = 1,
+};
+
+struct __device_builtin__ cudalibraryHostUniversalFunctionAndDataTable /* NOTE(review): undocumented here; presumably the payload of ::cudaLibraryHostUniversalFunctionAndDataTable - confirm */
+{
+ void *functionTable; /**< Function table pointer (semantics not documented in this header) */
+ size_t functionWindowSize; /**< NOTE(review): presumably the size of the window for functionTable - confirm units */
+ void *dataTable; /**< Data table pointer (semantics not documented in this header) */
+ size_t dataWindowSize; /**< NOTE(review): presumably the size of the window for dataTable - confirm units */
+};
+
+/**
+ * Caching modes for dlcm (values supplied to the ::cudaJitCacheMode option)
+ */
+enum __device_builtin__ cudaJit_CacheMode
+{
+ cudaJitCacheOptionNone = 0, /**< Compile with no -dlcm flag specified */
+ cudaJitCacheOptionCG, /**< Compile with L1 cache disabled (implicit value 1) */
+ cudaJitCacheOptionCA /**< Compile with L1 cache enabled (implicit value 2) */
+};
+
+/**
+ * Cubin matching fallback strategies (values supplied to the ::cudaJitFallbackStrategy option)
+ */
+enum __device_builtin__ cudaJit_Fallback
+{
+ cudaPreferPtx = 0, /**< Prefer to compile ptx if exact binary match not found */
+
+ cudaPreferBinary /**< Prefer to fall back to compatible binary code if exact match not found (implicit value 1) */
+};
+
+/**
+ * CUDA library (pointer to struct CUlib_st)
+ */
+typedef __device_builtin__ struct CUlib_st *cudaLibrary_t;
+
+/**
+ * CUDA memory pool (pointer to struct CUmemPoolHandle_st)
+ */
+typedef __device_builtin__ struct CUmemPoolHandle_st *cudaMemPool_t;
+
+/**
+ * CUDA cooperative group scope (0 is reserved for the invalid scope)
+ */
+enum __device_builtin__ cudaCGScope {
+ cudaCGScopeInvalid = 0, /**< Invalid cooperative group scope */
+ cudaCGScopeGrid = 1, /**< Scope represented by a grid_group */
+ cudaCGScopeMultiGrid = 2 /**< Scope represented by a multi_grid_group */
+};
+
+/**
+ * CUDA launch parameters
+ */
+struct __device_builtin__ cudaLaunchParams
+{
+ void *func; /**< Device function symbol */
+ dim3 gridDim; /**< Grid dimensions */
+ dim3 blockDim; /**< Block dimensions */
+ void **args; /**< Arguments */
+ size_t sharedMem; /**< Shared memory */
+ cudaStream_t stream; /**< Stream identifier */
+};
+
+/**
+ * CUDA GPU kernel node parameters (grid/block extents are always dim3; ::cudaKernelNodeParamsV2 is the variant that can fall back to uint3)
+ */
+struct __device_builtin__ cudaKernelNodeParams {
+ void* func; /**< Kernel to launch */
+ dim3 gridDim; /**< Grid dimensions */
+ dim3 blockDim; /**< Block dimensions */
+ unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
+ void **kernelParams; /**< Array of pointers to individual kernel arguments */
+ void **extra; /**< Pointer to kernel arguments in the "extra" format */
+};
+
+/**
+ * CUDA GPU kernel node parameters, V2: same members as ::cudaKernelNodeParams, but gridDim/blockDim are uint3 when built as pre-C++11 C++
+ */
+struct __device_builtin__ cudaKernelNodeParamsV2 {
+ void* func; /**< Kernel to launch */
+ #if !defined(__cplusplus) || __cplusplus >= 201103L
+ dim3 gridDim; /**< Grid dimensions */
+ dim3 blockDim; /**< Block dimensions */
+ #else
+ /* Union members cannot have nontrivial constructors until C++11; this struct is a member of the union in ::cudaGraphNodeParams. */
+ uint3 gridDim; /**< Grid dimensions */
+ uint3 blockDim; /**< Block dimensions */
+ #endif
+ unsigned int sharedMemBytes; /**< Dynamic shared-memory size per thread block in bytes */
+ void **kernelParams; /**< Array of pointers to individual kernel arguments */
+ void **extra; /**< Pointer to kernel arguments in the "extra" format */
+};
+
+/**
+ * External semaphore signal node parameters (extSemArray and paramsArray are parallel arrays of numExtSems entries)
+ */
+struct __device_builtin__ cudaExternalSemaphoreSignalNodeParams {
+ cudaExternalSemaphore_t* extSemArray; /**< Array of external semaphore handles. */
+ const struct cudaExternalSemaphoreSignalParams* paramsArray; /**< Array of external semaphore signal parameters. */
+ unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
+};
+
+/**
+ * External semaphore signal node parameters, V2 (members as declared here are identical to ::cudaExternalSemaphoreSignalNodeParams)
+ */
+struct __device_builtin__ cudaExternalSemaphoreSignalNodeParamsV2 {
+ cudaExternalSemaphore_t* extSemArray; /**< Array of external semaphore handles. */
+ const struct cudaExternalSemaphoreSignalParams* paramsArray; /**< Array of external semaphore signal parameters. */
+ unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
+};
+
+/**
+ * External semaphore wait node parameters (extSemArray and paramsArray are parallel arrays of numExtSems entries)
+ */
+struct __device_builtin__ cudaExternalSemaphoreWaitNodeParams {
+ cudaExternalSemaphore_t* extSemArray; /**< Array of external semaphore handles. */
+ const struct cudaExternalSemaphoreWaitParams* paramsArray; /**< Array of external semaphore wait parameters. */
+ unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
+};
+
+/**
+ * External semaphore wait node parameters, V2 (members as declared here are identical to ::cudaExternalSemaphoreWaitNodeParams)
+ */
+struct __device_builtin__ cudaExternalSemaphoreWaitNodeParamsV2 {
+ cudaExternalSemaphore_t* extSemArray; /**< Array of external semaphore handles. */
+ const struct cudaExternalSemaphoreWaitParams* paramsArray; /**< Array of external semaphore wait parameters. */
+ unsigned int numExtSems; /**< Number of handles and parameters supplied in extSemArray and paramsArray. */
+};
+
+enum __device_builtin__ cudaGraphConditionalHandleFlags { /* Flags for conditional graph handles. NOTE(review): presumably passed to ::cudaGraphConditionalHandleCreate - confirm */
+ cudaGraphCondAssignDefault = 1 /**< Apply default handle value when graph is launched. */
+};
+
+/**
+ * CUDA conditional node types (value of ::cudaConditionalNodeParams::type)
+ */
+enum __device_builtin__ cudaGraphConditionalNodeType {
+ cudaGraphCondTypeIf = 0, /**< Conditional 'if/else' Node. Body[0] executed if condition is non-zero. If \p size == 2, an optional ELSE graph is created and this is executed if the condition is zero. */
+ cudaGraphCondTypeWhile = 1, /**< Conditional 'while' Node. Body executed repeatedly while condition value is non-zero. */
+ cudaGraphCondTypeSwitch = 2, /**< Conditional 'switch' Node. Body[n] is executed once, where 'n' is the value of the condition. If the condition does not match a body index, no body is launched. */
+};
+
+/**
+ * CUDA conditional node parameters
+ */
+struct __device_builtin__ cudaConditionalNodeParams {
+ cudaGraphConditionalHandle handle; /**< Conditional node handle.
+ Handles must be created in advance of creating the node
+ using ::cudaGraphConditionalHandleCreate. */
+ enum cudaGraphConditionalNodeType type; /**< Type of conditional node. */
+ unsigned int size; /**< Size of graph output array. Allowed values are 1 for cudaGraphCondTypeWhile, 1 or 2
+ for cudaGraphCondTypeIf, or any value greater than zero for cudaGraphCondTypeSwitch. */
+ cudaGraph_t *phGraph_out; /**< CUDA-owned array populated with conditional node child graphs during creation of the node.
+ Valid for the lifetime of the conditional node.
+ The contents of the graph(s) are subject to the following constraints:
+
+ - Allowed node types are kernel nodes, empty nodes, child graphs, memsets,
+ memcopies, and conditionals. This applies recursively to child graphs and conditional bodies.
+ - All kernels, including kernels in nested conditionals or child graphs at any level,
+ must belong to the same CUDA context.
+
+ These graphs may be populated using graph node creation APIs or ::cudaStreamBeginCaptureToGraph.
+ cudaGraphCondTypeIf:
+ phGraph_out[0] is executed when the condition is non-zero. If \p size == 2, phGraph_out[1] will
+ be executed when the condition is zero.
+ cudaGraphCondTypeWhile:
+ phGraph_out[0] is executed as long as the condition is non-zero.
+ cudaGraphCondTypeSwitch:
+ phGraph_out[n] is executed when the condition is equal to n. If the condition >= \p size,
+ no body graph is executed.
+ */
+};
+
+/**
+* CUDA Graph node types (value of ::cudaGraphNodeParams::type)
+*/
+enum __device_builtin__ cudaGraphNodeType {
+ cudaGraphNodeTypeKernel = 0x00, /**< GPU kernel node */
+ cudaGraphNodeTypeMemcpy = 0x01, /**< Memcpy node */
+ cudaGraphNodeTypeMemset = 0x02, /**< Memset node */
+ cudaGraphNodeTypeHost = 0x03, /**< Host (executable) node */
+ cudaGraphNodeTypeGraph = 0x04, /**< Node which executes an embedded graph */
+ cudaGraphNodeTypeEmpty = 0x05, /**< Empty (no-op) node */
+ cudaGraphNodeTypeWaitEvent = 0x06, /**< External event wait node */
+ cudaGraphNodeTypeEventRecord = 0x07, /**< External event record node */
+ cudaGraphNodeTypeExtSemaphoreSignal = 0x08, /**< External semaphore signal node */
+ cudaGraphNodeTypeExtSemaphoreWait = 0x09, /**< External semaphore wait node */
+ cudaGraphNodeTypeMemAlloc = 0x0a, /**< Memory allocation node */
+ cudaGraphNodeTypeMemFree = 0x0b, /**< Memory free node */
+ cudaGraphNodeTypeConditional = 0x0d, /**< Conditional node
+
+ May be used to implement a conditional execution path or loop
+ inside of a graph. The graph(s) contained within the body of the conditional node
+ can be selectively executed or iterated upon based on the value of a conditional
+ variable.
+
+ Handles must be created in advance of creating the node
+ using ::cudaGraphConditionalHandleCreate.
+
+ The following restrictions apply to graphs which contain conditional nodes:
+ The graph cannot be used in a child node.
+ Only one instantiation of the graph may exist at any point in time.
+ The graph cannot be cloned.
+
+ To set the control value, supply a default value when creating the handle and/or
+ call ::cudaGraphSetConditional from device code.*/
+ cudaGraphNodeTypeCount /**< Implicitly 0x0e (one past ::cudaGraphNodeTypeConditional); 0x0c is not assigned in this enum */
+};
+
+/**
+ * Child graph node parameters
+ */
+struct __device_builtin__ cudaChildGraphNodeParams {
+ cudaGraph_t graph; /**< The child graph to clone into the node for node creation, or
+ a handle to the graph owned by the node for node query */
+};
+
+/**
+ * Event record node parameters
+ */
+struct __device_builtin__ cudaEventRecordNodeParams {
+ cudaEvent_t event; /**< The event to record when the node executes */
+};
+
+/**
+ * Event wait node parameters
+ */
+struct __device_builtin__ cudaEventWaitNodeParams {
+ cudaEvent_t event; /**< The event to wait on from the node */
+};
+
+/**
+ * Graph node parameters. See ::cudaGraphAddNode.
+ */
+struct __device_builtin__ cudaGraphNodeParams {
+ enum cudaGraphNodeType type; /**< Type of the node */
+ int reserved0[3]; /**< Reserved. Must be zero. */
+
+ union {
+ long long reserved1[29]; /**< Padding. Unused bytes must be zero. */
+ struct cudaKernelNodeParamsV2 kernel; /**< Kernel node parameters. */
+ struct cudaMemcpyNodeParams memcpy; /**< Memcpy node parameters. */
+ struct cudaMemsetParamsV2 memset; /**< Memset node parameters. */
+ struct cudaHostNodeParamsV2 host; /**< Host node parameters. */
+ struct cudaChildGraphNodeParams graph; /**< Child graph node parameters. */
+ struct cudaEventWaitNodeParams eventWait; /**< Event wait node parameters. */
+ struct cudaEventRecordNodeParams eventRecord; /**< Event record node parameters. */
+ struct cudaExternalSemaphoreSignalNodeParamsV2 extSemSignal; /**< External semaphore signal node parameters. */
+ struct cudaExternalSemaphoreWaitNodeParamsV2 extSemWait; /**< External semaphore wait node parameters. */
+ struct cudaMemAllocNodeParamsV2 alloc; /**< Memory allocation node parameters. */
+ struct cudaMemFreeNodeParams free; /**< Memory free node parameters. */
+ struct cudaConditionalNodeParams conditional; /**< Conditional node parameters. */
+ };
+
+ long long reserved2; /**< Reserved bytes. Must be zero. */
+};
+
+/**
+ * Type annotations that can be applied to graph edges as part of ::cudaGraphEdgeData.
+ */
+typedef __device_builtin__ enum cudaGraphDependencyType_enum {
+ cudaGraphDependencyTypeDefault = 0, /**< This is an ordinary dependency. */
+ cudaGraphDependencyTypeProgrammatic = 1 /**< This dependency type allows the downstream node to
+ use \c cudaGridDependencySynchronize(). It may only be used
+ between kernel nodes, and must be used with either the
+ ::cudaGraphKernelNodePortProgrammatic or
+ ::cudaGraphKernelNodePortLaunchCompletion outgoing port. */
+} cudaGraphDependencyType;
+
+/**
+ * Optional annotation for edges in a CUDA graph. Note, all edges implicitly have annotations and
+ * default to a zero-initialized value if not specified. A zero-initialized struct indicates a
+ * standard full serialization of two nodes with memory visibility.
+ */
+typedef __device_builtin__ struct cudaGraphEdgeData_st {
+ unsigned char from_port; /**< This indicates when the dependency is triggered from the upstream
+ node on the edge. The meaning is specific to the node type. A value
+ of 0 in all cases means full completion of the upstream node, with
+ memory visibility to the downstream node or portion thereof
+ (indicated by \c to_port).
+
+ Only kernel nodes define non-zero ports. A kernel node
+ can use the following output port types:
+ ::cudaGraphKernelNodePortDefault, ::cudaGraphKernelNodePortProgrammatic,
+ or ::cudaGraphKernelNodePortLaunchCompletion. */
+ unsigned char to_port; /**< This indicates what portion of the downstream node is dependent on
+ the upstream node or portion thereof (indicated by \c from_port). The
+ meaning is specific to the node type. A value of 0 in all cases means
+ the entirety of the downstream node is dependent on the upstream work.
+
+ Currently no node types define non-zero ports. Accordingly, this field
+ must be set to zero. */
+ unsigned char type; /**< This should be populated with a value from ::cudaGraphDependencyType. (It
+ is typed as char due to compiler-specific layout of bitfields.) See
+ ::cudaGraphDependencyType. */
+ unsigned char reserved[5]; /**< These bytes are unused and must be zeroed. This ensures
+ compatibility if additional fields are added in the future. */
+} cudaGraphEdgeData;
+
+/**
+ * This port activates when the kernel has finished executing.
+ */
+#define cudaGraphKernelNodePortDefault 0
+/**
+ * This port activates when all blocks of the kernel have performed cudaTriggerProgrammaticLaunchCompletion()
+ * or have terminated. It must be used with edge type ::cudaGraphDependencyTypeProgrammatic. See also
+ * ::cudaLaunchAttributeProgrammaticEvent.
+ */
+#define cudaGraphKernelNodePortProgrammatic 1
+/**
+ * This port activates when all blocks of the kernel have begun execution. See also
+ * ::cudaLaunchAttributeLaunchCompletionEvent.
+ */
+#define cudaGraphKernelNodePortLaunchCompletion 2
+
+/**
+ * CUDA executable (launchable) graph
+ */
+typedef struct CUgraphExec_st* cudaGraphExec_t;
+
+/**
+* CUDA Graph Update error types
+*/
+enum __device_builtin__ cudaGraphExecUpdateResult {
+ cudaGraphExecUpdateSuccess = 0x0, /**< The update succeeded */
+ cudaGraphExecUpdateError = 0x1, /**< The update failed for an unexpected reason which is described in the return value of the function */
+ cudaGraphExecUpdateErrorTopologyChanged = 0x2, /**< The update failed because the topology changed */
+ cudaGraphExecUpdateErrorNodeTypeChanged = 0x3, /**< The update failed because a node type changed */
+ cudaGraphExecUpdateErrorFunctionChanged = 0x4, /**< The update failed because the function of a kernel node changed (CUDA driver < 11.2) */
+ cudaGraphExecUpdateErrorParametersChanged = 0x5, /**< The update failed because the parameters changed in a way that is not supported */
+ cudaGraphExecUpdateErrorNotSupported = 0x6, /**< The update failed because something about the node is not supported */
+ cudaGraphExecUpdateErrorUnsupportedFunctionChange = 0x7, /**< The update failed because the function of a kernel node changed in an unsupported way */
+ cudaGraphExecUpdateErrorAttributesChanged = 0x8 /**< The update failed because the node attributes changed in a way that is not supported */
+};
+
+/**
+ * Graph instantiation results
+*/
+typedef __device_builtin__ enum cudaGraphInstantiateResult {
+ cudaGraphInstantiateSuccess = 0, /**< Instantiation succeeded */
+ cudaGraphInstantiateError = 1, /**< Instantiation failed for an unexpected reason which is described in the return value of the function */
+ cudaGraphInstantiateInvalidStructure = 2, /**< Instantiation failed due to invalid structure, such as cycles */
+ cudaGraphInstantiateNodeOperationNotSupported = 3, /**< Instantiation for device launch failed because the graph contained an unsupported operation */
+ cudaGraphInstantiateMultipleDevicesNotSupported = 4, /**< Instantiation for device launch failed due to the nodes belonging to different contexts */
+ cudaGraphInstantiateConditionalHandleUnused = 5 /**< One or more conditional handles are not associated with conditional nodes */
+} cudaGraphInstantiateResult;
+
+/**
+ * Graph instantiation parameters
+ */
+typedef __device_builtin__ struct cudaGraphInstantiateParams_st
+{
+ unsigned long long flags; /**< Instantiation flags */
+ cudaStream_t uploadStream; /**< Upload stream */
+ cudaGraphNode_t errNode_out; /**< The node which caused instantiation to fail, if any */
+ cudaGraphInstantiateResult result_out; /**< Whether instantiation was successful. If it failed, the reason why */
+} cudaGraphInstantiateParams;
+
+/**
+ * Result information returned by cudaGraphExecUpdate
+ */
+typedef __device_builtin__ struct cudaGraphExecUpdateResultInfo_st {
+ /**
+ * Gives more specific detail when a cuda graph update fails.
+ */
+ enum cudaGraphExecUpdateResult result;
+
+ /**
+ * The "to node" of the error edge when the topologies do not match.
+ * The error node when the error is associated with a specific node.
+ * NULL when the error is generic.
+ */
+ cudaGraphNode_t errorNode;
+
+ /**
+ * The from node of error edge when the topologies do not match. Otherwise NULL.
+ */
+ cudaGraphNode_t errorFromNode;
+} cudaGraphExecUpdateResultInfo;
+
+/**
+ * CUDA device node handle for device-side node update
+ */
+typedef struct CUgraphDeviceUpdatableNode_st* cudaGraphDeviceNode_t;
+
+/**
+ * Specifies the field to update when performing multiple node updates from the device
+ */
+enum __device_builtin__ cudaGraphKernelNodeField
+{
+ cudaGraphKernelNodeFieldInvalid = 0, /**< Invalid field */
+ cudaGraphKernelNodeFieldGridDim, /**< Grid dimension update */
+ cudaGraphKernelNodeFieldParam, /**< Kernel parameter update */
+ cudaGraphKernelNodeFieldEnabled /**< Node enable/disable */
+};
+
+/**
+ * Struct to specify a single node update to pass as part of a larger array to ::cudaGraphKernelNodeUpdatesApply
+ */
+struct __device_builtin__ cudaGraphKernelNodeUpdate {
+ cudaGraphDeviceNode_t node; /**< Node to update */
+ enum cudaGraphKernelNodeField field; /**< Which type of update to apply. Determines how updateData is interpreted */
+ union {
+#if !defined(__cplusplus) || __cplusplus >= 201103L
+ dim3 gridDim; /**< Grid dimensions */
+#else
+ /* Union members cannot have nontrivial constructors until C++11. */
+ uint3 gridDim; /**< Grid dimensions */
+#endif
+ struct {
+ const void *pValue; /**< Kernel parameter data to write in */
+ size_t offset; /**< Offset into the parameter buffer at which to apply the update */
+ size_t size; /**< Number of bytes to update */
+ } param; /**< Kernel parameter data */
+ unsigned int isEnabled; /**< Node enable/disable data. Nonzero if the node should be enabled, 0 if it should be disabled */
+ } updateData; /**< Update data to apply. Which field is used depends on field's value */
+};
+
+/**
+ * Flags to specify search options to be used with ::cudaGetDriverEntryPoint
+ * For more details see ::cuGetProcAddress
+ */
+enum __device_builtin__ cudaGetDriverEntryPointFlags {
+ cudaEnableDefault = 0x0, /**< Default search mode for driver symbols. */
+ cudaEnableLegacyStream = 0x1, /**< Search for legacy versions of driver symbols. */
+ cudaEnablePerThreadDefaultStream = 0x2 /**< Search for per-thread versions of driver symbols. */
+};
+
+/**
+ * Enum for status from obtaining driver entry points, used with ::cudaGetDriverEntryPoint
+ */
+enum __device_builtin__ cudaDriverEntryPointQueryResult {
+ cudaDriverEntryPointSuccess = 0, /**< The symbol search found a match */
+ cudaDriverEntryPointSymbolNotFound = 1, /**< The symbol was not found */
+ cudaDriverEntryPointVersionNotSufficent = 2 /**< The symbol was found, but its version was not sufficient */
+};
+
+/**
+ * CUDA Graph debug write options
+ */
+enum __device_builtin__ cudaGraphDebugDotFlags {
+ cudaGraphDebugDotFlagsVerbose = 1<<0, /**< Output all debug data as if every debug flag is enabled */
+ cudaGraphDebugDotFlagsKernelNodeParams = 1<<2, /**< Adds cudaKernelNodeParams to output */
+ cudaGraphDebugDotFlagsMemcpyNodeParams = 1<<3, /**< Adds cudaMemcpy3DParms to output */
+ cudaGraphDebugDotFlagsMemsetNodeParams = 1<<4, /**< Adds cudaMemsetParams to output */
+ cudaGraphDebugDotFlagsHostNodeParams = 1<<5, /**< Adds cudaHostNodeParams to output */
+ cudaGraphDebugDotFlagsEventNodeParams = 1<<6, /**< Adds cudaEvent_t handle from record and wait nodes to output */
+ cudaGraphDebugDotFlagsExtSemasSignalNodeParams = 1<<7, /**< Adds cudaExternalSemaphoreSignalNodeParams values to output */
+ cudaGraphDebugDotFlagsExtSemasWaitNodeParams = 1<<8, /**< Adds cudaExternalSemaphoreWaitNodeParams to output */
+ cudaGraphDebugDotFlagsKernelNodeAttributes = 1<<9, /**< Adds cudaKernelNodeAttrID values to output */
+ cudaGraphDebugDotFlagsHandles = 1<<10, /**< Adds node handles and every kernel function handle to output */
+ cudaGraphDebugDotFlagsConditionalNodeParams = 1<<15, /**< Adds cudaConditionalNodeParams to output */
+};
+
+/**
+ * Flags for instantiating a graph
+ */
+enum __device_builtin__ cudaGraphInstantiateFlags {
+ cudaGraphInstantiateFlagAutoFreeOnLaunch = 1 /**< Automatically free memory allocated in a graph before relaunching. */
+ , cudaGraphInstantiateFlagUpload = 2 /**< Automatically upload the graph after instantiation. Only supported by
+ ::cudaGraphInstantiateWithParams. The upload will be performed using the
+ stream provided in \p instantiateParams. */
+ , cudaGraphInstantiateFlagDeviceLaunch = 4 /**< Instantiate the graph to be launchable from the device. This flag can only
+ be used on platforms which support unified addressing. This flag cannot be
+ used in conjunction with cudaGraphInstantiateFlagAutoFreeOnLaunch. */
+ , cudaGraphInstantiateFlagUseNodePriority = 8 /**< Run the graph using the per-node priority attributes rather than the
+ priority of the stream it is launched into. */
+};
+
+/**
+ * Memory Synchronization Domain
+ *
+ * A kernel can be launched in a specified memory synchronization domain that affects all memory operations issued by
+ * that kernel. A memory barrier issued in one domain will only order memory operations in that domain, thus eliminating
+ * latency increase from memory barriers ordering unrelated traffic.
+ *
+ * By default, kernels are launched in domain 0. Kernel launched with ::cudaLaunchMemSyncDomainRemote will have a
+ * different domain ID. User may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for a specific stream /
+ * graph node / kernel launch. See ::cudaLaunchAttributeMemSyncDomain, ::cudaStreamSetAttribute, ::cudaLaunchKernelEx,
+ * ::cudaGraphKernelNodeSetAttribute.
+ *
+ * Memory operations done in kernels launched in different domains are considered system-scope distanced. In other
+ * words, a GPU scoped memory synchronization is not sufficient for memory order to be observed by kernels in another
+ * memory synchronization domain even if they are on the same GPU.
+ */
+typedef __device_builtin__ enum cudaLaunchMemSyncDomain {
+ cudaLaunchMemSyncDomainDefault = 0, /**< Launch kernels in the default domain */
+ cudaLaunchMemSyncDomainRemote = 1 /**< Launch kernels in the remote domain */
+} cudaLaunchMemSyncDomain;
+
+/**
+ * Memory Synchronization Domain map
+ *
+ * See ::cudaLaunchMemSyncDomain.
+ *
+ * By default, kernels are launched in domain 0. Kernel launched with ::cudaLaunchMemSyncDomainRemote will have a
+ * different domain ID. User may also alter the domain ID with ::cudaLaunchMemSyncDomainMap for a specific stream /
+ * graph node / kernel launch. See ::cudaLaunchAttributeMemSyncDomainMap.
+ *
+ * Domain ID range is available through ::cudaDevAttrMemSyncDomainCount.
+ */
+typedef __device_builtin__ struct cudaLaunchMemSyncDomainMap_st {
+ unsigned char default_; /**< The default domain ID to use for designated kernels */
+ unsigned char remote; /**< The remote domain ID to use for designated kernels */
+} cudaLaunchMemSyncDomainMap;
+
+/**
+ * Launch attributes enum; used as id field of ::cudaLaunchAttribute
+ */
+typedef __device_builtin__ enum cudaLaunchAttributeID {
+ cudaLaunchAttributeIgnore = 0 /**< Ignored entry, for convenient composition */
+ , cudaLaunchAttributeAccessPolicyWindow = 1 /**< Valid for streams, graph nodes, launches. See
+ ::cudaLaunchAttributeValue::accessPolicyWindow. */
+ , cudaLaunchAttributeCooperative = 2 /**< Valid for graph nodes, launches. See
+ ::cudaLaunchAttributeValue::cooperative. */
+ , cudaLaunchAttributeSynchronizationPolicy = 3 /**< Valid for streams. See ::cudaLaunchAttributeValue::syncPolicy. */
+ , cudaLaunchAttributeClusterDimension = 4 /**< Valid for graph nodes, launches. See
+ ::cudaLaunchAttributeValue::clusterDim. */
+ , cudaLaunchAttributeClusterSchedulingPolicyPreference = 5 /**< Valid for graph nodes, launches. See
+ ::cudaLaunchAttributeValue::clusterSchedulingPolicyPreference. */
+ , cudaLaunchAttributeProgrammaticStreamSerialization = 6 /**< Valid for launches. Setting
+ ::cudaLaunchAttributeValue::programmaticStreamSerializationAllowed
+ to non-0 signals that the kernel will use programmatic
+ means to resolve its stream dependency, so that the
+ CUDA runtime should opportunistically allow the grid's
+ execution to overlap with the previous kernel in the
+ stream, if that kernel requests the overlap. The
+ dependent launches can choose to wait on the
+ dependency using the programmatic sync
+ (cudaGridDependencySynchronize() or equivalent PTX
+ instructions). */
+ , cudaLaunchAttributeProgrammaticEvent = 7 /**< Valid for launches. Set
+ ::cudaLaunchAttributeValue::programmaticEvent to
+ record the event. Event recorded through this launch
+ attribute is guaranteed to only trigger after all
+ blocks in the associated kernel trigger the event. A
+ block can trigger the event programmatically in a
+ future CUDA release. A trigger can also be inserted at
+ the beginning of each block's execution if
+ triggerAtBlockStart is set to non-0. The dependent
+ launches can choose to wait on the dependency using
+ the programmatic sync (cudaGridDependencySynchronize()
+ or equivalent PTX instructions). Note that dependents
+ (including the CPU thread calling
+ cudaEventSynchronize()) are not guaranteed to observe
+ the release precisely when it is released. For
+ example, cudaEventSynchronize() may only observe the
+ event trigger long after the associated kernel has
+ completed. This recording type is primarily meant for
+ establishing programmatic dependency between device
+ tasks. Note also this type of dependency allows, but
+ does not guarantee, concurrent execution of tasks.
+
+ The event supplied must not be an interprocess or
+ interop event. The event must disable timing (i.e.
+ must be created with the ::cudaEventDisableTiming flag
+ set). */
+ , cudaLaunchAttributePriority = 8 /**< Valid for streams, graph nodes, launches. See
+ ::cudaLaunchAttributeValue::priority. */
+ , cudaLaunchAttributeMemSyncDomainMap = 9 /**< Valid for streams, graph nodes, launches. See
+ ::cudaLaunchAttributeValue::memSyncDomainMap. */
+ , cudaLaunchAttributeMemSyncDomain = 10 /**< Valid for streams, graph nodes, launches. See
+ ::cudaLaunchAttributeValue::memSyncDomain. */
+ , cudaLaunchAttributePreferredClusterDimension = 11 /**< Valid for graph nodes and launches. Set
+ ::cudaLaunchAttributeValue::preferredClusterDim
+ to allow the kernel launch to specify a preferred substitute
+ cluster dimension. Blocks may be grouped according to either
+ the dimensions specified with this attribute (grouped into a
+ "preferred substitute cluster"), or the one specified with
+ ::cudaLaunchAttributeClusterDimension attribute (grouped
+ into a "regular cluster"). The cluster dimensions of a
+ "preferred substitute cluster" shall be an integer multiple
+ greater than zero of the regular cluster dimensions. The
+ device will attempt - on a best-effort basis - to group
+ thread blocks into preferred clusters over grouping them
+ into regular clusters. When it deems necessary (primarily
+ when the device temporarily runs out of physical resources
+ to launch the larger preferred clusters), the device may
+ switch to launch the regular clusters instead to attempt to
+ utilize as much of the physical device resources as possible.
+
+ Each type of cluster will have its enumeration / coordinate
+ setup as if the grid consists solely of its type of cluster.
+ For example, if the preferred substitute cluster dimensions
+ double the regular cluster dimensions, there might be
+ simultaneously a regular cluster indexed at (1,0,0), and a
+ preferred cluster indexed at (1,0,0). In this example, the
+ preferred substitute cluster (1,0,0) replaces regular
+ clusters (2,0,0) and (3,0,0) and groups their blocks.
+
+ This attribute will only take effect when a regular cluster
+ dimension has been specified. The preferred substitute cluster
+ dimension must be an integer multiple greater than zero of the
+ regular cluster dimension and must divide the grid. It must
+ also be no more than `maxBlocksPerCluster`, if it is set in
+ the kernel's `__launch_bounds__`. Otherwise it must be less
+ than the maximum value the driver can support. Otherwise,
+ setting this attribute to a value physically unable to fit on
+ any particular device is permitted. */
+ , cudaLaunchAttributeLaunchCompletionEvent = 12 /**< Valid for launches. Set
+ ::cudaLaunchAttributeValue::launchCompletionEvent to record the
+ event.
+
+ Nominally, the event is triggered once all blocks of the kernel
+ have begun execution. Currently this is a best effort. If a kernel
+ B has a launch completion dependency on a kernel A, B may wait
+ until A is complete. Alternatively, blocks of B may begin before
+ all blocks of A have begun, for example if B can claim execution
+ resources unavailable to A (e.g. they run on different GPUs) or
+ if B is a higher priority than A.
+ Exercise caution if such an ordering inversion could lead
+ to deadlock.
+
+ A launch completion event is nominally similar to a programmatic
+ event with \c triggerAtBlockStart set except that it is not
+ visible to \c cudaGridDependencySynchronize() and can be used with
+ compute capability less than 9.0.
+
+ The event supplied must not be an interprocess or interop event.
+ The event must disable timing (i.e. must be created with the
+ ::cudaEventDisableTiming flag set). */
+ , cudaLaunchAttributeDeviceUpdatableKernelNode = 13 /**< Valid for graph nodes, launches. This attribute is graphs-only,
+ and passing it to a launch in a non-capturing stream will result
+ in an error.
+
+ ::cudaLaunchAttributeValue::deviceUpdatableKernelNode::deviceUpdatable can
+ only be set to 0 or 1. Setting the field to 1 indicates that the
+ corresponding kernel node should be device-updatable. On success, a handle
+ will be returned via
+ ::cudaLaunchAttributeValue::deviceUpdatableKernelNode::devNode which can be
+ passed to the various device-side update functions to update the node's
+ kernel parameters from within another kernel. For more information on the
+ types of device updates that can be made, as well as the relevant limitations
+ thereof, see ::cudaGraphKernelNodeUpdatesApply.
+
+ Nodes which are device-updatable have additional restrictions compared to
+ regular kernel nodes. Firstly, device-updatable nodes cannot be removed
+ from their graph via ::cudaGraphDestroyNode. Additionally, once opted-in
+ to this functionality, a node cannot opt out, and any attempt to set the
+ deviceUpdatable attribute to 0 will result in an error. Device-updatable
+ kernel nodes also cannot have their attributes copied to/from another kernel
+ node via ::cudaGraphKernelNodeCopyAttributes. Graphs containing one or more
+ device-updatable nodes also do not allow multiple instantiation, and neither
+ the graph nor its instantiated version can be passed to ::cudaGraphExecUpdate.
+
+ If a graph contains device-updatable nodes and updates those nodes from the device
+ from within the graph, the graph must be uploaded with ::cudaGraphUpload before it
+ is launched. For such a graph, if host-side executable graph updates are made to the
+ device-updatable nodes, the graph must be uploaded before it is launched again. */
+ , cudaLaunchAttributePreferredSharedMemoryCarveout = 14 /**< Valid for launches. On devices where the L1 cache and shared memory use the
+ same hardware resources, setting ::cudaLaunchAttributeValue::sharedMemCarveout
+ to a percentage between 0-100 sets the shared memory carveout
+ preference in percent of the total shared memory for that kernel launch.
+ This attribute takes precedence over ::cudaFuncAttributePreferredSharedMemoryCarveout.
+ This is only a hint, and the driver can choose a different configuration if
+ required for the launch.*/
+} cudaLaunchAttributeID;
+
+/**
+ * Launch attributes union; used as value field of ::cudaLaunchAttribute
+ */
+typedef __device_builtin__ union cudaLaunchAttributeValue {
+ char pad[64]; /* Pad to 64 bytes */
+ struct cudaAccessPolicyWindow accessPolicyWindow; /**< Value of launch attribute ::cudaLaunchAttributeAccessPolicyWindow. */
+ int cooperative; /**< Value of launch attribute ::cudaLaunchAttributeCooperative. Nonzero indicates a cooperative
+ kernel (see ::cudaLaunchCooperativeKernel). */
+ enum cudaSynchronizationPolicy syncPolicy; /**< Value of launch attribute
+ ::cudaLaunchAttributeSynchronizationPolicy. ::cudaSynchronizationPolicy
+ for work queued up in this stream. */
+ /**
+ * Value of launch attribute ::cudaLaunchAttributeClusterDimension that
+ * represents the desired cluster dimensions for the kernel. Opaque type
+ * with the following fields:
+ * - \p x - The X dimension of the cluster, in blocks. Must be a divisor
+ * of the grid X dimension.
+ * - \p y - The Y dimension of the cluster, in blocks. Must be a divisor
+ * of the grid Y dimension.
+ * - \p z - The Z dimension of the cluster, in blocks. Must be a divisor
+ * of the grid Z dimension.
+ */
+ struct {
+ unsigned int x;
+ unsigned int y;
+ unsigned int z;
+ } clusterDim;
+ enum cudaClusterSchedulingPolicy clusterSchedulingPolicyPreference; /**< Value of launch attribute
+ ::cudaLaunchAttributeClusterSchedulingPolicyPreference. Cluster
+ scheduling policy preference for the kernel. */
+ int programmaticStreamSerializationAllowed; /**< Value of launch attribute
+ ::cudaLaunchAttributeProgrammaticStreamSerialization. */
+
+ /**
+ * Value of launch attribute ::cudaLaunchAttributeProgrammaticEvent
+ * with the following fields:
+ * - \p cudaEvent_t event - Event to fire when all blocks trigger it.
+ * - \p int flags - Event record flags, see ::cudaEventRecordWithFlags. Does not accept
+ * ::cudaEventRecordExternal.
+ * - \p int triggerAtBlockStart - If this is set to non-0, each block launch will automatically trigger the event.
+ */
+ struct {
+ cudaEvent_t event;
+ int flags;
+ int triggerAtBlockStart;
+ } programmaticEvent;
+ int priority; /**< Value of launch attribute ::cudaLaunchAttributePriority. Execution priority of the kernel. */
+ cudaLaunchMemSyncDomainMap memSyncDomainMap; /**< Value of launch attribute
+ ::cudaLaunchAttributeMemSyncDomainMap. See
+ ::cudaLaunchMemSyncDomainMap. */
+ cudaLaunchMemSyncDomain memSyncDomain; /**< Value of launch attribute ::cudaLaunchAttributeMemSyncDomain. See
+ ::cudaLaunchMemSyncDomain. */
+ /**
+ * Value of launch attribute ::cudaLaunchAttributePreferredClusterDimension
+ * that represents the desired preferred cluster dimensions for the kernel.
+ * Opaque type with the following fields:
+ * - \p x - The X dimension of the preferred cluster, in blocks. Must be
+ * a divisor of the grid X dimension, and must be a multiple of
+ * the \p x field of ::cudaLaunchAttributeValue::clusterDim.
+ * - \p y - The Y dimension of the preferred cluster, in blocks. Must be
+ * a divisor of the grid Y dimension, and must be a multiple of
+ * the \p y field of ::cudaLaunchAttributeValue::clusterDim.
+ * - \p z - The Z dimension of the preferred cluster, in blocks. Must be
+ * equal to the \p z field of ::cudaLaunchAttributeValue::clusterDim.
+ */
+ struct {
+ unsigned int x;
+ unsigned int y;
+ unsigned int z;
+ } preferredClusterDim;
+
+ /**
+ * Value of launch attribute ::cudaLaunchAttributeLaunchCompletionEvent
+ * with the following fields:
+ * - \p cudaEvent_t event - Event to fire when the last block launches.
+ * - \p int flags - Event record flags, see ::cudaEventRecordWithFlags. Does not accept
+ * ::cudaEventRecordExternal.
+ */
+ struct {
+ cudaEvent_t event;
+ int flags;
+ } launchCompletionEvent;
+
+ /**
+ * Value of launch attribute ::cudaLaunchAttributeDeviceUpdatableKernelNode
+ * with the following fields:
+ * - \p int deviceUpdatable - Whether or not the resulting kernel node should be device-updatable.
+ * - \p cudaGraphDeviceNode_t devNode - Returns a handle to pass to the various device-side update functions.
+ */
+ struct {
+ int deviceUpdatable;
+ cudaGraphDeviceNode_t devNode;
+ } deviceUpdatableKernelNode;
+ unsigned int sharedMemCarveout; /**< Value of launch attribute ::cudaLaunchAttributePreferredSharedMemoryCarveout. */
+} cudaLaunchAttributeValue;
+
+/**
+ * Launch attribute
+ */
+typedef __device_builtin__ struct cudaLaunchAttribute_st {
+ cudaLaunchAttributeID id; /**< Attribute to set */
+ char pad[8 - sizeof(cudaLaunchAttributeID)]; /**< Padding: \p id plus \p pad always occupy 8 bytes in total */
+ cudaLaunchAttributeValue val; /**< Value of the attribute */
+} cudaLaunchAttribute;
+
+/**
+ * CUDA extensible launch configuration
+ */
+typedef __device_builtin__ struct cudaLaunchConfig_st {
+ dim3 gridDim; /**< Grid dimensions */
+ dim3 blockDim; /**< Block dimensions */
+ size_t dynamicSmemBytes; /**< Dynamic shared-memory size per thread block in bytes */
+ cudaStream_t stream; /**< Stream identifier */
+ cudaLaunchAttribute *attrs; /**< List of attributes; nullable if ::cudaLaunchConfig_t::numAttrs == 0 */
+ unsigned int numAttrs; /**< Number of attributes populated in ::cudaLaunchConfig_t::attrs */
+} cudaLaunchConfig_t;
+
+#define cudaStreamAttrID cudaLaunchAttributeID
+#define cudaStreamAttributeAccessPolicyWindow cudaLaunchAttributeAccessPolicyWindow
+#define cudaStreamAttributeSynchronizationPolicy cudaLaunchAttributeSynchronizationPolicy
+#define cudaStreamAttributeMemSyncDomainMap cudaLaunchAttributeMemSyncDomainMap
+#define cudaStreamAttributeMemSyncDomain cudaLaunchAttributeMemSyncDomain
+#define cudaStreamAttributePriority cudaLaunchAttributePriority
+
+#define cudaStreamAttrValue cudaLaunchAttributeValue
+
+#define cudaKernelNodeAttrID cudaLaunchAttributeID
+#define cudaKernelNodeAttributeAccessPolicyWindow cudaLaunchAttributeAccessPolicyWindow
+#define cudaKernelNodeAttributeCooperative cudaLaunchAttributeCooperative
+#define cudaKernelNodeAttributePriority cudaLaunchAttributePriority
+#define cudaKernelNodeAttributeClusterDimension cudaLaunchAttributeClusterDimension
+#define cudaKernelNodeAttributeClusterSchedulingPolicyPreference cudaLaunchAttributeClusterSchedulingPolicyPreference
+#define cudaKernelNodeAttributeMemSyncDomainMap cudaLaunchAttributeMemSyncDomainMap
+#define cudaKernelNodeAttributeMemSyncDomain cudaLaunchAttributeMemSyncDomain
+#define cudaKernelNodeAttributePreferredSharedMemoryCarveout cudaLaunchAttributePreferredSharedMemoryCarveout
+#define cudaKernelNodeAttributeDeviceUpdatableKernelNode cudaLaunchAttributeDeviceUpdatableKernelNode
+
+#define cudaKernelNodeAttrValue cudaLaunchAttributeValue
+
+/**
+ * CUDA device NUMA config
+ */
+enum __device_builtin__ cudaDeviceNumaConfig {
+ cudaDeviceNumaConfigNone = 0, /**< The GPU is not a NUMA node */
+ cudaDeviceNumaConfigNumaNode, /**< The GPU is a NUMA node, cudaDevAttrNumaId contains its NUMA ID */
+};
+
+/**
+ * CUDA async callback handle
+ */
+typedef struct cudaAsyncCallbackEntry* cudaAsyncCallbackHandle_t;
+
+struct cudaAsyncCallbackEntry;
+
+/**
+* Types of async notification that can occur
+*/
+typedef __device_builtin__ enum cudaAsyncNotificationType_enum {
+ cudaAsyncNotificationTypeOverBudget = 0x1 /**< Over-budget notification; see the \c overBudget member of ::cudaAsyncNotificationInfo::info */
+} cudaAsyncNotificationType;
+
+/**
+* Information describing an async notification event
+*/
+typedef __device_builtin__ struct cudaAsyncNotificationInfo
+{
+ cudaAsyncNotificationType type; /**< The type of notification being described */
+ union {
+ struct {
+ unsigned long long bytesOverBudget; /**< Number of bytes over the budget (presumably the device memory budget - confirm against the CUDA Runtime API reference) */
+ } overBudget; /**< Payload for notifications of type ::cudaAsyncNotificationTypeOverBudget */
+ } info; /**< Type-specific payload; \p type must be checked to know which member is meaningful */
+} cudaAsyncNotificationInfo_t;
+
+typedef void (*cudaAsyncCallback)(cudaAsyncNotificationInfo_t*, void*, cudaAsyncCallbackHandle_t); /**< Async notification callback: receives the notification info, an opaque pointer (presumably caller-supplied user data - confirm), and the callback handle */
+
+
+/** @} */
+/** @} */ /* END CUDART_TYPES */
+
+#endif /* !__CUDACC_RTC_MINIMAL__ */
+
+#if defined(__UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DRIVER_TYPES_H__)
+#undef __CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS__
+#undef __UNDEF_CUDA_INCLUDE_COMPILER_INTERNAL_HEADERS_DRIVER_TYPES_H__
+#endif
+
+#undef __CUDA_DEPRECATED
+
+
+
+#endif /* !__DRIVER_TYPES_H__ */
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/generated_cudaVDPAU_meta.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/generated_cudaVDPAU_meta.h
new file mode 100644
index 0000000000000000000000000000000000000000..abc603c8d9be21e012a9b1641330c2e203d623b2
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/generated_cudaVDPAU_meta.h
@@ -0,0 +1,46 @@
+// This file is generated. Any changes you make will be lost during the next clean build.
+
+// Dependent includes (VDPAU types such as VdpDevice and VdpGetProcAddress used by the structs below)
+#include <vdpau/vdpau.h>
+
+// CUDA public interface, for type definitions and cu* function prototypes
+#include "cudaVDPAU.h"
+
+
+// *************************************************************************
+// Definitions of structs to hold parameters for each function
+// *************************************************************************
+
+typedef struct cuVDPAUGetDevice_params_st { // parameter bundle for cuVDPAUGetDevice (one struct per function, per the section banner)
+ CUdevice *pDevice; // pointer to a CUdevice; presumably where the result is written -- confirm against cudaVDPAU.h
+ VdpDevice vdpDevice; // VDPAU device, passed by value
+ VdpGetProcAddress *vdpGetProcAddress; // pointer to the VDPAU VdpGetProcAddress entry
+} cuVDPAUGetDevice_params;
+
+typedef struct cuVDPAUCtxCreate_v2_params_st { // parameter bundle for cuVDPAUCtxCreate_v2; same fields as cuVDPAUCtxCreate_params
+ CUcontext *pCtx; // pointer to a CUcontext; presumably receives the created context -- confirm
+ unsigned int flags;
+ CUdevice device;
+ VdpDevice vdpDevice;
+ VdpGetProcAddress *vdpGetProcAddress;
+} cuVDPAUCtxCreate_v2_params;
+
+typedef struct cuGraphicsVDPAURegisterVideoSurface_params_st { // parameter bundle for cuGraphicsVDPAURegisterVideoSurface
+ CUgraphicsResource *pCudaResource; // pointer to a CUgraphicsResource; presumably receives the registered resource -- confirm
+ VdpVideoSurface vdpSurface; // VDPAU video surface (the output-surface variant uses VdpOutputSurface instead)
+ unsigned int flags;
+} cuGraphicsVDPAURegisterVideoSurface_params;
+
+typedef struct cuGraphicsVDPAURegisterOutputSurface_params_st { // parameter bundle for cuGraphicsVDPAURegisterOutputSurface
+ CUgraphicsResource *pCudaResource; // pointer to a CUgraphicsResource; presumably receives the registered resource -- confirm
+ VdpOutputSurface vdpSurface; // VDPAU output surface (the video-surface variant uses VdpVideoSurface instead)
+ unsigned int flags;
+} cuGraphicsVDPAURegisterOutputSurface_params;
+
+typedef struct cuVDPAUCtxCreate_params_st { // parameter bundle for cuVDPAUCtxCreate; same fields as cuVDPAUCtxCreate_v2_params
+ CUcontext *pCtx; // pointer to a CUcontext; presumably receives the created context -- confirm
+ unsigned int flags;
+ CUdevice device;
+ VdpDevice vdpDevice;
+ VdpGetProcAddress *vdpGetProcAddress;
+} cuVDPAUCtxCreate_params;
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/nvperf_target.h b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/nvperf_target.h
new file mode 100644
index 0000000000000000000000000000000000000000..b1c5c85b403c5ebb16d66882aa26c1f1db1d5089
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/nvperf_target.h
@@ -0,0 +1,626 @@
+#ifndef NVPERF_TARGET_H
+#define NVPERF_TARGET_H
+
+/*
+ * Copyright 2014-2024 NVIDIA Corporation. All rights reserved.
+ *
+ * NOTICE TO USER:
+ *
+ * This source code is subject to NVIDIA ownership rights under U.S. and
+ * international Copyright laws.
+ *
+ * This software and the information contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and conditions
+ * of a form of NVIDIA software license agreement.
+ *
+ * NVIDIA MAKES NO REPRESENTATION ABOUT THE SUITABILITY OF THIS SOURCE
+ * CODE FOR ANY PURPOSE. IT IS PROVIDED "AS IS" WITHOUT EXPRESS OR
+ * IMPLIED WARRANTY OF ANY KIND. NVIDIA DISCLAIMS ALL WARRANTIES WITH
+ * REGARD TO THIS SOURCE CODE, INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY, NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY SPECIAL, INDIRECT, INCIDENTAL,
+ * OR CONSEQUENTIAL DAMAGES, OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+ * OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE
+ * OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE
+ * OR PERFORMANCE OF THIS SOURCE CODE.
+ *
+ * U.S. Government End Users. This source code is a "commercial item" as
+ * that term is defined at 48 C.F.R. 2.101 (OCT 1995), consisting of
+ * "commercial computer software" and "commercial computer software
+ * documentation" as such terms are used in 48 C.F.R. 12.212 (SEPT 1995)
+ * and is provided to the U.S. Government only as a commercial end item.
+ * Consistent with 48 C.F.R.12.212 and 48 C.F.R. 227.7202-1 through
+ * 227.7202-4 (JUNE 1995), all U.S. Government End Users acquire the
+ * source code with only those rights set forth herein.
+ *
+ * Any use of this source code in individual and commercial software must
+ * include, in the user documentation and internal comments to the code,
+ * the above Disclaimer and U.S. Government End Users Notice.
+ */
+
+#include <stddef.h> /* size_t, used by every *_Params struct below */
+#include <stdint.h> /* uint8_t / uint16_t / uint32_t / uint64_t fields below */
+#include "nvperf_common.h"
+
+#if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
+ #pragma GCC visibility push(default)
+ #if !defined(NVPW_LOCAL)
+ #define NVPW_LOCAL __attribute__ ((visibility ("hidden")))
+ #endif
+#else
+ #if !defined(NVPW_LOCAL)
+ #define NVPW_LOCAL
+ #endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/**
+ * @file nvperf_target.h
+ */
+
+#ifndef NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
+#define NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
+ /// GPU architecture support level (definition is skipped when NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_GpuArchitectureSupportLevel
+ {
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_SUPPORTED ///< implicit value 2
+ } NVPW_GpuArchitectureSupportLevel;
+#endif //NVPW_GPU_ARCHITECTURE_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_SLI_SUPPORT_LEVEL_DEFINED
+#define NVPW_SLI_SUPPORT_LEVEL_DEFINED
+ /// SLI configuration support level (definition is skipped when NVPW_SLI_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_SliSupportLevel
+ {
+ NVPW_SLI_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_SLI_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ /// Only Non-SLI configurations are supported (implicit value 2).
+ NVPW_SLI_SUPPORT_LEVEL_SUPPORTED_NON_SLI_CONFIGURATION
+ } NVPW_SliSupportLevel;
+#endif //NVPW_SLI_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_VGPU_SUPPORT_LEVEL_DEFINED
+#define NVPW_VGPU_SUPPORT_LEVEL_DEFINED
+ /// Virtualized GPU configuration support level (definition is skipped when NVPW_VGPU_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_VGpuSupportLevel
+ {
+ NVPW_VGPU_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_VGPU_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ /// Supported but not allowed by system admin (implicit value 2).
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_DISALLOWED,
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_ALLOWED, ///< implicit value 3
+ NVPW_VGPU_SUPPORT_LEVEL_SUPPORTED_NON_VGPU_CONFIGURATION ///< implicit value 4
+ } NVPW_VGpuSupportLevel;
+#endif //NVPW_VGPU_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
+#define NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
+ /// Confidential Compute mode support level (definition is skipped when NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_ConfidentialComputeSupportLevel
+ {
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_SUPPORTED_NON_CONF_COMPUTE_CONFIGURATION, ///< implicit value 2
+ NVPW_CONF_COMPUTE_SUPPORT_LEVEL_SUPPORTED_CONF_COMPUTE_DEVTOOLS_MODE ///< implicit value 3
+ } NVPW_ConfidentialComputeSupportLevel;
+#endif //NVPW_CONF_COMPUTE_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_CMP_SUPPORT_LEVEL_DEFINED
+#define NVPW_CMP_SUPPORT_LEVEL_DEFINED
+ /// CMP support level (definition is skipped when NVPW_CMP_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_CmpSupportLevel
+ {
+ NVPW_CMP_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_CMP_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ NVPW_CMP_SUPPORT_LEVEL_SUPPORTED_NON_CMP_CONFIGURATON ///< implicit value 2. NOTE(review): the identifier is spelled "CONFIGURATON"; callers must use this exact spelling
+ } NVPW_CmpSupportLevel;
+#endif //NVPW_CMP_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_WSL_SUPPORT_LEVEL_DEFINED
+#define NVPW_WSL_SUPPORT_LEVEL_DEFINED
+ /// WSL support level (definition is skipped when NVPW_WSL_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_WslSupportLevel
+ {
+ NVPW_WSL_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_WSL_SUPPORT_LEVEL_UNSUPPORTED_INSUFFICIENT_DRIVER_VERSION, ///< implicit value 1
+ NVPW_WSL_SUPPORT_LEVEL_SUPPORTED, ///< implicit value 2
+ NVPW_WSL_SUPPORT_LEVEL_SUPPORTED_NON_WSL_CONFIGURATION ///< implicit value 3
+ } NVPW_WslSupportLevel;
+#endif //NVPW_WSL_SUPPORT_LEVEL_DEFINED
+
+#ifndef NVPW_MIG_SUPPORT_LEVEL_DEFINED
+#define NVPW_MIG_SUPPORT_LEVEL_DEFINED
+ /// MIG support level (definition is skipped when NVPW_MIG_SUPPORT_LEVEL_DEFINED is already set)
+ typedef enum NVPW_MigSupportLevel
+ {
+ NVPW_MIG_SUPPORT_LEVEL_UNKNOWN = 0,
+ NVPW_MIG_SUPPORT_LEVEL_UNSUPPORTED, ///< implicit value 1
+ NVPW_MIG_SUPPORT_LEVEL_SUPPORTED, ///< implicit value 2
+ NVPW_MIG_SUPPORT_LEVEL_SUPPORTED_NON_MIG_CONFIGURATION ///< implicit value 3
+ } NVPW_MigSupportLevel;
+#endif //NVPW_MIG_SUPPORT_LEVEL_DEFINED
+
+ typedef struct NVPW_InitializeTarget_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_InitializeTarget_Params_STRUCT_SIZE -- confirm in nvperf_common.h
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ } NVPW_InitializeTarget_Params;
+#define NVPW_InitializeTarget_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_InitializeTarget_Params, pPriv)
+
+ /// Load the target library. Takes the params struct above by pointer and returns an NVPA_Status.
+ NVPA_Status NVPW_InitializeTarget(NVPW_InitializeTarget_Params* pParams);
+
+ typedef struct NVPW_GetDeviceCount_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_GetDeviceCount_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ size_t numDevices; ///< no direction annotation here; presumably [out], the device count -- confirm
+ } NVPW_GetDeviceCount_Params;
+#define NVPW_GetDeviceCount_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_GetDeviceCount_Params, numDevices)
+
+ NVPA_Status NVPW_GetDeviceCount(NVPW_GetDeviceCount_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_Device_GetNames_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Device_GetNames_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ size_t deviceIndex; ///< no direction annotation here; presumably [in], the device to query -- confirm
+ const char* pDeviceName; ///< no direction annotation here; presumably [out] -- confirm (string ownership is not stated in this header)
+ const char* pChipName; ///< no direction annotation here; presumably [out] -- confirm (string ownership is not stated in this header)
+ } NVPW_Device_GetNames_Params;
+#define NVPW_Device_GetNames_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetNames_Params, pChipName)
+
+ NVPA_Status NVPW_Device_GetNames(NVPW_Device_GetNames_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_PciBusId // PCI location of a device; element type of NVPW_Device_GetPciBusIds_Params::pBusIds
+ {
+ /// The PCI domain on which the device bus resides (32-bit).
+ uint32_t domain;
+ /// The bus on which the device resides (16-bit).
+ uint16_t bus;
+ /// device ID (16-bit).
+ uint16_t device;
+ } NVPW_PciBusId;
+#define NVPW_PciBusId_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PciBusId, device)
+
+ typedef struct NVPW_Device_GetPciBusIds_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Device_GetPciBusIds_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] caller-allocated array of NVPW_PciBusId, indexed by NVPW deviceIndex
+ NVPW_PciBusId* pBusIds;
+ /// [in] size of the pBusIds array; use result from NVPW_GetDeviceCount
+ size_t numDevices;
+ } NVPW_Device_GetPciBusIds_Params;
+#define NVPW_Device_GetPciBusIds_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetPciBusIds_Params, numDevices)
+
+ NVPA_Status NVPW_Device_GetPciBusIds(NVPW_Device_GetPciBusIds_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+
+#define NVPW_DEVICE_MIG_GPU_INSTANCE_ID_INVALID 0xFFFFFFFFu
+#define NVPW_DEVICE_MIG_GPU_INSTANCE_ID_FULLCHIP 0xFFFFFFFEu
+
+
+ typedef struct NVPW_Device_GetMigAttributes_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Device_GetMigAttributes_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] index of the device to query
+ size_t deviceIndex;
+ /// [out] whether the device is a MIG partition (per the field name)
+ NVPA_Bool isMigPartition;
+ /// [out] presumably may hold one of the NVPW_DEVICE_MIG_GPU_INSTANCE_ID_* sentinels defined above -- confirm
+ uint32_t gpuInstanceId;
+ /// [out] compute instance id (per the field name; valid range not stated in this header)
+ uint32_t computeInstanceId;
+ } NVPW_Device_GetMigAttributes_Params;
+#define NVPW_Device_GetMigAttributes_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetMigAttributes_Params, computeInstanceId)
+
+ NVPA_Status NVPW_Device_GetMigAttributes(NVPW_Device_GetMigAttributes_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_Adapter_GetDeviceIndex_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Adapter_GetDeviceIndex_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] adapter, passed as a pointer to struct IDXGIAdapter (only named here, not defined in this header)
+ struct IDXGIAdapter* pAdapter;
+ /// [in] SLI index (per the field name; valid range not stated in this header)
+ size_t sliIndex;
+ /// [out] NVPW device index for the given adapter
+ size_t deviceIndex;
+ } NVPW_Adapter_GetDeviceIndex_Params;
+#define NVPW_Adapter_GetDeviceIndex_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Adapter_GetDeviceIndex_Params, deviceIndex)
+
+ NVPA_Status NVPW_Adapter_GetDeviceIndex(NVPW_Adapter_GetDeviceIndex_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_CounterData_GetNumRanges_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ const uint8_t* pCounterDataImage; ///< no direction annotation here; presumably [in], the image to inspect -- confirm
+ size_t numRanges; ///< no direction annotation here; presumably [out] -- confirm
+ } NVPW_CounterData_GetNumRanges_Params;
+#define NVPW_CounterData_GetNumRanges_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetNumRanges_Params, numRanges)
+
+ NVPA_Status NVPW_CounterData_GetNumRanges(NVPW_CounterData_GetNumRanges_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_CounterData_GetChipName_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_CounterData_GetChipName_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] counter data image to read from
+ const uint8_t* pCounterDataImage;
+ /// [in] size of pCounterDataImage (presumably in bytes -- confirm)
+ size_t counterDataImageSize;
+ /// [out] chip name string (ownership/lifetime is not stated in this header)
+ const char* pChipName;
+ } NVPW_CounterData_GetChipName_Params;
+#define NVPW_CounterData_GetChipName_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetChipName_Params, pChipName)
+
+ NVPA_Status NVPW_CounterData_GetChipName(NVPW_CounterData_GetChipName_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_Config_GetNumPasses_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Config_GetNumPasses_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] config image to inspect
+ const uint8_t* pConfig;
+ /// [out] term added once in the total-passes formula documented on NVPW_Config_GetNumPasses
+ size_t numPipelinedPasses;
+ /// [out] term multiplied by numNestingLevels in that same formula
+ size_t numIsolatedPasses;
+ } NVPW_Config_GetNumPasses_Params;
+#define NVPW_Config_GetNumPasses_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetNumPasses_Params, numIsolatedPasses)
+
+ /// Total num passes = numPipelinedPasses + numIsolatedPasses * numNestingLevels (numNestingLevels is not a field of the params struct)
+ NVPA_Status NVPW_Config_GetNumPasses(NVPW_Config_GetNumPasses_Params* pParams);
+
+ typedef struct NVPW_Config_GetNumPasses_V2_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Config_GetNumPasses_V2_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] config image to inspect
+ const uint8_t* pConfig;
+ /// [out] single pass count; multiplied by numNestingLevels to obtain the total (see the function comment)
+ size_t numPasses;
+ } NVPW_Config_GetNumPasses_V2_Params;
+#define NVPW_Config_GetNumPasses_V2_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Config_GetNumPasses_V2_Params, numPasses)
+
+ /// Total num passes = numPasses * numNestingLevels (numNestingLevels is not a field of the params struct)
+ NVPA_Status NVPW_Config_GetNumPasses_V2(NVPW_Config_GetNumPasses_V2_Params* pParams);
+
+#define NVPW_API_SET_CUDA_PROFILER 0x18209d0775b2f89dULL
+
+#define NVPW_API_SET_D3D11_PROFILER 0xca55c6738445db2bULL
+
+#define NVPW_API_SET_D3D12_PROFILER 0xc0c2d46dd7c7ad78ULL
+
+#define NVPW_API_SET_EGL_PROFILER 0x3c3747dae1f9565cULL
+
+#define NVPW_API_SET_GPU_PERIODICSAMPLER 0x9f4c2571fc0b2e8aULL
+
+#define NVPW_API_SET_METRICSEVALUATOR 0x0368a8768d811af9ULL
+
+#define NVPW_API_SET_METRICS_AD10X_COMP 0xbe57278e12cb5288ULL
+
+#define NVPW_API_SET_METRICS_AD10X_GRFX 0x5cbf0774f81bf491ULL
+
+#define NVPW_API_SET_METRICS_GA100_COMP 0x16b7d8c20d8b4915ULL
+
+#define NVPW_API_SET_METRICS_GA100_GRFX 0xc94eaabec04a94faULL
+
+#define NVPW_API_SET_METRICS_GA10X_COMP 0xb5d6391c2e299ab5ULL
+
+#define NVPW_API_SET_METRICS_GA10X_GRFX 0x6ebc121178b5ce0bULL
+
+#define NVPW_API_SET_METRICS_GV100_COMP 0x863705cc57919f72ULL
+
+#define NVPW_API_SET_METRICS_GV100_GRFX 0x9900da75d164fecfULL
+
+#define NVPW_API_SET_METRICS_GV11B_COMP 0xd3f79a859235848fULL
+
+#define NVPW_API_SET_METRICS_GV11B_GRFX 0xeb8e26220106e227ULL
+
+#define NVPW_API_SET_METRICS_TU10X_COMP 0x70f40be0afd35da8ULL
+
+#define NVPW_API_SET_METRICS_TU10X_GRFX 0xdf219cb838db6968ULL
+
+#define NVPW_API_SET_METRICS_TU11X_COMP 0xeb0069d7d0956678ULL
+
+#define NVPW_API_SET_METRICS_TU11X_GRFX 0x0977d9342bd62743ULL
+
+#define NVPW_API_SET_OPENGL_PROFILER 0xe4cd9ea40f2ee777ULL
+
+#define NVPW_API_SET_VULKAN_PROFILER 0x8c56b6a03d779689ULL
+
+#define NVPW_SDK_VERSION 0x1e128b6f001423fcULL
+
+ typedef struct NVPW_QueryVersionNumber_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_QueryVersionNumber_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] API set to query; presumably one of the NVPW_API_SET_* constants defined above -- confirm
+ uint64_t apiSet;
+ /// [out] major version component
+ uint32_t major;
+ /// [out] minor version component
+ uint32_t minor;
+ /// [out] patch version component
+ uint32_t patch;
+ /// [out] "rel" major component (meaning of the rel* triple is not stated in this header)
+ uint32_t relMajor;
+ /// [out] "rel" minor component
+ uint32_t relMinor;
+ /// [out] "rel" patch component
+ uint32_t relPatch;
+ } NVPW_QueryVersionNumber_Params;
+#define NVPW_QueryVersionNumber_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_QueryVersionNumber_Params, relPatch)
+
+ /// Query version number of an API set; results are written to the [out] fields of the params struct
+ NVPA_Status NVPW_QueryVersionNumber(NVPW_QueryVersionNumber_Params* pParams);
+
+ typedef enum NVPW_Device_ClockStatus
+ {
+ /// clock status is unknown (first enumerator, implicit value 0)
+ NVPW_DEVICE_CLOCK_STATUS_UNKNOWN,
+ /// clocks are locked to rated tdp values - Deprecated, use NVPW_DEVICE_CLOCK_STATUS_LOCKED instead
+ NVPW_DEVICE_CLOCK_STATUS_LOCKED_TO_RATED_TDP,
+ /// clocks are not locked and can boost above rated tdp
+ NVPW_DEVICE_CLOCK_STATUS_BOOST_ENABLED,
+ /// clocks are not locked and will not go above rated tdp
+ NVPW_DEVICE_CLOCK_STATUS_BOOST_DISABLED,
+ /// clocks are locked
+ NVPW_DEVICE_CLOCK_STATUS_LOCKED,
+ /// clocks are not locked
+ NVPW_DEVICE_CLOCK_STATUS_UNLOCKED,
+ NVPW_DEVICE_CLOCK_STATUS__COUNT ///< evaluates to 6, the number of enumerators before it
+ } NVPW_Device_ClockStatus;
+
+ typedef enum NVPW_Device_ClockLevel
+ {
+ /// clock level is invalid (first enumerator, implicit value 0)
+ NVPW_DEVICE_CLOCK_LEVEL_INVALID,
+ /// clock level is at rated tdp
+ NVPW_DEVICE_CLOCK_LEVEL_RATED_TDP,
+ /// clock level is at turbo boost
+ NVPW_DEVICE_CLOCK_LEVEL_TURBO_BOOST,
+ NVPW_DEVICE_CLOCK_LEVEL__COUNT ///< evaluates to 3, the number of enumerators before it
+ } NVPW_Device_ClockLevel;
+
+ typedef struct NVPW_Device_GetClockStatus_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Device_GetClockStatus_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ size_t deviceIndex; ///< no direction annotation here; presumably [in], the device to query -- confirm
+ /// [in] NOTE(review): annotated [in], but for a Get call this looks like an output -- confirm against the NVPW docs
+ NVPW_Device_ClockStatus clockStatus;
+ /// [in] NOTE(review): annotated [in], but for a Get call this looks like an output -- confirm against the NVPW docs
+ NVPW_Device_ClockLevel clockLevel;
+ } NVPW_Device_GetClockStatus_Params;
+#define NVPW_Device_GetClockStatus_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_GetClockStatus_Params, clockLevel)
+
+ NVPA_Status NVPW_Device_GetClockStatus(NVPW_Device_GetClockStatus_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef enum NVPW_Device_ClockSetting
+ {
+ /// invalid op, specify valid clocks operation during profiling (first enumerator, implicit value 0)
+ NVPW_DEVICE_CLOCK_SETTING_INVALID,
+ /// default to driver/application config (normally unlocked and not boosted, but could be unlocked boosted, or
+ /// locked to rated TDP)
+ NVPW_DEVICE_CLOCK_SETTING_DEFAULT,
+ /// lock clocks at rated tdp base values
+ NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_RATED_TDP,
+ /// lock clocks at turbo boost values
+ NVPW_DEVICE_CLOCK_SETTING_LOCK_TO_TURBO_BOOST,
+ NVPW_DEVICE_CLOCK_SETTING__COUNT ///< evaluates to 4, the number of enumerators before it
+ } NVPW_Device_ClockSetting;
+
+ typedef struct NVPW_Device_SetClockSetting_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Device_SetClockSetting_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ size_t deviceIndex; ///< no direction annotation here; presumably [in], the device to configure -- confirm
+ /// [in] requested setting, one of the NVPW_Device_ClockSetting enumerators
+ NVPW_Device_ClockSetting clockSetting;
+ } NVPW_Device_SetClockSetting_Params;
+#define NVPW_Device_SetClockSetting_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Device_SetClockSetting_Params, clockSetting)
+
+ NVPA_Status NVPW_Device_SetClockSetting(NVPW_Device_SetClockSetting_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_CounterData_GetRangeDescriptions_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ const uint8_t* pCounterDataImage; ///< no direction annotation here; presumably [in] -- confirm
+ size_t rangeIndex; ///< no direction annotation here; presumably [in], the range to describe -- confirm
+ /// [inout] Number of descriptions allocated in ppDescriptions (what is written back on return is not stated here)
+ size_t numDescriptions;
+ const char** ppDescriptions; ///< array of C-string pointers sized by numDescriptions; presumably caller-allocated -- confirm
+ } NVPW_CounterData_GetRangeDescriptions_Params;
+#define NVPW_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_CounterData_GetRangeDescriptions_Params, ppDescriptions)
+
+ NVPA_Status NVPW_CounterData_GetRangeDescriptions(NVPW_CounterData_GetRangeDescriptions_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_Profiler_CounterData_GetRangeDescriptions_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ const uint8_t* pCounterDataImage; ///< no direction annotation here; presumably [in] -- confirm
+ size_t rangeIndex; ///< no direction annotation here; presumably [in], the range to describe -- confirm
+ /// [inout] Number of descriptions allocated in ppDescriptions (what is written back on return is not stated here)
+ size_t numDescriptions;
+ const char** ppDescriptions; ///< array of C-string pointers sized by numDescriptions; presumably caller-allocated -- confirm
+ } NVPW_Profiler_CounterData_GetRangeDescriptions_Params;
+#define NVPW_Profiler_CounterData_GetRangeDescriptions_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_Profiler_CounterData_GetRangeDescriptions_Params, ppDescriptions)
+
+ NVPA_Status NVPW_Profiler_CounterData_GetRangeDescriptions(NVPW_Profiler_CounterData_GetRangeDescriptions_Params* pParams); ///< same field layout as NVPW_CounterData_GetRangeDescriptions; returns an NVPA_Status
+
+#ifndef NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
+#define NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
+ typedef enum NVPW_PeriodicSampler_CounterData_AppendMode // skipped when the _DEFINED macro above is already set
+ {
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_LINEAR = 0, ///< the "linear" mode referred to by NVPW_PeriodicSampler_CounterData_GetInfo
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_CIRCULAR = 1, ///< the "circular" mode referred to by NVPW_PeriodicSampler_CounterData_GetInfo
+ NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE__COUNT ///< evaluates to 2, the number of enumerators before it
+ } NVPW_PeriodicSampler_CounterData_AppendMode;
+#endif //NVPW_PERIODIC_SAMPLER_COUNTER_DATA_APPEND_MODE_DEFINED
+
+ typedef struct NVPW_PeriodicSampler_CounterData_GetSampleTime_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_PeriodicSampler_CounterData_GetSampleTime_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] counter data image to read from
+ const uint8_t* pCounterDataImage;
+ /// [in] index of the range to read (one range holds one sample, per the NVPW_PeriodicSampler_CounterData_GetInfo comment)
+ size_t rangeIndex;
+ /// [out] start timestamp of the sample (unit/clock domain not stated in this header)
+ uint64_t timestampStart;
+ /// [out] end timestamp of the sample (unit/clock domain not stated in this header)
+ uint64_t timestampEnd;
+ } NVPW_PeriodicSampler_CounterData_GetSampleTime_Params;
+#define NVPW_PeriodicSampler_CounterData_GetSampleTime_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetSampleTime_Params, timestampEnd)
+
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetSampleTime(NVPW_PeriodicSampler_CounterData_GetSampleTime_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_PeriodicSampler_CounterData_TrimInPlace_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_PeriodicSampler_CounterData_TrimInPlace_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] non-const image pointer (the other CounterData params structs take const uint8_t*), consistent with in-place modification
+ uint8_t* pCounterDataImage;
+ /// [in] size of pCounterDataImage (presumably in bytes -- confirm)
+ size_t counterDataImageSize;
+ /// [out] size of the image after trimming (same unit as counterDataImageSize)
+ size_t counterDataImageTrimmedSize;
+ } NVPW_PeriodicSampler_CounterData_TrimInPlace_Params;
+#define NVPW_PeriodicSampler_CounterData_TrimInPlace_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_TrimInPlace_Params, counterDataImageTrimmedSize)
+
+ NVPA_Status NVPW_PeriodicSampler_CounterData_TrimInPlace(NVPW_PeriodicSampler_CounterData_TrimInPlace_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_PeriodicSampler_CounterData_GetInfo_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_PeriodicSampler_CounterData_GetInfo_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] counter data image to inspect
+ const uint8_t* pCounterDataImage;
+ /// [in] size of pCounterDataImage (presumably in bytes -- confirm)
+ size_t counterDataImageSize;
+ /// [out] total number of ranges in the counter data
+ size_t numTotalRanges;
+ /// [out] if in "linear" mode, this API returns the number of "populated" ranges; if it's in "circular" mode,
+ /// then it returns the last "populated" range index + 1; when there is no such range, it returns 0.
+ size_t numPopulatedRanges;
+ /// [out] if in "linear" mode, this API returns the number of "completed" ranges; if it's in "circular" mode,
+ /// then it returns the last "completed" range index + 1; when there is no such range, it returns 0.
+ size_t numCompletedRanges;
+ } NVPW_PeriodicSampler_CounterData_GetInfo_Params;
+#define NVPW_PeriodicSampler_CounterData_GetInfo_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetInfo_Params, numCompletedRanges)
+
+ /// In periodic sampler, a range in counter data stores exactly one sample's data. For better performance, periodic
+ /// sampler may operate in an out-of-order fashion when populating sample data, i.e. it may not fully populate all
+ /// counters of a sample/range before starting to populate the next sample/range. As a result, there are two concepts
+ /// here, "populated" & "completed": a range is considered "populated" even if only some of its counters have been
+ /// written; on the other hand, a range is only considered "completed" if all the collected counters have been
+ /// written.
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetInfo(NVPW_PeriodicSampler_CounterData_GetInfo_Params* pParams);
+
+ typedef struct NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] counter data image to read from
+ const uint8_t* pCounterDataImage;
+ /// [in] size of pCounterDataImage (presumably in bytes -- confirm)
+ size_t counterDataImageSize;
+ /// [in] index of the range (sample) to read
+ size_t rangeIndex;
+ /// [out] trigger count for that range (32-bit; exact semantics not stated in this header)
+ uint32_t triggerCount;
+ } NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params;
+#define NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params, triggerCount)
+
+ NVPA_Status NVPW_PeriodicSampler_CounterData_GetTriggerCount(NVPW_PeriodicSampler_CounterData_GetTriggerCount_Params* pParams); ///< takes the params struct above by pointer; returns an NVPA_Status
+
+ typedef struct NVPW_PeriodicSampler_CounterData_IsDataComplete_Params
+ {
+ /// [in] size of this struct; presumably set to NVPW_PeriodicSampler_CounterData_IsDataComplete_Params_STRUCT_SIZE -- confirm
+ size_t structSize;
+ /// [in] assign to NULL
+ void* pPriv;
+ /// [in] counter data image to inspect
+ const uint8_t* pCounterDataImage;
+ /// [in] size of pCounterDataImage (presumably in bytes -- confirm)
+ size_t counterDataImageSize;
+ /// [in] index of the range (sample) to check
+ size_t rangeIndex;
+ /// [out] whether that sample's data is complete
+ NVPA_Bool isComplete;
+ } NVPW_PeriodicSampler_CounterData_IsDataComplete_Params;
+#define NVPW_PeriodicSampler_CounterData_IsDataComplete_Params_STRUCT_SIZE NVPA_STRUCT_SIZE(NVPW_PeriodicSampler_CounterData_IsDataComplete_Params, isComplete)
+
+ /// Checks whether a given sample's data is complete; the answer is written to isComplete. See also 'NVPW_PeriodicSampler_CounterData_GetInfo'
+ NVPA_Status NVPW_PeriodicSampler_CounterData_IsDataComplete(NVPW_PeriodicSampler_CounterData_IsDataComplete_Params* pParams);
+
+
+ typedef struct NVPW_TimestampReport // plain record with no accompanying API in this header; producer not shown here
+ {
+ uint32_t payload; ///< 32-bit payload (meaning not stated in this header)
+ uint8_t reserved0004[4]; ///< 4 reserved bytes following the 4-byte payload, so timestamp starts at byte offset 8 on typical ABIs
+ uint64_t timestamp; ///< 64-bit timestamp (unit/clock domain not stated in this header)
+ } NVPW_TimestampReport;
+
+
+
+
+#ifdef __cplusplus
+} // extern "C"
+#endif
+
+#if defined(__GNUC__) && defined(NVPA_SHARED_LIB)
+ #pragma GCC visibility pop
+#endif
+
+#endif // NVPERF_TARGET_H
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_32_atomic_functions.hpp b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_32_atomic_functions.hpp
new file mode 100644
index 0000000000000000000000000000000000000000..7cfea072a044ceac7fb96f3bf3006520a108020e
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/backends/nvidia/include/sm_32_atomic_functions.hpp
@@ -0,0 +1,151 @@
+/*
+ * Copyright 1993-2023 NVIDIA Corporation. All rights reserved.
+ *
+ * NOTICE TO LICENSEE:
+ *
+ * This source code and/or documentation ("Licensed Deliverables") are
+ * subject to NVIDIA intellectual property rights under U.S. and
+ * international Copyright laws.
+ *
+ * These Licensed Deliverables contained herein is PROPRIETARY and
+ * CONFIDENTIAL to NVIDIA and is being provided under the terms and
+ * conditions of a form of NVIDIA software license agreement by and
+ * between NVIDIA and Licensee ("License Agreement") or electronically
+ * accepted by Licensee. Notwithstanding any terms or conditions to
+ * the contrary in the License Agreement, reproduction or disclosure
+ * of the Licensed Deliverables to any third party without the express
+ * written consent of NVIDIA is prohibited.
+ *
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, NVIDIA MAKES NO REPRESENTATION ABOUT THE
+ * SUITABILITY OF THESE LICENSED DELIVERABLES FOR ANY PURPOSE. IT IS
+ * PROVIDED "AS IS" WITHOUT EXPRESS OR IMPLIED WARRANTY OF ANY KIND.
+ * NVIDIA DISCLAIMS ALL WARRANTIES WITH REGARD TO THESE LICENSED
+ * DELIVERABLES, INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY,
+ * NONINFRINGEMENT, AND FITNESS FOR A PARTICULAR PURPOSE.
+ * NOTWITHSTANDING ANY TERMS OR CONDITIONS TO THE CONTRARY IN THE
+ * LICENSE AGREEMENT, IN NO EVENT SHALL NVIDIA BE LIABLE FOR ANY
+ * SPECIAL, INDIRECT, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, OR ANY
+ * DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS,
+ * WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS
+ * ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE
+ * OF THESE LICENSED DELIVERABLES.
+ *
+ * U.S. Government End Users. These Licensed Deliverables are a
+ * "commercial item" as that term is defined at 48 C.F.R. 2.101 (OCT
+ * 1995), consisting of "commercial computer software" and "commercial
+ * computer software documentation" as such terms are used in 48
+ * C.F.R. 12.212 (SEPT 1995) and is provided to the U.S. Government
+ * only as a commercial end item.  Consistent with 48 C.F.R.12.212 and
+ * 48 C.F.R. 227.7202-1 through 227.7202-4 (JUNE 1995), all
+ * U.S. Government End Users acquire the Licensed Deliverables with
+ * only those rights set forth herein.
+ *
+ * Any use of the Licensed Deliverables in individual and commercial
+ * software must include, in the user documentation and internal
+ * comments to the code, the above Disclaimer and U.S. Government End
+ * Users Notice.
+ */
+
+#if !defined(__SM_32_ATOMIC_FUNCTIONS_HPP__)
+#define __SM_32_ATOMIC_FUNCTIONS_HPP__
+
+#ifdef __CUDA_ARCH__
+extern "C"
+{ /* declarations only (no bodies in this header): the 64-bit builtins that the atomic* overloads further down forward to */
+extern __device__ __device_builtin__ long long __illAtomicMin(long long *address, long long val);
+extern __device__ __device_builtin__ long long __illAtomicMax(long long *address, long long val);
+extern __device__ __device_builtin__ long long __llAtomicAnd(long long *address, long long val);
+extern __device__ __device_builtin__ long long __llAtomicOr(long long *address, long long val);
+extern __device__ __device_builtin__ long long __llAtomicXor(long long *address, long long val);
+extern __device__ __device_builtin__ unsigned long long __ullAtomicMin(unsigned long long *address, unsigned long long val);
+extern __device__ __device_builtin__ unsigned long long __ullAtomicMax(unsigned long long *address, unsigned long long val);
+extern __device__ __device_builtin__ unsigned long long __ullAtomicAnd(unsigned long long *address, unsigned long long val);
+extern __device__ __device_builtin__ unsigned long long __ullAtomicOr (unsigned long long *address, unsigned long long val);
+extern __device__ __device_builtin__ unsigned long long __ullAtomicXor(unsigned long long *address, unsigned long long val);
+}
+#endif /* __CUDA_ARCH__ (the declarations above exist only when this macro is defined) */
+
+
+#if defined(__CUDACC_RTC__)
+#define __SM_32_ATOMIC_FUNCTIONS_DECL__ __device__
+#else /* !__CUDACC_RTC__: the wrappers additionally become static __inline__ */
+#define __SM_32_ATOMIC_FUNCTIONS_DECL__ static __inline__ __device__
+#endif /* __CUDACC_RTC__ (the macro is #undef'd again at the end of this header) */
+
+#if defined(__cplusplus) && defined(__CUDACC__)
+
+#if defined(_NVHPC_CUDA) || !defined(__CUDA_ARCH__) || __CUDA_ARCH__ >= 320
+
+/*******************************************************************************
+* *
+* *
+* *
+*******************************************************************************/
+
+#include "cuda_runtime_api.h"
+
+/*******************************************************************************
+* *
+* *
+* *
+*******************************************************************************/
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicMin(long long *address, long long val)
+{
+ return __illAtomicMin(address, val); /* signed 64-bit overload: forwards both arguments to the __illAtomicMin builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicMax(long long *address, long long val)
+{
+ return __illAtomicMax(address, val); /* signed 64-bit overload: forwards both arguments to the __illAtomicMax builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicAnd(long long *address, long long val)
+{
+ return __llAtomicAnd(address, val); /* signed 64-bit overload: forwards both arguments to the __llAtomicAnd builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicOr(long long *address, long long val)
+{
+ return __llAtomicOr(address, val); /* signed 64-bit overload: forwards both arguments to the __llAtomicOr builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ long long atomicXor(long long *address, long long val)
+{
+ return __llAtomicXor(address, val); /* signed 64-bit overload: forwards both arguments to the __llAtomicXor builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMin(unsigned long long *address, unsigned long long val)
+{
+ return __ullAtomicMin(address, val); /* unsigned 64-bit overload: forwards both arguments to the __ullAtomicMin builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicMax(unsigned long long *address, unsigned long long val)
+{
+ return __ullAtomicMax(address, val); /* unsigned 64-bit overload: forwards both arguments to the __ullAtomicMax builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicAnd(unsigned long long *address, unsigned long long val)
+{
+ return __ullAtomicAnd(address, val); /* unsigned 64-bit overload: forwards both arguments to the __ullAtomicAnd builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicOr(unsigned long long *address, unsigned long long val)
+{
+ return __ullAtomicOr(address, val); /* unsigned 64-bit overload: forwards both arguments to the __ullAtomicOr builtin and returns its result */
+}
+
+__SM_32_ATOMIC_FUNCTIONS_DECL__ unsigned long long atomicXor(unsigned long long *address, unsigned long long val)
+{
+ return __ullAtomicXor(address, val); /* unsigned 64-bit overload: forwards both arguments to the __ullAtomicXor builtin and returns its result */
+}
+
+#endif /* _NVHPC_CUDA || !__CUDA_ARCH__ || __CUDA_ARCH__ >= 320 */
+
+#endif /* __cplusplus && __CUDACC__ */
+
+#undef __SM_32_ATOMIC_FUNCTIONS_DECL__
+
+#endif /* !__SM_32_ATOMIC_FUNCTIONS_HPP__ */
+
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..46bf8342aee5bcbed2908cc08f59d233d8756957
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/math.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/math.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9022ed3ae77ddb28a17511296d0674db39104505
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/math.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/random.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/random.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4d8133dd21782763e4e06b1461061d0152ed1276
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/random.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/standard.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/standard.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..774a911e2df302abb1b20e4ef52290d087bda101
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/standard.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/target_info.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/target_info.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fb11d05bd2cbb59c80899aee5b443b9916a03e0a
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/__pycache__/target_info.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..3f8c70a716a3da3473a4906b44aec7d35fcc35a5
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__init__.py
@@ -0,0 +1,26 @@
+import pkgutil
+from importlib.util import module_from_spec
+from sys import modules
+
+_backends = []
+for module_finder, module_name, is_pkg in pkgutil.iter_modules(
+ __path__,
+ prefix=__name__ + ".",
+):
+ # skip .py files (like libdevice.py)
+ if not is_pkg:
+ continue
+
+ # import backends (like cuda and hip) that are included during setup.py
+ spec = module_finder.find_spec(module_name)
+ if spec is None or spec.loader is None:
+ continue
+ module = module_from_spec(spec)
+ spec.loader.exec_module(module)
+
+ _backends.append(module_name)
+ modules[module_name] = module
+
+__all__ = _backends
+
+del _backends
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6049581a4e89b108d389cdb174e05227d3923db2
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/libdevice.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/libdevice.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c96f6c8e7e8b8206a93bc063361f113f3bc21199
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/__pycache__/libdevice.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fbececf1defce4a9493a9e75cc7cb39571465175
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__init__.py
@@ -0,0 +1,16 @@
+from . import libdevice
+
+from .utils import (globaltimer, num_threads, num_warps, smid, convert_custom_float8_sm70, convert_custom_float8_sm80)
+from .gdc import (gdc_launch_dependents, gdc_wait)
+
+__all__ = [
+ "libdevice",
+ "globaltimer",
+ "num_threads",
+ "num_warps",
+ "smid",
+ "convert_custom_float8_sm70",
+ "convert_custom_float8_sm80",
+ "gdc_launch_dependents",
+ "gdc_wait",
+]
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9969758e6e929107bb5eb1d722045e5f8c1049fd
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/gdc.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/gdc.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dc6d28f3b749cf0a3add32983e6b5a8fb65ec356
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/gdc.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/libdevice.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/libdevice.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5b9dc6a02eba1177af530dc4fe81c2f2648d0c98
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/libdevice.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/utils.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..97fb52985ac36729a80ab3bb2a850e9f237b944a
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/__pycache__/utils.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/gdc.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/gdc.py
new file mode 100644
index 0000000000000000000000000000000000000000..4376719e3dbe63ac2dfe65bfc6bf936116056676
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/gdc.py
@@ -0,0 +1,42 @@
+"""
+Grid Dependency Control (GDC) is a mechanism used when enabling programmatic dependent launch to launch and
+synchronize grids. These APIs expose GDC to the programmer.
+
+Programmatic dependent launch is supported on SM90 (Hopper) and beyond.
+For PTX reference on grid dependency control see https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-griddepcontrol.
+"""
+
+from triton.language import core
+
+
+@core.extern
+def gdc_wait(_semantic=None):
+ """
+ GDC wait is a blocking instruction that waits for all instructions in a prior kernel to complete before continuing.
+ This ensures all memory operations happening before the wait is visible to instructions after it,
+ e.g. if the prior kernel writes to address "x" the new values will be visible in this kernel after the wait.
+
+ This instruction is also safe to execute when programmatic dependent launch is disabled.
+
+ See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-griddepcontrol for more details.
+ """
+ core.inline_asm_elementwise("griddepcontrol.wait; // dummy $0", "=r", [], dtype=core.int32, is_pure=False, pack=1,
+ _semantic=_semantic)
+
+
+@core.extern
+def gdc_launch_dependents(_semantic=None):
+ """
+ This operation when launched with programmatic dependent launch signals that
+ the next program may launch once all programs in the current kernel
+ call this function or complete.
+
+ Repeated calls to this function have no effect past the first call, and the first call should be
+ treated by the programmer as a hint to the runtime system to launch the next kernel.
+
+ This instruction is also safe to execute when programmatic dependent launch is disabled.
+
+ See https://docs.nvidia.com/cuda/parallel-thread-execution/index.html#parallel-synchronization-and-communication-instructions-griddepcontrol for more details.
+ """
+ core.inline_asm_elementwise("griddepcontrol.launch_dependents; // dummy $0", "=r", [], dtype=core.int32,
+ is_pure=False, pack=1, _semantic=_semantic)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/libdevice.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/libdevice.py
new file mode 100644
index 0000000000000000000000000000000000000000..08661f5414a68f43b1fe35a2de945ed30322d73f
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/libdevice.py
@@ -0,0 +1,1629 @@
+from triton.language import core
+
+
+@core.extern
+def clz(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_clz", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__nv_clzll", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def popc(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_popc", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__nv_popcll", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def byte_perm(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise("", "", [arg0, arg1, arg2], {
+ (core.dtype("int32"), core.dtype("int32"), core.dtype("int32")): ("__nv_byte_perm", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mulhi(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("int32")): ("__nv_mulhi", core.dtype("int32")),
+ (core.dtype("uint32"), core.dtype("uint32")): ("__nv_umulhi", core.dtype("uint32")),
+ (core.dtype("int64"), core.dtype("int64")): ("__nv_mul64hi", core.dtype("int64")),
+ (core.dtype("uint64"), core.dtype("uint64")): ("__nv_umul64hi", core.dtype("uint64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mul24(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("int32")): ("__nv_mul24", core.dtype("int32")),
+ (core.dtype("uint32"), core.dtype("uint32")): ("__nv_umul24", core.dtype("uint32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def brev(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_brev", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__nv_brevll", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sad(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("int32"), core.dtype("int32"), core.dtype("uint32")): ("__nv_sad", core.dtype("int32")),
+ (core.dtype("uint32"), core.dtype("uint32"), core.dtype("uint32")): ("__nv_usad", core.dtype("uint32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def abs(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_abs", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__nv_llabs", core.dtype("int64")),
+ (core.dtype("fp32"), ): ("__nv_fabsf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_fabs", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def floor(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_floorf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_floor", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcp64h(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_rcp64h", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rsqrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_rsqrtf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_rsqrt", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ceil(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_ceil", core.dtype("fp64")),
+ (core.dtype("fp32"), ): ("__nv_ceilf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def trunc(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_trunc", core.dtype("fp64")),
+ (core.dtype("fp32"), ): ("__nv_truncf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def exp2(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_exp2f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_exp2", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def saturatef(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_saturatef", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma_rn(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmaf_rn", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_fma_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma_rz(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmaf_rz", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_fma_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma_rd(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmaf_rd", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_fma_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma_ru(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmaf_ru", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_fma_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_dividef(arg0, arg1, _semantic=None):
+ return core.extern_elementwise("", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fast_fdividef", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def div_rn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fdiv_rn", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_ddiv_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def div_rz(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fdiv_rz", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_ddiv_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def div_rd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fdiv_rd", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_ddiv_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def div_ru(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fdiv_ru", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_ddiv_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcp_rn(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_frcp_rn", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_drcp_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcp_rz(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_frcp_rz", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_drcp_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcp_rd(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_frcp_rd", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_drcp_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcp_ru(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_frcp_ru", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_drcp_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt_rn(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fsqrt_rn", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_dsqrt_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt_rz(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fsqrt_rz", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_dsqrt_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt_rd(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fsqrt_rd", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_dsqrt_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt_ru(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fsqrt_ru", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_dsqrt_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_sqrtf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_sqrt", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def add_rn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dadd_rn", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fadd_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def add_rz(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dadd_rz", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fadd_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def add_rd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dadd_rd", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fadd_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def add_ru(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dadd_ru", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fadd_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mul_rn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dmul_rn", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmul_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mul_rz(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dmul_rz", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmul_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mul_rd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dmul_rd", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmul_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def mul_ru(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ arg1,
+ ], {
+ (
+ core.dtype("fp64"),
+ core.dtype("fp64"),
+ ): ("__nv_dmul_ru", core.dtype("fp64")),
+ (
+ core.dtype("fp32"),
+ core.dtype("fp32"),
+ ): ("__nv_fmul_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2float_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2float_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2float_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2float_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2float_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2float_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2float_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2float_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2int_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2int_rn", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2int_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2int_rz", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2int_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2int_rd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2int_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2int_ru", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2uint_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2uint_rn", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2uint_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2uint_rz", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2uint_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2uint_rd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2uint_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2uint_ru", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int2double_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int2double_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint2double_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint2double_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2int_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2int_rn", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2int_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2int_rz", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2int_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2int_rd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2int_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2int_ru", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2uint_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2uint_rn", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2uint_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2uint_rz", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2uint_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2uint_rd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2uint_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2uint_ru", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int2float_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int2float_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int2float_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int2float_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int2float_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int2float_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int2float_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int2float_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint2float_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint2float_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint2float_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint2float_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint2float_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint2float_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint2float_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint2float_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def hiloint2double(arg0, arg1, _semantic=None):
+ return core.extern_elementwise("", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("int32")): ("__nv_hiloint2double", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2loint(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2loint", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2hiint(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2hiint", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ll_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ll_rn", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ll_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ll_rz", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ll_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ll_rd", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ll_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ll_ru", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ull_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ull_rn", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ull_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ull_rz", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ull_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ull_rd", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float2ull_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float2ull_ru", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ll_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ll_rn", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ll_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ll_rz", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ll_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ll_rd", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ll_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ll_ru", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ull_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ull_rn", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ull_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ull_rz", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ull_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ull_rd", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double2ull_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double2ull_ru", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2float_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2float_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2float_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2float_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2float_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2float_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2float_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2float_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2float_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2float_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2float_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2float_rz", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2float_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2float_rd", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2float_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2float_ru", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2double_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2double_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2double_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2double_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2double_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2double_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ll2double_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_ll2double_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2double_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2double_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2double_rz(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2double_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2double_rd(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2double_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ull2double_ru(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint64"), ): ("__nv_ull2double_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def int_as_float(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int32"), ): ("__nv_int_as_float", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float_as_int(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float_as_int", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def uint_as_float(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("uint32"), ): ("__nv_uint_as_float", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def float_as_uint(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_float_as_uint", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def longlong_as_double(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("int64"), ): ("__nv_longlong_as_double", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def double_as_longlong(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_double_as_longlong", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_sinf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_sinf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_cosf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_cosf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_log2f(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_log2f", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_logf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_logf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_expf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_expf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_tanf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_tanf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_exp10f(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_exp10f", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_log10f(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_fast_log10f", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_powf(arg0, arg1, _semantic=None):
+ return core.extern_elementwise("", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fast_powf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def hadd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("int32")): ("__nv_hadd", core.dtype("int32")),
+ (core.dtype("uint32"), core.dtype("uint32")): ("__nv_uhadd", core.dtype("uint32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rhadd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("int32")): ("__nv_rhadd", core.dtype("int32")),
+ (core.dtype("uint32"), core.dtype("uint32")): ("__nv_urhadd", core.dtype("uint32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sub_rn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rn", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sub_rz(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rz", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rz", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sub_rd(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_rd", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_rd", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sub_ru(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fsub_ru", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_dsub_ru", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rsqrt_rn(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_frsqrt_rn", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ffs(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("int32"), ): ("__nv_ffs", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__nv_ffsll", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rint(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_rintf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_rint", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def llrint(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_llrintf", core.dtype("int64")),
+ (core.dtype("fp64"), ): ("__nv_llrint", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def nearbyint(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_nearbyintf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_nearbyint", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def isnan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_isnanf", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__nv_isnand", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
+
+
+@core.extern
+def signbit(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__nv_signbitf", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__nv_signbitd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def copysign(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_copysignf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_copysign", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def finitef(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_finitef", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
+
+
+@core.extern
+def isinf(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_isinff", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__nv_isinfd", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
+
+
+@core.extern
+def nextafter(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_nextafterf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_nextafter", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sin(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_sinf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_sin", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cos(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_cosf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cos", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sinpi(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_sinpif", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_sinpi", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cospi(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_cospif", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cospi", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def tan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_tanf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_tan", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log2(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_log2f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_log2", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def exp(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_expf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_exp", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def exp10(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_exp10f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_exp10", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cosh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_coshf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cosh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sinh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_sinhf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_sinh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def tanh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_tanhf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_tanh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atan2(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_atan2f", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_atan2", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_atanf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_atan", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def asin(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_asinf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_asin", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def acos(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_acosf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_acos", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_logf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_log", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log10(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_log10f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_log10", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log1p(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_log1pf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_log1p", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def acosh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_acoshf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_acosh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def asinh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_asinhf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_asinh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atanh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_atanhf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_atanh", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def expm1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_expm1f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_expm1", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def hypot(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_hypotf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_hypot", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rhypot(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_rhypotf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_rhypot", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def norm3d(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_norm3df", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_norm3d", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rnorm3d(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_rnorm3df", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_rnorm3d", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def norm4d(arg0, arg1, arg2, arg3, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2, arg3], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")):
+ ("__nv_norm4df", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")):
+ ("__nv_norm4d", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rnorm4d(arg0, arg1, arg2, arg3, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2, arg3], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")):
+ ("__nv_rnorm4df", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")):
+ ("__nv_rnorm4d", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cbrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_cbrtf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cbrt", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rcbrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_rcbrtf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_rcbrt", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def j0(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_j0f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_j0", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def j1(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_j1f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_j1", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def y0(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_y0f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_y0", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def y1(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_y1f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_y1", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def yn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("fp32")): ("__nv_ynf", core.dtype("fp32")),
+ (core.dtype("int32"), core.dtype("fp64")): ("__nv_yn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def jn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("int32"), core.dtype("fp32")): ("__nv_jnf", core.dtype("fp32")),
+ (core.dtype("int32"), core.dtype("fp64")): ("__nv_jn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cyl_bessel_i0(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_cyl_bessel_i0f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cyl_bessel_i0", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cyl_bessel_i1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_cyl_bessel_i1f", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_cyl_bessel_i1", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erf(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_erff", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_erf", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfinv(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_erfinvf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_erfinv", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfc(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_erfcf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_erfc", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfcx(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_erfcxf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_erfcx", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfcinv(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_erfcinvf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_erfcinv", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def normcdfinv(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_normcdfinvf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_normcdfinv", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def normcdf(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_normcdff", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_normcdf", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def lgamma(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_lgammaf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_lgamma", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ldexp(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("int32")): ("__nv_ldexpf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("int32")): ("__nv_ldexp", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def scalbn(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("int32")): ("__nv_scalbnf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("int32")): ("__nv_scalbn", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fmod(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmodf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_fmod", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def remainder(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_remainderf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_remainder", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__nv_fmaf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__nv_fma", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def pow(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("int32")): ("__nv_powif", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("int32")): ("__nv_powi", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_powf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_pow", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def tgamma(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_tgammaf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_tgamma", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def round(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_roundf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_round", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def llround(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_llroundf", core.dtype("int64")),
+ (core.dtype("fp64"), ): ("__nv_llround", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fdim(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__nv_fdimf", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__nv_fdim", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ilogb(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_ilogbf", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__nv_ilogb", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def logb(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__nv_logbf", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__nv_logb", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def isfinited(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp64"), ): ("__nv_isfinited", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/utils.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..bb67b573a381156e7713a3359db859409701d7d7
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/cuda/utils.py
@@ -0,0 +1,109 @@
+from triton.language import core
+
+
+@core.extern
+def globaltimer(_semantic=None):
+ return core.inline_asm_elementwise("mov.u64 $0, %globaltimer;", "=l", [], dtype=core.int64, is_pure=False, pack=1,
+ _semantic=_semantic)
+
+
+@core.extern
+def smid(_semantic=None):
+ return core.inline_asm_elementwise("mov.u32 $0, %smid;", "=r", [], dtype=core.int32, is_pure=True, pack=1,
+ _semantic=_semantic)
+
+
+@core.builtin
+def num_threads(_semantic=None):
+ return core.constexpr(_semantic.builder.options.num_warps * 32)
+
+
+@core.builtin
+def num_warps(_semantic=None):
+ return core.constexpr(_semantic.builder.options.num_warps)
+
+
+# ----- FP8E4M3B15 ------
+# This data-type is a variant of the standard FP8E4M3 format.
+# It was designed for fast software conversion to FP16 on
+# nvidia GPUs that do not support it natively.
+# This is the same format as FP8E4M3Nv, but:
+# - the exponent bias is 15 instead of 7
+# - 0xff and 0x7f are mapped to +-1.750 instead of +-nan
+@core.builtin
+def convert_fp8e4b15_to_float16(arg, _semantic=None):
+ return core.inline_asm_elementwise(
+ "{ \n"
+ ".reg .b32 a<2>, b<2>; \n"
+ "prmt.b32 a0, 0, $2, 0x5746; \n"
+ "and.b32 b0, a0, 0x7f007f00; \n"
+ "and.b32 b1, a0, 0x00ff00ff; \n"
+ "and.b32 a1, a0, 0x00800080; \n"
+ "shr.b32 b0, b0, 1; \n"
+ "add.u32 b1, b1, a1; \n"
+ "lop3.b32 $0, b0, 0x80008000, a0, 0xf8; \n"
+ "shl.b32 $1, b1, 7; \n"
+ "} \n", "=r,=r,r", [arg], dtype=core.float16, is_pure=True, pack=4,
+ _semantic=_semantic)
+
+
+@core.builtin
+def convert_float16_to_fp8e4b15(arg, has_minx2, _semantic=None):
+ asm = """{
+ .reg .pred p<4>;
+ .reg .b32 a<2>, b<2>;
+ .reg .b16 c<4>;
+ .reg .b16 max_val_f16;
+ .reg .b32 max_val_f16x2;
+ mov.b16 max_val_f16, 0x3F00;
+ mov.b32 max_val_f16x2, 0x3F003F00;
+ and.b32 a0, $1, 0x7fff7fff;
+ and.b32 a1, $2, 0x7fff7fff;"""
+ if has_minx2:
+ asm += """min.f16x2 a0, a0, max_val_f16x2;
+ min.f16x2 a1, a1, max_val_f16x2;"""
+ else:
+ asm += """setp.lt.f16x2 p0|p1, a0, max_val_f16x2;
+ setp.lt.f16x2 p2|p3, a1, max_val_f16x2;
+ mov.b32 {c0, c1}, a0;
+ mov.b32 {c2, c3}, a1;
+ selp.b16 c0, c0, max_val_f16, p0;
+ selp.b16 c1, c1, max_val_f16, p1;
+ selp.b16 c2, c2, max_val_f16, p2;
+ selp.b16 c3, c3, max_val_f16, p3;
+ mov.b32 a0, {c0, c1};
+ mov.b32 a1, {c2, c3};"""
+ asm += """mad.lo.u32 a0, a0, 2, 0x00800080;
+ mad.lo.u32 a1, a1, 2, 0x00800080;
+ lop3.b32 b0, $1, 0x80008000, a0, 0xea;
+ lop3.b32 b1, $2, 0x80008000, a1, 0xea;
+ prmt.b32 $0, b0, b1, 0x7531;
+ }"""
+ return core.inline_asm_elementwise(asm, "=r,r,r", [arg], dtype=core.float8e4b15, is_pure=True, pack=4,
+ _semantic=_semantic)
+
+
+@core.builtin
+def convert_custom_float8(arg, dst_ty, fp_downcast_rounding, has_minx2, _semantic=None):
+ if arg.type.scalar.is_fp8e4b15():
+ upcast_val = convert_fp8e4b15_to_float16(arg, _semantic=_semantic)
+ if dst_ty.scalar.is_fp32():
+ upcast_val = upcast_val.to(core.float32, _semantic=_semantic)
+ return upcast_val
+
+ assert arg.type.scalar.is_fp16() or arg.type.scalar.is_fp32()
+ downcast_val = arg
+ if arg.type.scalar.is_fp32():
+ downcast_val = downcast_val.to(core.float16, fp_downcast_rounding="rtz", _semantic=_semantic)
+ downcast_val = convert_float16_to_fp8e4b15(downcast_val, has_minx2=has_minx2, _semantic=_semantic)
+ return downcast_val
+
+
+@core.builtin
+def convert_custom_float8_sm80(arg, dst_ty, fp_downcast_rounding=None, _semantic=None):
+ return convert_custom_float8(arg, dst_ty, fp_downcast_rounding, has_minx2=True, _semantic=_semantic)
+
+
+@core.builtin
+def convert_custom_float8_sm70(arg, dst_ty, fp_downcast_rounding=None, _semantic=None):
+ return convert_custom_float8(arg, dst_ty, fp_downcast_rounding, has_minx2=False, _semantic=_semantic)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__init__.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc9b571ddfacbd15b1e8258cce592313f7d45a3e
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__init__.py
@@ -0,0 +1,5 @@
+from . import libdevice
+
+from .utils import memrealtime
+
+__all__ = ["libdevice", "memrealtime"]
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce0037709e25e9b2d8fd7cdb1e2972c397e2f400
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/libdevice.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/libdevice.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b14b27d62cb626fdcb215fdf9349568578b97570
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/libdevice.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/utils.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d3c42b312a3fbe6f3d362edb0f9c45872d5b51ae
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/__pycache__/utils.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/libdevice.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/libdevice.py
new file mode 100644
index 0000000000000000000000000000000000000000..fc8d1b11a80299ae9f203bc48f039020faa80353
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/libdevice.py
@@ -0,0 +1,491 @@
+from triton.language import core
+
+
+@core.extern
+def abs(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("int32"), ): ("__triton_hip_iabs", core.dtype("int32")),
+ (core.dtype("int64"), ): ("__triton_hip_iabs", core.dtype("int64")),
+ (core.dtype("fp32"), ): ("__triton_hip_fabs", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__triton_hip_fabs", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def floor(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_floor_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_floor_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def rsqrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_rsqrt_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_rsqrt_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ceil(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_ceil_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_ceil_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def trunc(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_trunc_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_trunc_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def exp2(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_exp2_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_exp2_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def exp(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_exp_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_exp_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_expf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__triton_hip_fast_expf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_tanhf(arg0, _semantic=None):
+ return core.extern_elementwise("", "", [arg0], {
+ (core.dtype("fp32"), ): ("__triton_hip_fast_tanhf", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fast_dividef(arg0, arg1, _semantic=None):
+ return core.extern_elementwise("", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__triton_hip_fast_fdividef", core.dtype("fp32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sqrt(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_sqrt_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_sqrt_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def llrint(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__triton_hip_llrint", core.dtype("int64")),
+ (core.dtype("fp64"), ): ("__triton_hip_llrint", core.dtype("int64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def nearbyint(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__ocml_nearbyint_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_nearbyint_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def isnan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__ocml_isnan_f32", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__ocml_isnan_f64", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
+
+
+@core.extern
+def signbit(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [
+ arg0,
+ ], {
+ (core.dtype("fp32"), ): ("__ocml_signbit_f32", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__ocml_signbit_f64", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def copysign(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_copysign_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_copysign_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def isinf(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_isinf_f32", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__ocml_isinf_f64", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic).to(core.int1, _semantic=_semantic)
+
+
+@core.extern
+def nextafter(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_nextafter_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_nextafter_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sin(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_sin_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_sin_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cos(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_cos_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_cos_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def tan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_tan_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_tan_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log2(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_log2_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_log2_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cosh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_cosh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_cosh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def sinh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_sinh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_sinh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def tanh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_tanh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_tanh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atan2(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_atan2_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_atan2_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atan(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_atan_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_atan_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def asin(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_asin_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_asin_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def acos(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_acos_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_acos_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_log_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_log_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log10(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_log10_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_log10_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def log1p(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_log1p_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_log1p_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def acosh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_acosh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_acosh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def asinh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_asinh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_asinh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def atanh(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_atanh_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_atanh_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def expm1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_expm1_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_expm1_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def hypot(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_hypot_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_hypot_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def j0(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_j0_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_j0_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def j1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_j1_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_j1_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def y0(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_y0_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_y0_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def y1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_y1_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_y1_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cyl_bessel_i0(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_i0_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_i0_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def cyl_bessel_i1(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_i1_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_i1_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erf(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_erf_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_erf_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfinv(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_erfinv_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_erfinv_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfc(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_erfc_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_erfc_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def erfcx(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_erfcx_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_erfcx_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def lgamma(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_lgamma_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_lgamma_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ldexp(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("int32")): ("__ocml_ldexp_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("int32")): ("__ocml_ldexp_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fmod(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fmod_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fmod_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def fma(arg0, arg1, arg2, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1, arg2], {
+ (core.dtype("fp32"), core.dtype("fp32"), core.dtype("fp32")): ("__ocml_fma_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64"), core.dtype("fp64")): ("__ocml_fma_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def pow(arg0, arg1, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0, arg1], {
+ (core.dtype("fp32"), core.dtype("int32")): ("__ocml_pown_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("int32")): ("__ocml_pown_f64", core.dtype("fp64")),
+ (core.dtype("fp32"), core.dtype("fp32")): ("__ocml_pow_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), core.dtype("fp64")): ("__ocml_pow_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def ilogb(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_ilogb_f32", core.dtype("int32")),
+ (core.dtype("fp64"), ): ("__ocml_ilogb_f64", core.dtype("int32")),
+ }, is_pure=True, _semantic=_semantic)
+
+
+@core.extern
+def round(arg0, _semantic=None):
+ return core.extern_elementwise(
+ "", "", [arg0], {
+ (core.dtype("fp32"), ): ("__ocml_round_f32", core.dtype("fp32")),
+ (core.dtype("fp64"), ): ("__ocml_round_f64", core.dtype("fp64")),
+ }, is_pure=True, _semantic=_semantic)
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/utils.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/utils.py
new file mode 100644
index 0000000000000000000000000000000000000000..c9dbabc4d3cfdbd5ee91b38ef3be969b9f187046
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/hip/utils.py
@@ -0,0 +1,35 @@
+from triton.language import core
+
+
+@core.extern
+def memrealtime(_semantic=None):
+ """
+ Returns a 64-bit real time-counter value
+ """
+ target_arch = _semantic.builder.options.arch
+ if 'gfx11' in target_arch or 'gfx12' in target_arch:
+ return core.inline_asm_elementwise(
+ """
+ s_sendmsg_rtn_b64 $0, sendmsg(MSG_RTN_GET_REALTIME)
+ s_waitcnt lgkmcnt(0)
+ """,
+ "=r",
+ [],
+ dtype=core.int64,
+ is_pure=False,
+ pack=1,
+ _semantic=_semantic,
+ )
+ else:
+ return core.inline_asm_elementwise(
+ """
+ s_memrealtime $0
+ s_waitcnt vmcnt(0)
+ """,
+ "=r",
+ [],
+ dtype=core.int64,
+ is_pure=False,
+ pack=1,
+ _semantic=_semantic,
+ )
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/libdevice.py b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/libdevice.py
new file mode 100644
index 0000000000000000000000000000000000000000..e29810bfbabdcc09d6a28f062c18ee6af3fe7575
--- /dev/null
+++ b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/language/extra/libdevice.py
@@ -0,0 +1,790 @@
+def clz(arg0):
+ ...
+
+
+def popc(arg0):
+ ...
+
+
+def byte_perm(arg0, arg1, arg2):
+ ...
+
+
+def mulhi(arg0, arg1):
+ ...
+
+
+def mul24(arg0, arg1):
+ ...
+
+
+def brev(arg0):
+ ...
+
+
+def sad(arg0, arg1, arg2):
+ ...
+
+
+def abs(arg0):
+ ...
+
+
+def floor(arg0):
+ ...
+
+
+def rcp64h(arg0):
+ ...
+
+
+def rsqrt(arg0):
+ ...
+
+
+def ceil(arg0):
+ ...
+
+
+def trunc(arg0):
+ ...
+
+
+def exp2(arg0):
+ ...
+
+
+def saturatef(arg0):
+ ...
+
+
+def fma_rn(arg0, arg1, arg2):
+ ...
+
+
+def fma_rz(arg0, arg1, arg2):
+ ...
+
+
+def fma_rd(arg0, arg1, arg2):
+ ...
+
+
+def fma_ru(arg0, arg1, arg2):
+ ...
+
+
+def fast_dividef(arg0, arg1):
+ ...
+
+
+def div_rn(arg0, arg1):
+ ...
+
+
+def div_rz(arg0, arg1):
+ ...
+
+
+def div_rd(arg0, arg1):
+ ...
+
+
+def div_ru(arg0, arg1):
+ ...
+
+
+def rcp_rn(arg0):
+ ...
+
+
+def rcp_rz(arg0):
+ ...
+
+
+def rcp_rd(arg0):
+ ...
+
+
+def rcp_ru(arg0):
+ ...
+
+
+def sqrt_rn(arg0):
+ ...
+
+
+def sqrt_rz(arg0):
+ ...
+
+
+def sqrt_rd(arg0):
+ ...
+
+
+def sqrt_ru(arg0):
+ ...
+
+
+def sqrt(arg0):
+ ...
+
+
+def add_rn(arg0, arg1):
+ ...
+
+
+def add_rz(arg0, arg1):
+ ...
+
+
+def add_rd(arg0, arg1):
+ ...
+
+
+def add_ru(arg0, arg1):
+ ...
+
+
+def mul_rn(arg0, arg1):
+ ...
+
+
+def mul_rz(arg0, arg1):
+ ...
+
+
+def mul_rd(arg0, arg1):
+ ...
+
+
+def mul_ru(arg0, arg1):
+ ...
+
+
+def double2float_rn(arg0):
+ ...
+
+
+def double2float_rz(arg0):
+ ...
+
+
+def double2float_rd(arg0):
+ ...
+
+
+def double2float_ru(arg0):
+ ...
+
+
+def double2int_rn(arg0):
+ ...
+
+
+def double2int_rz(arg0):
+ ...
+
+
+def double2int_rd(arg0):
+ ...
+
+
+def double2int_ru(arg0):
+ ...
+
+
+def double2uint_rn(arg0):
+ ...
+
+
+def double2uint_rz(arg0):
+ ...
+
+
+def double2uint_rd(arg0):
+ ...
+
+
+def double2uint_ru(arg0):
+ ...
+
+
+def int2double_rn(arg0):
+ ...
+
+
+def uint2double_rn(arg0):
+ ...
+
+
+def float2int_rn(arg0):
+ ...
+
+
+def float2int_rz(arg0):
+ ...
+
+
+def float2int_rd(arg0):
+ ...
+
+
+def float2int_ru(arg0):
+ ...
+
+
+def float2uint_rn(arg0):
+ ...
+
+
+def float2uint_rz(arg0):
+ ...
+
+
+def float2uint_rd(arg0):
+ ...
+
+
+def float2uint_ru(arg0):
+ ...
+
+
+def int2float_rn(arg0):
+ ...
+
+
+def int2float_rz(arg0):
+ ...
+
+
+def int2float_rd(arg0):
+ ...
+
+
+def int2float_ru(arg0):
+ ...
+
+
+def uint2float_rn(arg0):
+ ...
+
+
+def uint2float_rz(arg0):
+ ...
+
+
+def uint2float_rd(arg0):
+ ...
+
+
+def uint2float_ru(arg0):
+ ...
+
+
+def hiloint2double(arg0, arg1):
+ ...
+
+
+def double2loint(arg0):
+ ...
+
+
+def double2hiint(arg0):
+ ...
+
+
+def float2ll_rn(arg0):
+ ...
+
+
+def float2ll_rz(arg0):
+ ...
+
+
+def float2ll_rd(arg0):
+ ...
+
+
+def float2ll_ru(arg0):
+ ...
+
+
+def float2ull_rn(arg0):
+ ...
+
+
+def float2ull_rz(arg0):
+ ...
+
+
+def float2ull_rd(arg0):
+ ...
+
+
+def float2ull_ru(arg0):
+ ...
+
+
+def double2ll_rn(arg0):
+ ...
+
+
+def double2ll_rz(arg0):
+ ...
+
+
+def double2ll_rd(arg0):
+ ...
+
+
+def double2ll_ru(arg0):
+ ...
+
+
+def double2ull_rn(arg0):
+ ...
+
+
+def double2ull_rz(arg0):
+ ...
+
+
+def double2ull_rd(arg0):
+ ...
+
+
+def double2ull_ru(arg0):
+ ...
+
+
+def ll2float_rn(arg0):
+ ...
+
+
+def ll2float_rz(arg0):
+ ...
+
+
+def ll2float_rd(arg0):
+ ...
+
+
+def ll2float_ru(arg0):
+ ...
+
+
+def ull2float_rn(arg0):
+ ...
+
+
+def ull2float_rz(arg0):
+ ...
+
+
+def ull2float_rd(arg0):
+ ...
+
+
+def ull2float_ru(arg0):
+ ...
+
+
+def ll2double_rn(arg0):
+ ...
+
+
+def ll2double_rz(arg0):
+ ...
+
+
+def ll2double_rd(arg0):
+ ...
+
+
+def ll2double_ru(arg0):
+ ...
+
+
+def ull2double_rn(arg0):
+ ...
+
+
+def ull2double_rz(arg0):
+ ...
+
+
+def ull2double_rd(arg0):
+ ...
+
+
+def ull2double_ru(arg0):
+ ...
+
+
+def int_as_float(arg0):
+ ...
+
+
+def float_as_int(arg0):
+ ...
+
+
+def uint_as_float(arg0):
+ ...
+
+
+def float_as_uint(arg0):
+ ...
+
+
+def longlong_as_double(arg0):
+ ...
+
+
+def double_as_longlong(arg0):
+ ...
+
+
+def fast_sinf(arg0):
+ ...
+
+
+def fast_cosf(arg0):
+ ...
+
+
+def fast_log2f(arg0):
+ ...
+
+
+def fast_logf(arg0):
+ ...
+
+
+def fast_expf(arg0):
+ ...
+
+
+def fast_tanhf(arg0):
+ ...
+
+
+def fast_tanf(arg0):
+ ...
+
+
+def fast_exp10f(arg0):
+ ...
+
+
+def fast_log10f(arg0):
+ ...
+
+
+def fast_powf(arg0, arg1):
+ ...
+
+
+def hadd(arg0, arg1):
+ ...
+
+
+def rhadd(arg0, arg1):
+ ...
+
+
+def sub_rn(arg0, arg1):
+ ...
+
+
+def sub_rz(arg0, arg1):
+ ...
+
+
+def sub_rd(arg0, arg1):
+ ...
+
+
+def sub_ru(arg0, arg1):
+ ...
+
+
+def rsqrt_rn(arg0):
+ ...
+
+
+def ffs(arg0):
+ ...
+
+
+def rint(arg0):
+ ...
+
+
+def llrint(arg0):
+ ...
+
+
+def nearbyint(arg0):
+ ...
+
+
+def isnan(arg0):
+ ...
+
+
+def signbit(arg0):
+ ...
+
+
+def copysign(arg0, arg1):
+ ...
+
+
+def finitef(arg0):
+ ...
+
+
+def isinf(arg0):
+ ...
+
+
+def nextafter(arg0, arg1):
+ ...
+
+
+def sin(arg0):
+ ...
+
+
+def cos(arg0):
+ ...
+
+
+def sinpi(arg0):
+ ...
+
+
+def cospi(arg0):
+ ...
+
+
+def tan(arg0):
+ ...
+
+
+def log2(arg0):
+ ...
+
+
+def exp(arg0):
+ ...
+
+
+def exp10(arg0):
+ ...
+
+
+def cosh(arg0):
+ ...
+
+
+def sinh(arg0):
+ ...
+
+
+def tanh(arg0):
+ ...
+
+
+def atan2(arg0, arg1):
+ ...
+
+
+def atan(arg0):
+ ...
+
+
+def asin(arg0):
+ ...
+
+
+def acos(arg0):
+ ...
+
+
+def log(arg0):
+ ...
+
+
+def log10(arg0):
+ ...
+
+
+def log1p(arg0):
+ ...
+
+
+def acosh(arg0):
+ ...
+
+
+def asinh(arg0):
+ ...
+
+
+def atanh(arg0):
+ ...
+
+
+def expm1(arg0):
+ ...
+
+
+def hypot(arg0, arg1):
+ ...
+
+
+def rhypot(arg0, arg1):
+ ...
+
+
+def norm3d(arg0, arg1, arg2):
+ ...
+
+
+def rnorm3d(arg0, arg1, arg2):
+ ...
+
+
+def norm4d(arg0, arg1, arg2, arg3):
+ ...
+
+
+def rnorm4d(arg0, arg1, arg2, arg3):
+ ...
+
+
+def cbrt(arg0):
+ ...
+
+
+def rcbrt(arg0):
+ ...
+
+
+def j0(arg0):
+ ...
+
+
+def j1(arg0):
+ ...
+
+
+def y0(arg0):
+ ...
+
+
+def y1(arg0):
+ ...
+
+
+def yn(arg0, arg1):
+ ...
+
+
+def jn(arg0, arg1):
+ ...
+
+
+def cyl_bessel_i0(arg0):
+ ...
+
+
+def cyl_bessel_i1(arg0):
+ ...
+
+
+def erf(arg0):
+ ...
+
+
+def erfinv(arg0):
+ ...
+
+
+def erfc(arg0):
+ ...
+
+
+def erfcx(arg0):
+ ...
+
+
+def erfcinv(arg0):
+ ...
+
+
+def normcdfinv(arg0):
+ ...
+
+
+def normcdf(arg0):
+ ...
+
+
+def lgamma(arg0):
+ ...
+
+
+def ldexp(arg0, arg1):
+ ...
+
+
+def scalbn(arg0, arg1):
+ ...
+
+
+def fmod(arg0, arg1):
+ ...
+
+
+def remainder(arg0, arg1):
+ ...
+
+
+def fma(arg0, arg1, arg2):
+ ...
+
+
+def pow(arg0, arg1):
+ ...
+
+
+def tgamma(arg0):
+ ...
+
+
+def round(arg0):
+ ...
+
+
+def llround(arg0):
+ ...
+
+
+def fdim(arg0, arg1):
+ ...
+
+
+def ilogb(arg0):
+ ...
+
+
+def logb(arg0):
+ ...
+
+
+def isfinited(arg0):
+ ...
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7448c6b4d9acfbab0ed6862cf4e7a702971f381a
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/context.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/context.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bf7aaaabbe04ea3ab0ba9398b81c1b3207f760df
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/context.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/flags.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/flags.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..aba2d83f2cf8b67d11b798b729ac7126fe815d3e
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/flags.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/language.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/language.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..be5696d17d10407d8bee60239aa00d5eccba4f97
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/language.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/mode.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/mode.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..abe9e9f5e7a3b7f2de7569b5e8b8cc2de276cd60
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/mode.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/profile.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/profile.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..64e2dbc280c5b228cd773feb8b1c3d672908b574
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/profile.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/scope.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/scope.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b001d3f1aa4833cb1f7b29d9521d489ce94651bc
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/scope.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/state.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/state.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a10ce55e94388064802881724d449f2e4035770d
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/state.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/viewer.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/viewer.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ce9efedfdf082cec4d80d32c793ba17812c9512d
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/__pycache__/viewer.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..18553d1a2ddd38ef250e98f6c52f6b244ebe4af4
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/hook.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/hook.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e0d67ad2f9bbf612fdbdcbf7bcc1a9c61303b55c
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/hook.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/instrumentation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/instrumentation.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..26253b81aa88d29806324df886b3042f687d9dea
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/instrumentation.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/launch.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/launch.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4c4e539aae1d094541e00366717cb73115c81abc
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/profiler/hooks/__pycache__/launch.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/__init__.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7a1d9fccf4b20e2dad371f2f8af6de47c9a243a5
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/__init__.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_allocation.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_allocation.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..47076289d2d8b88844020d98f1442c9228b726af
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_allocation.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_async_compile.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_async_compile.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8d631ccf27d475377e3dc5933ec5d12e462a516f
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/_async_compile.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/autotuner.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/autotuner.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dafa39bccb78115f5fcb6344c95b9f9396d183f7
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/autotuner.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/build.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/build.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4afc52d5938096cc8c69a51209efe28907528f88
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/build.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/cache.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/cache.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd98d73cccd2ed05914b23120141c70f72157be8
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/cache.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/driver.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/driver.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5a8bf38a069adf642ad0a9ded510b16daed4c549
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/driver.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/errors.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/errors.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c8512fff0c12e3c03896d220d8eb40c23ac6b2d6
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/errors.cpython-312.pyc differ
diff --git a/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/jit.cpython-312.pyc b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/jit.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c7171af0df238f2c5bd4cb09e742e5106cb6b3ff
Binary files /dev/null and b/Prism/LLaDA/LLaDA_Prism/.venv/lib/python3.12/site-packages/triton/runtime/__pycache__/jit.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c590c78fc38c7e776e10864ffda877d1055a8ce7
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/accumulationbounds.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/accumulationbounds.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c268d1b7abefe6e82cec3f66e65b5a2d6c885a8f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/accumulationbounds.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/euler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/euler.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b39882782f6e77a2dfb4719c51a582d592ada6a3
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/euler.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/finite_diff.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/finite_diff.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9f68f558455aeb0163a91fbbb75f3a9b6b8d0b4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/finite_diff.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/singularities.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/singularities.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..11069b552a81aae0899b4a310a3d7a333cfbe8c1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/singularities.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/util.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ceba7a10c2aea995950b443645e9fd3190985258
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/__pycache__/util.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..5fdc0dcbfddf473715047cf0f9bc7bc8248728ec
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_accumulationbounds.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_accumulationbounds.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..a30c9a575112c68fd065bd181a197567d9b6e892
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_accumulationbounds.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_euler.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_euler.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..f966c18d18d3c7a7b6e043df1981e7cab973e285
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_euler.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_finite_diff.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_finite_diff.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d9f1b5964c5c4472a8b77343ebc5a5c80a593df4
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_finite_diff.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_singularities.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_singularities.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..914aa0a9ce6a5661f8fe712b54ebfcfa84dedcb5
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_singularities.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_util.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_util.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..45e7ebefd56c3036cc51e396acacf3bf4678b2ef
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/__pycache__/test_util.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/test_finite_diff.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/test_finite_diff.py
new file mode 100644
index 0000000000000000000000000000000000000000..e9ecfbdd61b15f516c54bd6d716ba1f264ee2ca0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/calculus/tests/test_finite_diff.py
@@ -0,0 +1,164 @@
+from itertools import product
+
+from sympy.core.function import (Function, diff)
+from sympy.core.numbers import Rational
+from sympy.core.singleton import S
+from sympy.core.symbol import symbols
+from sympy.functions.elementary.exponential import exp
+from sympy.calculus.finite_diff import (
+ apply_finite_diff, differentiate_finite, finite_diff_weights,
+ _as_finite_diff
+)
+from sympy.testing.pytest import raises, warns_deprecated_sympy
+
+
+def test_apply_finite_diff():
+ x, h = symbols('x h')
+ f = Function('f')
+ assert (apply_finite_diff(1, [x-h, x+h], [f(x-h), f(x+h)], x) -
+ (f(x+h)-f(x-h))/(2*h)).simplify() == 0
+
+ assert (apply_finite_diff(1, [5, 6, 7], [f(5), f(6), f(7)], 5) -
+ (Rational(-3, 2)*f(5) + 2*f(6) - S.Half*f(7))).simplify() == 0
+ raises(ValueError, lambda: apply_finite_diff(1, [x, h], [f(x)]))
+
+
+def test_finite_diff_weights():
+
+ d = finite_diff_weights(1, [5, 6, 7], 5)
+ assert d[1][2] == [Rational(-3, 2), 2, Rational(-1, 2)]
+
+ # Table 1, p. 702 in doi:10.1090/S0025-5718-1988-0935077-0
+ # --------------------------------------------------------
+ xl = [0, 1, -1, 2, -2, 3, -3, 4, -4]
+
+ # d holds all coefficients
+ d = finite_diff_weights(4, xl, S.Zero)
+
+ # Zeroeth derivative
+ for i in range(5):
+ assert d[0][i] == [S.One] + [S.Zero]*8
+
+ # First derivative
+ assert d[1][0] == [S.Zero]*9
+ assert d[1][2] == [S.Zero, S.Half, Rational(-1, 2)] + [S.Zero]*6
+ assert d[1][4] == [S.Zero, Rational(2, 3), Rational(-2, 3), Rational(-1, 12), Rational(1, 12)] + [S.Zero]*4
+ assert d[1][6] == [S.Zero, Rational(3, 4), Rational(-3, 4), Rational(-3, 20), Rational(3, 20),
+ Rational(1, 60), Rational(-1, 60)] + [S.Zero]*2
+ assert d[1][8] == [S.Zero, Rational(4, 5), Rational(-4, 5), Rational(-1, 5), Rational(1, 5),
+ Rational(4, 105), Rational(-4, 105), Rational(-1, 280), Rational(1, 280)]
+
+ # Second derivative
+ for i in range(2):
+ assert d[2][i] == [S.Zero]*9
+ assert d[2][2] == [-S(2), S.One, S.One] + [S.Zero]*6
+ assert d[2][4] == [Rational(-5, 2), Rational(4, 3), Rational(4, 3), Rational(-1, 12), Rational(-1, 12)] + [S.Zero]*4
+ assert d[2][6] == [Rational(-49, 18), Rational(3, 2), Rational(3, 2), Rational(-3, 20), Rational(-3, 20),
+ Rational(1, 90), Rational(1, 90)] + [S.Zero]*2
+ assert d[2][8] == [Rational(-205, 72), Rational(8, 5), Rational(8, 5), Rational(-1, 5), Rational(-1, 5),
+ Rational(8, 315), Rational(8, 315), Rational(-1, 560), Rational(-1, 560)]
+
+ # Third derivative
+ for i in range(3):
+ assert d[3][i] == [S.Zero]*9
+ assert d[3][4] == [S.Zero, -S.One, S.One, S.Half, Rational(-1, 2)] + [S.Zero]*4
+ assert d[3][6] == [S.Zero, Rational(-13, 8), Rational(13, 8), S.One, -S.One,
+ Rational(-1, 8), Rational(1, 8)] + [S.Zero]*2
+ assert d[3][8] == [S.Zero, Rational(-61, 30), Rational(61, 30), Rational(169, 120), Rational(-169, 120),
+ Rational(-3, 10), Rational(3, 10), Rational(7, 240), Rational(-7, 240)]
+
+ # Fourth derivative
+ for i in range(4):
+ assert d[4][i] == [S.Zero]*9
+ assert d[4][4] == [S(6), -S(4), -S(4), S.One, S.One] + [S.Zero]*4
+ assert d[4][6] == [Rational(28, 3), Rational(-13, 2), Rational(-13, 2), S(2), S(2),
+ Rational(-1, 6), Rational(-1, 6)] + [S.Zero]*2
+ assert d[4][8] == [Rational(91, 8), Rational(-122, 15), Rational(-122, 15), Rational(169, 60), Rational(169, 60),
+ Rational(-2, 5), Rational(-2, 5), Rational(7, 240), Rational(7, 240)]
+
+ # Table 2, p. 703 in doi:10.1090/S0025-5718-1988-0935077-0
+ # --------------------------------------------------------
+ xl = [[j/S(2) for j in list(range(-i*2+1, 0, 2))+list(range(1, i*2+1, 2))]
+ for i in range(1, 5)]
+
+ # d holds all coefficients
+ d = [finite_diff_weights({0: 1, 1: 2, 2: 4, 3: 4}[i], xl[i], 0) for
+ i in range(4)]
+
+ # Zeroth derivative
+ assert d[0][0][1] == [S.Half, S.Half]
+ assert d[1][0][3] == [Rational(-1, 16), Rational(9, 16), Rational(9, 16), Rational(-1, 16)]
+ assert d[2][0][5] == [Rational(3, 256), Rational(-25, 256), Rational(75, 128), Rational(75, 128),
+ Rational(-25, 256), Rational(3, 256)]
+ assert d[3][0][7] == [Rational(-5, 2048), Rational(49, 2048), Rational(-245, 2048), Rational(1225, 2048),
+ Rational(1225, 2048), Rational(-245, 2048), Rational(49, 2048), Rational(-5, 2048)]
+
+ # First derivative
+ assert d[0][1][1] == [-S.One, S.One]
+ assert d[1][1][3] == [Rational(1, 24), Rational(-9, 8), Rational(9, 8), Rational(-1, 24)]
+ assert d[2][1][5] == [Rational(-3, 640), Rational(25, 384), Rational(-75, 64),
+ Rational(75, 64), Rational(-25, 384), Rational(3, 640)]
+ assert d[3][1][7] == [Rational(5, 7168), Rational(-49, 5120),
+ Rational(245, 3072), Rational(-1225, 1024),
+ Rational(1225, 1024), Rational(-245, 3072),
+ Rational(49, 5120), Rational(-5, 7168)]
+
+ # Reasonably the rest of the table is also correct... (testing of that
+ # deemed excessive at the moment)
+ raises(ValueError, lambda: finite_diff_weights(-1, [1, 2]))
+ raises(ValueError, lambda: finite_diff_weights(1.2, [1, 2]))
+ x = symbols('x')
+ raises(ValueError, lambda: finite_diff_weights(x, [1, 2]))
+
+
+def test_as_finite_diff():
+ x = symbols('x')
+ f = Function('f')
+ dx = Function('dx')
+
+ _as_finite_diff(f(x).diff(x), [x-2, x-1, x, x+1, x+2])
+
+ # Use of undefined functions in ``points``
+ df_true = -f(x+dx(x)/2-dx(x+dx(x)/2)/2) / dx(x+dx(x)/2) \
+ + f(x+dx(x)/2+dx(x+dx(x)/2)/2) / dx(x+dx(x)/2)
+ df_test = diff(f(x), x).as_finite_difference(points=dx(x), x0=x+dx(x)/2)
+ assert (df_test - df_true).simplify() == 0
+
+
+def test_differentiate_finite():
+ x, y, h = symbols('x y h')
+ f = Function('f')
+ with warns_deprecated_sympy():
+ res0 = differentiate_finite(f(x, y) + exp(42), x, y, evaluate=True)
+ xm, xp, ym, yp = [v + sign*S.Half for v, sign in product([x, y], [-1, 1])]
+ ref0 = f(xm, ym) + f(xp, yp) - f(xm, yp) - f(xp, ym)
+ assert (res0 - ref0).simplify() == 0
+
+ g = Function('g')
+ with warns_deprecated_sympy():
+ res1 = differentiate_finite(f(x)*g(x) + 42, x, evaluate=True)
+ ref1 = (-f(x - S.Half) + f(x + S.Half))*g(x) + \
+ (-g(x - S.Half) + g(x + S.Half))*f(x)
+ assert (res1 - ref1).simplify() == 0
+
+ res2 = differentiate_finite(f(x) + x**3 + 42, x, points=[x-1, x+1])
+ ref2 = (f(x + 1) + (x + 1)**3 - f(x - 1) - (x - 1)**3)/2
+ assert (res2 - ref2).simplify() == 0
+ raises(TypeError, lambda: differentiate_finite(f(x)*g(x), x,
+ pints=[x-1, x+1]))
+
+ res3 = differentiate_finite(f(x)*g(x).diff(x), x)
+ ref3 = (-g(x) + g(x + 1))*f(x + S.Half) - (g(x) - g(x - 1))*f(x - S.Half)
+ assert res3 == ref3
+
+ res4 = differentiate_finite(f(x)*g(x).diff(x).diff(x), x)
+ ref4 = -((g(x - Rational(3, 2)) - 2*g(x - S.Half) + g(x + S.Half))*f(x - S.Half)) \
+ + (g(x - S.Half) - 2*g(x + S.Half) + g(x + Rational(3, 2)))*f(x + S.Half)
+ assert res4 == ref4
+
+ res5_expr = f(x).diff(x)*g(x).diff(x)
+ res5 = differentiate_finite(res5_expr, points=[x-h, x, x+h])
+ ref5 = (-2*f(x)/h + f(-h + x)/(2*h) + 3*f(h + x)/(2*h))*(-2*g(x)/h + g(-h + x)/(2*h) \
+ + 3*g(h + x)/(2*h))/(2*h) - (2*f(x)/h - 3*f(-h + x)/(2*h) - \
+ f(h + x)/(2*h))*(2*g(x)/h - 3*g(-h + x)/(2*h) - g(h + x)/(2*h))/(2*h)
+ assert res5 == ref5
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/expr_with_intlimits.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/expr_with_intlimits.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..667bb7185f32642f6d4d0bb1c921530c443f8a36
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/expr_with_intlimits.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/gosper.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/gosper.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7b0a625574d8923b4b15aad5a74a363957d93407
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/gosper.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/products.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/products.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..233997d76c363ec9a29a670ee31a1e058dc905a1
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/products.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/summations.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/summations.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bb88b4bc3b9626f4605e576b3e9b7355bd9981e9
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/__pycache__/summations.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_delta.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_delta.py
new file mode 100644
index 0000000000000000000000000000000000000000..9dc6e88d16346acc7dc775446d7de3f3696d0e03
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_delta.py
@@ -0,0 +1,499 @@
+from sympy.concrete import Sum
+from sympy.concrete.delta import deltaproduct as dp, deltasummation as ds, _extract_delta
+from sympy.core import Eq, S, symbols, oo
+from sympy.functions import KroneckerDelta as KD, Piecewise, piecewise_fold
+from sympy.logic import And
+from sympy.testing.pytest import raises
+
+i, j, k, l, m = symbols("i j k l m", integer=True, finite=True)
+x, y = symbols("x y", commutative=False)
+
+
+def test_deltaproduct_trivial():
+ assert dp(x, (j, 1, 0)) == 1
+ assert dp(x, (j, 1, 3)) == x**3
+ assert dp(x + y, (j, 1, 3)) == (x + y)**3
+ assert dp(x*y, (j, 1, 3)) == (x*y)**3
+ assert dp(KD(i, j), (k, 1, 3)) == KD(i, j)
+ assert dp(x*KD(i, j), (k, 1, 3)) == x**3*KD(i, j)
+ assert dp(x*y*KD(i, j), (k, 1, 3)) == (x*y)**3*KD(i, j)
+
+
+def test_deltaproduct_basic():
+ assert dp(KD(i, j), (j, 1, 3)) == 0
+ assert dp(KD(i, j), (j, 1, 1)) == KD(i, 1)
+ assert dp(KD(i, j), (j, 2, 2)) == KD(i, 2)
+ assert dp(KD(i, j), (j, 3, 3)) == KD(i, 3)
+ assert dp(KD(i, j), (j, 1, k)) == KD(i, 1)*KD(k, 1) + KD(k, 0)
+ assert dp(KD(i, j), (j, k, 3)) == KD(i, 3)*KD(k, 3) + KD(k, 4)
+ assert dp(KD(i, j), (j, k, l)) == KD(i, l)*KD(k, l) + KD(k, l + 1)
+
+
+def test_deltaproduct_mul_x_kd():
+ assert dp(x*KD(i, j), (j, 1, 3)) == 0
+ assert dp(x*KD(i, j), (j, 1, 1)) == x*KD(i, 1)
+ assert dp(x*KD(i, j), (j, 2, 2)) == x*KD(i, 2)
+ assert dp(x*KD(i, j), (j, 3, 3)) == x*KD(i, 3)
+ assert dp(x*KD(i, j), (j, 1, k)) == x*KD(i, 1)*KD(k, 1) + KD(k, 0)
+ assert dp(x*KD(i, j), (j, k, 3)) == x*KD(i, 3)*KD(k, 3) + KD(k, 4)
+ assert dp(x*KD(i, j), (j, k, l)) == x*KD(i, l)*KD(k, l) + KD(k, l + 1)
+
+
+def test_deltaproduct_mul_add_x_y_kd():
+ assert dp((x + y)*KD(i, j), (j, 1, 3)) == 0
+ assert dp((x + y)*KD(i, j), (j, 1, 1)) == (x + y)*KD(i, 1)
+ assert dp((x + y)*KD(i, j), (j, 2, 2)) == (x + y)*KD(i, 2)
+ assert dp((x + y)*KD(i, j), (j, 3, 3)) == (x + y)*KD(i, 3)
+ assert dp((x + y)*KD(i, j), (j, 1, k)) == \
+ (x + y)*KD(i, 1)*KD(k, 1) + KD(k, 0)
+ assert dp((x + y)*KD(i, j), (j, k, 3)) == \
+ (x + y)*KD(i, 3)*KD(k, 3) + KD(k, 4)
+ assert dp((x + y)*KD(i, j), (j, k, l)) == \
+ (x + y)*KD(i, l)*KD(k, l) + KD(k, l + 1)
+
+
+def test_deltaproduct_add_kd_kd():
+ assert dp(KD(i, k) + KD(j, k), (k, 1, 3)) == 0
+ assert dp(KD(i, k) + KD(j, k), (k, 1, 1)) == KD(i, 1) + KD(j, 1)
+ assert dp(KD(i, k) + KD(j, k), (k, 2, 2)) == KD(i, 2) + KD(j, 2)
+ assert dp(KD(i, k) + KD(j, k), (k, 3, 3)) == KD(i, 3) + KD(j, 3)
+ assert dp(KD(i, k) + KD(j, k), (k, 1, l)) == KD(l, 0) + \
+ KD(i, 1)*KD(l, 1) + KD(j, 1)*KD(l, 1) + \
+ KD(i, 1)*KD(j, 2)*KD(l, 2) + KD(j, 1)*KD(i, 2)*KD(l, 2)
+ assert dp(KD(i, k) + KD(j, k), (k, l, 3)) == KD(l, 4) + \
+ KD(i, 3)*KD(l, 3) + KD(j, 3)*KD(l, 3) + \
+ KD(i, 2)*KD(j, 3)*KD(l, 2) + KD(i, 3)*KD(j, 2)*KD(l, 2)
+ assert dp(KD(i, k) + KD(j, k), (k, l, m)) == KD(l, m + 1) + \
+ KD(i, m)*KD(l, m) + KD(j, m)*KD(l, m) + \
+ KD(i, m)*KD(j, m - 1)*KD(l, m - 1) + KD(i, m - 1)*KD(j, m)*KD(l, m - 1)
+
+
+def test_deltaproduct_mul_x_add_kd_kd():
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, 1, 3)) == 0
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, 1, 1)) == x*(KD(i, 1) + KD(j, 1))
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, 2, 2)) == x*(KD(i, 2) + KD(j, 2))
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, 3, 3)) == x*(KD(i, 3) + KD(j, 3))
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, 1, l)) == KD(l, 0) + \
+ x*KD(i, 1)*KD(l, 1) + x*KD(j, 1)*KD(l, 1) + \
+ x**2*KD(i, 1)*KD(j, 2)*KD(l, 2) + x**2*KD(j, 1)*KD(i, 2)*KD(l, 2)
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, l, 3)) == KD(l, 4) + \
+ x*KD(i, 3)*KD(l, 3) + x*KD(j, 3)*KD(l, 3) + \
+ x**2*KD(i, 2)*KD(j, 3)*KD(l, 2) + x**2*KD(i, 3)*KD(j, 2)*KD(l, 2)
+ assert dp(x*(KD(i, k) + KD(j, k)), (k, l, m)) == KD(l, m + 1) + \
+ x*KD(i, m)*KD(l, m) + x*KD(j, m)*KD(l, m) + \
+ x**2*KD(i, m - 1)*KD(j, m)*KD(l, m - 1) + \
+ x**2*KD(i, m)*KD(j, m - 1)*KD(l, m - 1)
+
+
+def test_deltaproduct_mul_add_x_y_add_kd_kd():
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, 1, 3)) == 0
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, 1, 1)) == \
+ (x + y)*(KD(i, 1) + KD(j, 1))
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, 2, 2)) == \
+ (x + y)*(KD(i, 2) + KD(j, 2))
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, 3, 3)) == \
+ (x + y)*(KD(i, 3) + KD(j, 3))
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, 1, l)) == KD(l, 0) + \
+ (x + y)*KD(i, 1)*KD(l, 1) + (x + y)*KD(j, 1)*KD(l, 1) + \
+ (x + y)**2*KD(i, 1)*KD(j, 2)*KD(l, 2) + \
+ (x + y)**2*KD(j, 1)*KD(i, 2)*KD(l, 2)
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, l, 3)) == KD(l, 4) + \
+ (x + y)*KD(i, 3)*KD(l, 3) + (x + y)*KD(j, 3)*KD(l, 3) + \
+ (x + y)**2*KD(i, 2)*KD(j, 3)*KD(l, 2) + \
+ (x + y)**2*KD(i, 3)*KD(j, 2)*KD(l, 2)
+ assert dp((x + y)*(KD(i, k) + KD(j, k)), (k, l, m)) == KD(l, m + 1) + \
+ (x + y)*KD(i, m)*KD(l, m) + (x + y)*KD(j, m)*KD(l, m) + \
+ (x + y)**2*KD(i, m - 1)*KD(j, m)*KD(l, m - 1) + \
+ (x + y)**2*KD(i, m)*KD(j, m - 1)*KD(l, m - 1)
+
+
+def test_deltaproduct_add_mul_x_y_mul_x_kd():
+ assert dp(x*y + x*KD(i, j), (j, 1, 3)) == (x*y)**3 + \
+ x*(x*y)**2*KD(i, 1) + (x*y)*x*(x*y)*KD(i, 2) + (x*y)**2*x*KD(i, 3)
+ assert dp(x*y + x*KD(i, j), (j, 1, 1)) == x*y + x*KD(i, 1)
+ assert dp(x*y + x*KD(i, j), (j, 2, 2)) == x*y + x*KD(i, 2)
+ assert dp(x*y + x*KD(i, j), (j, 3, 3)) == x*y + x*KD(i, 3)
+ assert dp(x*y + x*KD(i, j), (j, 1, k)) == \
+ (x*y)**k + Piecewise(
+ ((x*y)**(i - 1)*x*(x*y)**(k - i), And(1 <= i, i <= k)),
+ (0, True)
+ )
+ assert dp(x*y + x*KD(i, j), (j, k, 3)) == \
+ (x*y)**(-k + 4) + Piecewise(
+ ((x*y)**(i - k)*x*(x*y)**(3 - i), And(k <= i, i <= 3)),
+ (0, True)
+ )
+ assert dp(x*y + x*KD(i, j), (j, k, l)) == \
+ (x*y)**(-k + l + 1) + Piecewise(
+ ((x*y)**(i - k)*x*(x*y)**(l - i), And(k <= i, i <= l)),
+ (0, True)
+ )
+
+
+def test_deltaproduct_mul_x_add_y_kd():
+ assert dp(x*(y + KD(i, j)), (j, 1, 3)) == (x*y)**3 + \
+ x*(x*y)**2*KD(i, 1) + (x*y)*x*(x*y)*KD(i, 2) + (x*y)**2*x*KD(i, 3)
+ assert dp(x*(y + KD(i, j)), (j, 1, 1)) == x*(y + KD(i, 1))
+ assert dp(x*(y + KD(i, j)), (j, 2, 2)) == x*(y + KD(i, 2))
+ assert dp(x*(y + KD(i, j)), (j, 3, 3)) == x*(y + KD(i, 3))
+ assert dp(x*(y + KD(i, j)), (j, 1, k)) == \
+ (x*y)**k + Piecewise(
+ ((x*y)**(i - 1)*x*(x*y)**(k - i), And(1 <= i, i <= k)),
+ (0, True)
+ ).expand()
+ assert dp(x*(y + KD(i, j)), (j, k, 3)) == \
+ ((x*y)**(-k + 4) + Piecewise(
+ ((x*y)**(i - k)*x*(x*y)**(3 - i), And(k <= i, i <= 3)),
+ (0, True)
+ )).expand()
+ assert dp(x*(y + KD(i, j)), (j, k, l)) == \
+ ((x*y)**(-k + l + 1) + Piecewise(
+ ((x*y)**(i - k)*x*(x*y)**(l - i), And(k <= i, i <= l)),
+ (0, True)
+ )).expand()
+
+
+def test_deltaproduct_mul_x_add_y_twokd():
+ assert dp(x*(y + 2*KD(i, j)), (j, 1, 3)) == (x*y)**3 + \
+ 2*x*(x*y)**2*KD(i, 1) + 2*x*y*x*x*y*KD(i, 2) + 2*(x*y)**2*x*KD(i, 3)
+ assert dp(x*(y + 2*KD(i, j)), (j, 1, 1)) == x*(y + 2*KD(i, 1))
+ assert dp(x*(y + 2*KD(i, j)), (j, 2, 2)) == x*(y + 2*KD(i, 2))
+ assert dp(x*(y + 2*KD(i, j)), (j, 3, 3)) == x*(y + 2*KD(i, 3))
+ assert dp(x*(y + 2*KD(i, j)), (j, 1, k)) == \
+ (x*y)**k + Piecewise(
+ (2*(x*y)**(i - 1)*x*(x*y)**(k - i), And(1 <= i, i <= k)),
+ (0, True)
+ ).expand()
+ assert dp(x*(y + 2*KD(i, j)), (j, k, 3)) == \
+ ((x*y)**(-k + 4) + Piecewise(
+ (2*(x*y)**(i - k)*x*(x*y)**(3 - i), And(k <= i, i <= 3)),
+ (0, True)
+ )).expand()
+ assert dp(x*(y + 2*KD(i, j)), (j, k, l)) == \
+ ((x*y)**(-k + l + 1) + Piecewise(
+ (2*(x*y)**(i - k)*x*(x*y)**(l - i), And(k <= i, i <= l)),
+ (0, True)
+ )).expand()
+
+
+def test_deltaproduct_mul_add_x_y_add_y_kd():
+ assert dp((x + y)*(y + KD(i, j)), (j, 1, 3)) == ((x + y)*y)**3 + \
+ (x + y)*((x + y)*y)**2*KD(i, 1) + \
+ (x + y)*y*(x + y)**2*y*KD(i, 2) + \
+ ((x + y)*y)**2*(x + y)*KD(i, 3)
+ assert dp((x + y)*(y + KD(i, j)), (j, 1, 1)) == (x + y)*(y + KD(i, 1))
+ assert dp((x + y)*(y + KD(i, j)), (j, 2, 2)) == (x + y)*(y + KD(i, 2))
+ assert dp((x + y)*(y + KD(i, j)), (j, 3, 3)) == (x + y)*(y + KD(i, 3))
+ assert dp((x + y)*(y + KD(i, j)), (j, 1, k)) == \
+ ((x + y)*y)**k + Piecewise(
+ (((x + y)*y)**(-1)*((x + y)*y)**i*(x + y)*((x + y)*y
+ )**k*((x + y)*y)**(-i), (i >= 1) & (i <= k)), (0, True))
+ assert dp((x + y)*(y + KD(i, j)), (j, k, 3)) == (
+ (x + y)*y)**4*((x + y)*y)**(-k) + Piecewise((((x + y)*y)**i*(
+ (x + y)*y)**(-k)*(x + y)*((x + y)*y)**3*((x + y)*y)**(-i),
+ (i >= k) & (i <= 3)), (0, True))
+ assert dp((x + y)*(y + KD(i, j)), (j, k, l)) == \
+ (x + y)*y*((x + y)*y)**l*((x + y)*y)**(-k) + Piecewise(
+ (((x + y)*y)**i*((x + y)*y)**(-k)*(x + y)*((x + y)*y
+ )**l*((x + y)*y)**(-i), (i >= k) & (i <= l)), (0, True))
+
+
+def test_deltaproduct_mul_add_x_kd_add_y_kd():
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, 1, 3)) == \
+ KD(i, 1)*(KD(i, k) + x)*((KD(i, k) + x)*y)**2 + \
+ KD(i, 2)*(KD(i, k) + x)*y*(KD(i, k) + x)**2*y + \
+ KD(i, 3)*((KD(i, k) + x)*y)**2*(KD(i, k) + x) + \
+ ((KD(i, k) + x)*y)**3
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, 1, 1)) == \
+ (x + KD(i, k))*(y + KD(i, 1))
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, 2, 2)) == \
+ (x + KD(i, k))*(y + KD(i, 2))
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, 3, 3)) == \
+ (x + KD(i, k))*(y + KD(i, 3))
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, 1, k)) == \
+ ((KD(i, k) + x)*y)**k + Piecewise(
+ (((KD(i, k) + x)*y)**(-1)*((KD(i, k) + x)*y)**i*(KD(i, k) + x
+ )*((KD(i, k) + x)*y)**k*((KD(i, k) + x)*y)**(-i), (i >= 1
+ ) & (i <= k)), (0, True))
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, k, 3)) == (
+ (KD(i, k) + x)*y)**4*((KD(i, k) + x)*y)**(-k) + Piecewise(
+ (((KD(i, k) + x)*y)**i*((KD(i, k) + x)*y)**(-k)*(KD(i, k)
+ + x)*((KD(i, k) + x)*y)**3*((KD(i, k) + x)*y)**(-i),
+ (i >= k) & (i <= 3)), (0, True))
+ assert dp((x + KD(i, k))*(y + KD(i, j)), (j, k, l)) == (
+ KD(i, k) + x)*y*((KD(i, k) + x)*y)**l*((KD(i, k) + x)*y
+ )**(-k) + Piecewise((((KD(i, k) + x)*y)**i*((KD(i, k) + x
+ )*y)**(-k)*(KD(i, k) + x)*((KD(i, k) + x)*y)**l*((KD(i, k) + x
+ )*y)**(-i), (i >= k) & (i <= l)), (0, True))
+
+
+def test_deltasummation_trivial():
+ assert ds(x, (j, 1, 0)) == 0
+ assert ds(x, (j, 1, 3)) == 3*x
+ assert ds(x + y, (j, 1, 3)) == 3*(x + y)
+ assert ds(x*y, (j, 1, 3)) == 3*x*y
+ assert ds(KD(i, j), (k, 1, 3)) == 3*KD(i, j)
+ assert ds(x*KD(i, j), (k, 1, 3)) == 3*x*KD(i, j)
+ assert ds(x*y*KD(i, j), (k, 1, 3)) == 3*x*y*KD(i, j)
+
+
+def test_deltasummation_basic_numerical():
+ n = symbols('n', integer=True, nonzero=True)
+ assert ds(KD(n, 0), (n, 1, 3)) == 0
+
+ # return unevaluated, until it gets implemented
+ assert ds(KD(i**2, j**2), (j, -oo, oo)) == \
+ Sum(KD(i**2, j**2), (j, -oo, oo))
+
+ assert Piecewise((KD(i, k), And(1 <= i, i <= 3)), (0, True)) == \
+ ds(KD(i, j)*KD(j, k), (j, 1, 3)) == \
+ ds(KD(j, k)*KD(i, j), (j, 1, 3))
+
+ assert ds(KD(i, k), (k, -oo, oo)) == 1
+ assert ds(KD(i, k), (k, 0, oo)) == Piecewise((1, S.Zero <= i), (0, True))
+ assert ds(KD(i, k), (k, 1, 3)) == \
+ Piecewise((1, And(1 <= i, i <= 3)), (0, True))
+ assert ds(k*KD(i, j)*KD(j, k), (k, -oo, oo)) == j*KD(i, j)
+ assert ds(j*KD(i, j), (j, -oo, oo)) == i
+ assert ds(i*KD(i, j), (i, -oo, oo)) == j
+ assert ds(x, (i, 1, 3)) == 3*x
+ assert ds((i + j)*KD(i, j), (j, -oo, oo)) == 2*i
+
+
+def test_deltasummation_basic_symbolic():  # KD(i, j) summed over j with numeric and symbolic limits
+    assert ds(KD(i, j), (j, 1, 3)) == \
+        Piecewise((1, And(1 <= i, i <= 3)), (0, True))
+    assert ds(KD(i, j), (j, 1, 1)) == Piecewise((1, Eq(i, 1)), (0, True))
+    assert ds(KD(i, j), (j, 2, 2)) == Piecewise((1, Eq(i, 2)), (0, True))
+    assert ds(KD(i, j), (j, 3, 3)) == Piecewise((1, Eq(i, 3)), (0, True))
+    assert ds(KD(i, j), (j, 1, k)) == \
+        Piecewise((1, And(1 <= i, i <= k)), (0, True))
+    assert ds(KD(i, j), (j, k, 3)) == \
+        Piecewise((1, And(k <= i, i <= 3)), (0, True))
+    assert ds(KD(i, j), (j, k, l)) == \
+        Piecewise((1, And(k <= i, i <= l)), (0, True))
+
+
+def test_deltasummation_mul_x_kd():  # x*KD(i, j) summed over j: x replaces 1 in the Piecewise
+    assert ds(x*KD(i, j), (j, 1, 3)) == \
+        Piecewise((x, And(1 <= i, i <= 3)), (0, True))
+    assert ds(x*KD(i, j), (j, 1, 1)) == Piecewise((x, Eq(i, 1)), (0, True))
+    assert ds(x*KD(i, j), (j, 2, 2)) == Piecewise((x, Eq(i, 2)), (0, True))
+    assert ds(x*KD(i, j), (j, 3, 3)) == Piecewise((x, Eq(i, 3)), (0, True))
+    assert ds(x*KD(i, j), (j, 1, k)) == \
+        Piecewise((x, And(1 <= i, i <= k)), (0, True))
+    assert ds(x*KD(i, j), (j, k, 3)) == \
+        Piecewise((x, And(k <= i, i <= 3)), (0, True))
+    assert ds(x*KD(i, j), (j, k, l)) == \
+        Piecewise((x, And(k <= i, i <= l)), (0, True))
+
+
+def test_deltasummation_mul_add_x_y_kd():  # (x + y)*KD(i, j) summed over j
+    assert ds((x + y)*KD(i, j), (j, 1, 3)) == \
+        Piecewise((x + y, And(1 <= i, i <= 3)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, 1, 1)) == \
+        Piecewise((x + y, Eq(i, 1)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, 2, 2)) == \
+        Piecewise((x + y, Eq(i, 2)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, 3, 3)) == \
+        Piecewise((x + y, Eq(i, 3)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, 1, k)) == \
+        Piecewise((x + y, And(1 <= i, i <= k)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, k, 3)) == \
+        Piecewise((x + y, And(k <= i, i <= 3)), (0, True))
+    assert ds((x + y)*KD(i, j), (j, k, l)) == \
+        Piecewise((x + y, And(k <= i, i <= l)), (0, True))
+
+
+def test_deltasummation_add_kd_kd():  # KD(i, k) + KD(j, k) summed over k; expected value is the folded sum of two Piecewise terms
+    assert ds(KD(i, k) + KD(j, k), (k, 1, 3)) == piecewise_fold(
+        Piecewise((1, And(1 <= i, i <= 3)), (0, True)) +
+        Piecewise((1, And(1 <= j, j <= 3)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, 1, 1)) == piecewise_fold(
+        Piecewise((1, Eq(i, 1)), (0, True)) +
+        Piecewise((1, Eq(j, 1)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, 2, 2)) == piecewise_fold(
+        Piecewise((1, Eq(i, 2)), (0, True)) +
+        Piecewise((1, Eq(j, 2)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, 3, 3)) == piecewise_fold(
+        Piecewise((1, Eq(i, 3)), (0, True)) +
+        Piecewise((1, Eq(j, 3)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, 1, l)) == piecewise_fold(
+        Piecewise((1, And(1 <= i, i <= l)), (0, True)) +
+        Piecewise((1, And(1 <= j, j <= l)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, l, 3)) == piecewise_fold(
+        Piecewise((1, And(l <= i, i <= 3)), (0, True)) +
+        Piecewise((1, And(l <= j, j <= 3)), (0, True)))
+    assert ds(KD(i, k) + KD(j, k), (k, l, m)) == piecewise_fold(
+        Piecewise((1, And(l <= i, i <= m)), (0, True)) +
+        Piecewise((1, And(l <= j, j <= m)), (0, True)))
+
+
+def test_deltasummation_add_mul_x_kd_kd():  # x*KD(i, k) + KD(j, k) summed over k; only the first delta carries x
+    assert ds(x*KD(i, k) + KD(j, k), (k, 1, 3)) == piecewise_fold(
+        Piecewise((x, And(1 <= i, i <= 3)), (0, True)) +
+        Piecewise((1, And(1 <= j, j <= 3)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, 1, 1)) == piecewise_fold(
+        Piecewise((x, Eq(i, 1)), (0, True)) +
+        Piecewise((1, Eq(j, 1)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, 2, 2)) == piecewise_fold(
+        Piecewise((x, Eq(i, 2)), (0, True)) +
+        Piecewise((1, Eq(j, 2)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, 3, 3)) == piecewise_fold(
+        Piecewise((x, Eq(i, 3)), (0, True)) +
+        Piecewise((1, Eq(j, 3)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, 1, l)) == piecewise_fold(
+        Piecewise((x, And(1 <= i, i <= l)), (0, True)) +
+        Piecewise((1, And(1 <= j, j <= l)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, l, 3)) == piecewise_fold(
+        Piecewise((x, And(l <= i, i <= 3)), (0, True)) +
+        Piecewise((1, And(l <= j, j <= 3)), (0, True)))
+    assert ds(x*KD(i, k) + KD(j, k), (k, l, m)) == piecewise_fold(
+        Piecewise((x, And(l <= i, i <= m)), (0, True)) +
+        Piecewise((1, And(l <= j, j <= m)), (0, True)))
+
+
+def test_deltasummation_mul_x_add_kd_kd():  # x*(KD(i, k) + KD(j, k)) summed over k; both Piecewise terms carry x
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, 1, 3)) == piecewise_fold(
+        Piecewise((x, And(1 <= i, i <= 3)), (0, True)) +
+        Piecewise((x, And(1 <= j, j <= 3)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, 1, 1)) == piecewise_fold(
+        Piecewise((x, Eq(i, 1)), (0, True)) +
+        Piecewise((x, Eq(j, 1)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, 2, 2)) == piecewise_fold(
+        Piecewise((x, Eq(i, 2)), (0, True)) +
+        Piecewise((x, Eq(j, 2)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, 3, 3)) == piecewise_fold(
+        Piecewise((x, Eq(i, 3)), (0, True)) +
+        Piecewise((x, Eq(j, 3)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, 1, l)) == piecewise_fold(
+        Piecewise((x, And(1 <= i, i <= l)), (0, True)) +
+        Piecewise((x, And(1 <= j, j <= l)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, l, 3)) == piecewise_fold(
+        Piecewise((x, And(l <= i, i <= 3)), (0, True)) +
+        Piecewise((x, And(l <= j, j <= 3)), (0, True)))
+    assert ds(x*(KD(i, k) + KD(j, k)), (k, l, m)) == piecewise_fold(
+        Piecewise((x, And(l <= i, i <= m)), (0, True)) +
+        Piecewise((x, And(l <= j, j <= m)), (0, True)))
+
+
+def test_deltasummation_mul_add_x_y_add_kd_kd():  # (x + y)*(KD(i, k) + KD(j, k)) summed over k
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, 1, 3)) == piecewise_fold(
+        Piecewise((x + y, And(1 <= i, i <= 3)), (0, True)) +
+        Piecewise((x + y, And(1 <= j, j <= 3)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, 1, 1)) == piecewise_fold(
+        Piecewise((x + y, Eq(i, 1)), (0, True)) +
+        Piecewise((x + y, Eq(j, 1)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, 2, 2)) == piecewise_fold(
+        Piecewise((x + y, Eq(i, 2)), (0, True)) +
+        Piecewise((x + y, Eq(j, 2)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, 3, 3)) == piecewise_fold(
+        Piecewise((x + y, Eq(i, 3)), (0, True)) +
+        Piecewise((x + y, Eq(j, 3)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, 1, l)) == piecewise_fold(
+        Piecewise((x + y, And(1 <= i, i <= l)), (0, True)) +
+        Piecewise((x + y, And(1 <= j, j <= l)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, l, 3)) == piecewise_fold(
+        Piecewise((x + y, And(l <= i, i <= 3)), (0, True)) +
+        Piecewise((x + y, And(l <= j, j <= 3)), (0, True)))
+    assert ds((x + y)*(KD(i, k) + KD(j, k)), (k, l, m)) == piecewise_fold(
+        Piecewise((x + y, And(l <= i, i <= m)), (0, True)) +
+        Piecewise((x + y, And(l <= j, j <= m)), (0, True)))
+
+
+def test_deltasummation_add_mul_x_y_mul_x_kd():  # x*y + x*KD(i, j): the delta-free x*y term is scaled by the number of index values
+    assert ds(x*y + x*KD(i, j), (j, 1, 3)) == \
+        Piecewise((3*x*y + x, And(1 <= i, i <= 3)), (3*x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, 1, 1)) == \
+        Piecewise((x*y + x, Eq(i, 1)), (x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, 2, 2)) == \
+        Piecewise((x*y + x, Eq(i, 2)), (x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, 3, 3)) == \
+        Piecewise((x*y + x, Eq(i, 3)), (x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, 1, k)) == \
+        Piecewise((k*x*y + x, And(1 <= i, i <= k)), (k*x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, k, 3)) == \
+        Piecewise(((4 - k)*x*y + x, And(k <= i, i <= 3)), ((4 - k)*x*y, True))
+    assert ds(x*y + x*KD(i, j), (j, k, l)) == Piecewise(
+        ((l - k + 1)*x*y + x, And(k <= i, i <= l)), ((l - k + 1)*x*y, True))
+
+
+def test_deltasummation_mul_x_add_y_kd():  # factored summand x*(y + KD(i, j)) summed over j
+    assert ds(x*(y + KD(i, j)), (j, 1, 3)) == \
+        Piecewise((3*x*y + x, And(1 <= i, i <= 3)), (3*x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, 1, 1)) == \
+        Piecewise((x*y + x, Eq(i, 1)), (x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, 2, 2)) == \
+        Piecewise((x*y + x, Eq(i, 2)), (x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, 3, 3)) == \
+        Piecewise((x*y + x, Eq(i, 3)), (x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, 1, k)) == \
+        Piecewise((k*x*y + x, And(1 <= i, i <= k)), (k*x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, k, 3)) == \
+        Piecewise(((4 - k)*x*y + x, And(k <= i, i <= 3)), ((4 - k)*x*y, True))
+    assert ds(x*(y + KD(i, j)), (j, k, l)) == Piecewise(
+        ((l - k + 1)*x*y + x, And(k <= i, i <= l)), ((l - k + 1)*x*y, True))
+
+
+def test_deltasummation_mul_x_add_y_twokd():  # delta has coefficient 2, so the in-range branch gains 2*x
+    assert ds(x*(y + 2*KD(i, j)), (j, 1, 3)) == \
+        Piecewise((3*x*y + 2*x, And(1 <= i, i <= 3)), (3*x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, 1, 1)) == \
+        Piecewise((x*y + 2*x, Eq(i, 1)), (x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, 2, 2)) == \
+        Piecewise((x*y + 2*x, Eq(i, 2)), (x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, 3, 3)) == \
+        Piecewise((x*y + 2*x, Eq(i, 3)), (x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, 1, k)) == \
+        Piecewise((k*x*y + 2*x, And(1 <= i, i <= k)), (k*x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, k, 3)) == Piecewise(
+        ((4 - k)*x*y + 2*x, And(k <= i, i <= 3)), ((4 - k)*x*y, True))
+    assert ds(x*(y + 2*KD(i, j)), (j, k, l)) == Piecewise(
+        ((l - k + 1)*x*y + 2*x, And(k <= i, i <= l)), ((l - k + 1)*x*y, True))
+
+
+def test_deltasummation_mul_add_x_y_add_y_kd():  # (x + y)*(y + KD(i, j)) summed over j
+    assert ds((x + y)*(y + KD(i, j)), (j, 1, 3)) == Piecewise(
+        (3*(x + y)*y + x + y, And(1 <= i, i <= 3)), (3*(x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, 1, 1)) == \
+        Piecewise(((x + y)*y + x + y, Eq(i, 1)), ((x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, 2, 2)) == \
+        Piecewise(((x + y)*y + x + y, Eq(i, 2)), ((x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, 3, 3)) == \
+        Piecewise(((x + y)*y + x + y, Eq(i, 3)), ((x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, 1, k)) == Piecewise(
+        (k*(x + y)*y + x + y, And(1 <= i, i <= k)), (k*(x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, k, 3)) == Piecewise(
+        ((4 - k)*(x + y)*y + x + y, And(k <= i, i <= 3)),
+        ((4 - k)*(x + y)*y, True))
+    assert ds((x + y)*(y + KD(i, j)), (j, k, l)) == Piecewise(
+        ((l - k + 1)*(x + y)*y + x + y, And(k <= i, i <= l)),
+        ((l - k + 1)*(x + y)*y, True))
+
+
+def test_deltasummation_mul_add_x_kd_add_y_kd():  # KD(i, k) does not contain the summation index j and stays in the result
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, 1, 3)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, And(1 <= i, i <= 3)), (0, True)) +
+        3*(KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, 1, 1)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, Eq(i, 1)), (0, True)) +
+        (KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, 2, 2)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, Eq(i, 2)), (0, True)) +
+        (KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, 3, 3)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, Eq(i, 3)), (0, True)) +
+        (KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, 1, k)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, And(1 <= i, i <= k)), (0, True)) +
+        k*(KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, k, 3)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, And(k <= i, i <= 3)), (0, True)) +
+        (4 - k)*(KD(i, k) + x)*y)
+    assert ds((x + KD(i, k))*(y + KD(i, j)), (j, k, l)) == piecewise_fold(
+        Piecewise((KD(i, k) + x, And(k <= i, i <= l)), (0, True)) +
+        (l - k + 1)*(KD(i, k) + x)*y)
+
+
+def test_extract_delta():  # a sum of two deltas passed to _extract_delta must raise ValueError
+    raises(ValueError, lambda: _extract_delta(KD(i, j) + KD(k, l), i))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_gosper.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_gosper.py
new file mode 100644
index 0000000000000000000000000000000000000000..77b642a9b7cd55f96840a8e20e517206b6a6f8f0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_gosper.py
@@ -0,0 +1,204 @@
+"""Tests for Gosper's algorithm for hypergeometric summation. """
+
+from sympy.core.numbers import (Rational, pi)
+from sympy.core.singleton import S
+from sympy.core.symbol import Symbol
+from sympy.functions.combinatorial.factorials import (binomial, factorial)
+from sympy.functions.elementary.exponential import (exp, log)
+from sympy.functions.elementary.miscellaneous import sqrt
+from sympy.functions.special.gamma_functions import gamma
+from sympy.polys.polytools import Poly
+from sympy.simplify.simplify import simplify
+from sympy.concrete.gosper import gosper_normal, gosper_sum, gosper_term
+from sympy.abc import a, b, j, k, m, n, r, x
+
+
+def test_gosper_normal():  # same triple expected as Poly objects and, with polys=False, as plain expressions
+    eq = 4*n + 5, 2*(4*n + 1)*(2*n + 3), n
+    assert gosper_normal(*eq) == \
+        (Poly(Rational(1, 4), n), Poly(n + Rational(3, 2)), Poly(n + Rational(1, 4)))
+    assert gosper_normal(*eq, polys=False) == \
+        (Rational(1, 4), n + Rational(3, 2), n + Rational(1, 4))
+
+
+def test_gosper_term():  # expected gosper_term value for the summand (4*k + 1)*k!/(2*k + 1)!
+    assert gosper_term((4*k + 1)*factorial(
+        k)/factorial(2*k + 1), k) == (-k - S.Half)/(k + Rational(1, 4))
+
+
+def test_gosper_sum():  # closed forms for several summands; None is the expected value where gosper_sum gives no result
+    assert gosper_sum(1, (k, 0, n)) == 1 + n
+    assert gosper_sum(k, (k, 0, n)) == n*(1 + n)/2
+    assert gosper_sum(k**2, (k, 0, n)) == n*(1 + n)*(1 + 2*n)/6
+    assert gosper_sum(k**3, (k, 0, n)) == n**2*(1 + n)**2/4
+
+    assert gosper_sum(2**k, (k, 0, n)) == 2*2**n - 1
+
+    assert gosper_sum(factorial(k), (k, 0, n)) is None
+    assert gosper_sum(binomial(n, k), (k, 0, n)) is None
+
+    assert gosper_sum(factorial(k)/k**2, (k, 0, n)) is None
+    assert gosper_sum((k - 3)*factorial(k), (k, 0, n)) is None
+
+    assert gosper_sum(k*factorial(k), k) == factorial(k)  # bare symbol instead of a limit tuple
+    assert gosper_sum(
+        k*factorial(k), (k, 0, n)) == n*factorial(n) + factorial(n) - 1
+
+    assert gosper_sum((-1)**k*binomial(n, k), (k, 0, n)) == 0
+    assert gosper_sum((
+        -1)**k*binomial(n, k), (k, 0, m)) == -(-1)**m*(m - n)*binomial(n, m)/n
+
+    assert gosper_sum((4*k + 1)*factorial(k)/factorial(2*k + 1), (k, 0, n)) == \
+        (2*factorial(2*n + 1) - factorial(n))/factorial(2*n + 1)
+
+    # Regression check for issue 6033; the result is simplified before comparing.
+    assert gosper_sum(
+        n*(n + a + b)*a**n*b**n/(factorial(n + a)*factorial(n + b)), \
+        (n, 0, m)).simplify() == -exp(m*log(a) + m*log(b))*gamma(a + 1) \
+        *gamma(b + 1)/(gamma(a)*gamma(b)*gamma(a + m + 1)*gamma(b + m + 1)) \
+        + 1/(gamma(a)*gamma(b))
+
+
+def test_gosper_sum_indefinite():  # second argument is the bare symbol k rather than a (k, a, b) tuple
+    assert gosper_sum(k, k) == k*(k - 1)/2
+    assert gosper_sum(k**2, k) == k*(k - 1)*(2*k - 1)/6
+
+    assert gosper_sum(1/(k*(k + 1)), k) == -1/k
+    assert gosper_sum(-(27*k**4 + 158*k**3 + 430*k**2 + 678*k + 445)*gamma(2*k
+        + 4)/(3*(3*k + 7)*gamma(3*k + 6)), k) == \
+        (3*k + 5)*(k**2 + 2*k + 5)*gamma(2*k + 4)/gamma(3*k + 6)
+
+
+def test_gosper_sum_parametric():  # summand depends on the extra symbol m besides the index j
+    assert gosper_sum(binomial(S.Half, m - j + 1)*binomial(S.Half, m + j), (j, 1, n)) == \
+        n*(1 + m - n)*(-1 + 2*m + 2*n)*binomial(S.Half, 1 + m - n)* \
+        binomial(S.Half, m + n)/(m*(1 + 2*m))
+
+
+def test_gosper_sum_algebraic():  # summand contains the algebraic constant sqrt(2)
+    assert gosper_sum(
+        n**2 + sqrt(2), (n, 0, m)) == (m + 1)*(2*m**2 + m + 6*sqrt(2))/6
+
+
+def test_gosper_sum_iterated():  # each expected result f2..f4 is reused as the next summand
+    f1 = binomial(2*k, k)/4**k
+    f2 = (1 + 2*n)*binomial(2*n, n)/4**n
+    f3 = (1 + 2*n)*(3 + 2*n)*binomial(2*n, n)/(3*4**n)
+    f4 = (1 + 2*n)*(3 + 2*n)*(5 + 2*n)*binomial(2*n, n)/(15*4**n)
+    f5 = (1 + 2*n)*(3 + 2*n)*(5 + 2*n)*(7 + 2*n)*binomial(2*n, n)/(105*4**n)
+
+    assert gosper_sum(f1, (k, 0, n)) == f2
+    assert gosper_sum(f2, (n, 0, n)) == f3  # n is both the index and the upper limit here
+    assert gosper_sum(f3, (n, 0, n)) == f4
+    assert gosper_sum(f4, (n, 0, n)) == f5
+
+# the AeqB tests test expressions given in
+# www.math.upenn.edu/~wilf/AeqB.pdf
+
+
+def test_gosper_sum_AeqB_part1():  # each result g is compared with g1a..g1h via simplify(g - expected) == 0
+    f1a = n**4
+    f1b = n**3*2**n
+    f1c = 1/(n**2 + sqrt(5)*n - 1)
+    f1d = n**4*4**n/binomial(2*n, n)
+    f1e = factorial(3*n)/(factorial(n)*factorial(n + 1)*factorial(n + 2)*27**n)
+    f1f = binomial(2*n, n)**2/((n + 1)*4**(2*n))
+    f1g = (4*n - 1)*binomial(2*n, n)**2/((2*n - 1)**2*4**(2*n))
+    f1h = n*factorial(n - S.Half)**2/factorial(n + 1)**2
+
+    g1a = m*(m + 1)*(2*m + 1)*(3*m**2 + 3*m - 1)/30
+    g1b = 26 + 2**(m + 1)*(m**3 - 3*m**2 + 9*m - 13)
+    g1c = (m + 1)*(m*(m**2 - 7*m + 3)*sqrt(5) - (
+        3*m**3 - 7*m**2 + 19*m - 6))/(2*m**3*sqrt(5) + m**4 + 5*m**2 - 1)/6
+    g1d = Rational(-2, 231) + 2*4**m*(m + 1)*(63*m**4 + 112*m**3 + 18*m**2 -
+        22*m + 3)/(693*binomial(2*m, m))
+    g1e = Rational(-9, 2) + (81*m**2 + 261*m + 200)*factorial(
+        3*m + 2)/(40*27**m*factorial(m)*factorial(m + 1)*factorial(m + 2))
+    g1f = (2*m + 1)**2*binomial(2*m, m)**2/(4**(2*m)*(m + 1))
+    g1g = -binomial(2*m, m)**2/4**(2*m)
+    g1h = 4*pi -(2*m + 1)**2*(3*m + 4)*factorial(m - S.Half)**2/factorial(m + 1)**2
+
+    g = gosper_sum(f1a, (n, 0, m))
+    assert g is not None and simplify(g - g1a) == 0
+    g = gosper_sum(f1b, (n, 0, m))
+    assert g is not None and simplify(g - g1b) == 0
+    g = gosper_sum(f1c, (n, 0, m))
+    assert g is not None and simplify(g - g1c) == 0
+    g = gosper_sum(f1d, (n, 0, m))
+    assert g is not None and simplify(g - g1d) == 0
+    g = gosper_sum(f1e, (n, 0, m))
+    assert g is not None and simplify(g - g1e) == 0
+    g = gosper_sum(f1f, (n, 0, m))
+    assert g is not None and simplify(g - g1f) == 0
+    g = gosper_sum(f1g, (n, 0, m))
+    assert g is not None and simplify(g - g1g) == 0
+    g = gosper_sum(f1h, (n, 0, m))
+    # rewrite(gamma) is needed here because the difference has terms
+    # involving factorial(1/2)
+    assert g is not None and simplify(g - g1h).rewrite(gamma) == 0
+
+
+def test_gosper_sum_AeqB_part2():  # summands with the extra parameters a, r and x
+    f2a = n**2*a**n
+    f2b = (n - r/2)*binomial(r, n)
+    f2c = factorial(n - 1)**2/(factorial(n - x)*factorial(n + x))
+
+    g2a = -a*(a + 1)/(a - 1)**3 + a**(
+        m + 1)*(a**2*m**2 - 2*a*m**2 + m**2 - 2*a*m + 2*m + a + 1)/(a - 1)**3
+    g2b = (m - r)*binomial(r, m)/2
+    ff = factorial(1 - x)*factorial(1 + x)
+    g2c = 1/ff*(
+        1 - 1/x**2) + factorial(m)**2/(x**2*factorial(m - x)*factorial(m + x))
+
+    g = gosper_sum(f2a, (n, 0, m))
+    assert g is not None and simplify(g - g2a) == 0
+    g = gosper_sum(f2b, (n, 0, m))
+    assert g is not None and simplify(g - g2b) == 0
+    g = gosper_sum(f2c, (n, 1, m))  # this sum starts at n = 1, unlike the two above
+    assert g is not None and simplify(g - g2c) == 0
+
+
+def test_gosper_nan():  # shadows the module-level a, b, n, m with symbols carrying assumptions
+    a = Symbol('a', positive=True)
+    b = Symbol('b', positive=True)
+    n = Symbol('n', integer=True)
+    m = Symbol('m', integer=True)
+    f2d = n*(n + a + b)*a**n*b**n/(factorial(n + a)*factorial(n + b))
+    g2d = 1/(factorial(a - 1)*factorial(
+        b - 1)) - a**(m + 1)*b**(m + 1)/(factorial(a + m)*factorial(b + m))
+    g = gosper_sum(f2d, (n, 0, m))
+    assert simplify(g - g2d) == 0
+
+
+def test_gosper_sum_AeqB_part3():  # f3a and f3e are expected to give None; the rest are compared with g3b..g3g
+    f3a = 1/n**4
+    f3b = (6*n + 3)/(4*n**4 + 8*n**3 + 8*n**2 + 4*n + 3)
+    f3c = 2**n*(n**2 - 2*n - 1)/(n**2*(n + 1)**2)
+    f3d = n**2*4**n/((n + 1)*(n + 2))
+    f3e = 2**n/(n + 1)
+    f3f = 4*(n - 1)*(n**2 - 2*n - 1)/(n**2*(n + 1)**2*(n - 2)**2*(n - 3)**2)
+    f3g = (n**4 - 14*n**2 - 24*n - 9)*2**n/(n**2*(n + 1)**2*(n + 2)**2*
+        (n + 3)**2)
+
+    # g3a -> no closed form
+    g3b = m*(m + 2)/(2*m**2 + 4*m + 3)
+    g3c = 2**m/m**2 - 2
+    g3d = Rational(2, 3) + 4**(m + 1)*(m - 1)/(m + 2)/3
+    # g3e -> no closed form
+    g3f = -(Rational(-1, 16) + 1/((m - 2)**2*(m + 1)**2))  # the AeqB key is wrong
+    g3g = Rational(-2, 9) + 2**(m + 1)/((m + 1)**2*(m + 3)**2)
+
+    g = gosper_sum(f3a, (n, 1, m))
+    assert g is None
+    g = gosper_sum(f3b, (n, 1, m))
+    assert g is not None and simplify(g - g3b) == 0
+    g = gosper_sum(f3c, (n, 1, m - 1))
+    assert g is not None and simplify(g - g3c) == 0
+    g = gosper_sum(f3d, (n, 1, m))
+    assert g is not None and simplify(g - g3d) == 0
+    g = gosper_sum(f3e, (n, 0, m - 1))
+    assert g is None
+    g = gosper_sum(f3f, (n, 4, m))  # starts at n = 4; f3f has (n - 2) and (n - 3) in its denominator
+    assert g is not None and simplify(g - g3f) == 0
+    g = gosper_sum(f3g, (n, 1, m))
+    assert g is not None and simplify(g - g3g) == 0
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_guess.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_guess.py
new file mode 100644
index 0000000000000000000000000000000000000000..5ac5d02b89ad62a70a29bd450b71b284b6aea76d
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_guess.py
@@ -0,0 +1,82 @@
+from sympy.concrete.guess import (
+ find_simple_recurrence_vector,
+ find_simple_recurrence,
+ rationalize,
+ guess_generating_function_rational,
+ guess_generating_function,
+ guess
+ )
+from sympy.concrete.products import Product
+from sympy.core.function import Function
+from sympy.core.numbers import Rational
+from sympy.core.singleton import S
+from sympy.core.symbol import (Symbol, symbols)
+from sympy.core.sympify import sympify
+from sympy.functions.combinatorial.factorials import (RisingFactorial, factorial)
+from sympy.functions.combinatorial.numbers import fibonacci
+from sympy.functions.elementary.exponential import exp
+
+
+def test_find_simple_recurrence_vector():  # the first 12 Fibonacci numbers are expected to give [1, -1, -1]
+    assert find_simple_recurrence_vector(
+        [fibonacci(k) for k in range(12)]) == [1, -1, -1]
+
+
+def test_find_simple_recurrence():  # Fibonacci data, a generated order-3 sequence, and a list whose expected result is 0
+    a = Function('a')
+    n = Symbol('n')
+    assert find_simple_recurrence([fibonacci(k) for k in range(12)]) == (
+        -a(n) - a(n + 1) + a(n + 2))
+
+    f = Function('a')
+    i = Symbol('n')
+    a = [1, 1, 1]  # rebinds a: from here on it is the list of sequence terms
+    for k in range(15): a.append(5*a[-1]-3*a[-2]+8*a[-3])
+    assert find_simple_recurrence(a, A=f, N=i) == (
+        -8*f(i) + 3*f(i + 1) - 5*f(i + 2) + f(i + 3))
+    assert find_simple_recurrence([0, 2, 15, 74, 12, 3, 0,
+        1, 2, 85, 4, 5, 63]) == 0
+
+
+def test_rationalize():  # mpmath values are mapped to the expected SymPy rationals
+    from mpmath import cos, pi, mpf  # local import: these are the mpmath cos/pi, not SymPy's
+    assert rationalize(cos(pi/3)) == S.Half
+    assert rationalize(mpf("0.333333333333333")) == Rational(1, 3)
+    assert rationalize(mpf("-0.333333333333333")) == Rational(-1, 3)
+    assert rationalize(pi, maxcoeff = 250) == Rational(355, 113)
+
+
+def test_guess_generating_function_rational():  # Fibonacci terms 5..14 are expected to give a rational function of x
+    x = Symbol('x')
+    assert guess_generating_function_rational([fibonacci(k)
+        for k in range(5, 15)]) == ((3*x + 5)/(-x**2 - x + 1))
+
+
+def test_guess_generating_function():  # results are dicts keyed by type name ('ogf', 'egf', 'lgdegf')
+    x = Symbol('x')
+    assert guess_generating_function([fibonacci(k)
+        for k in range(5, 15)])['ogf'] == ((3*x + 5)/(-x**2 - x + 1))
+    assert guess_generating_function(
+        [1, 2, 5, 14, 41, 124, 383, 1200, 3799, 12122, 38919])['ogf'] == (
+        (1/(x**4 + 2*x**2 - 4*x + 1))**S.Half)
+    assert guess_generating_function(sympify(
+        "[3/2, 11/2, 0, -121/2, -363/2, 121, 4719/2, 11495/2, -8712, -178717/2]")
+        )['ogf'] == (x + Rational(3, 2))/(11*x**2 - 3*x + 1)
+    assert guess_generating_function([factorial(k) for k in range(12)],
+        types=['egf'])['egf'] == 1/(-x + 1)
+    assert guess_generating_function([k+1 for k in range(12)],
+        types=['egf']) == {'egf': (x + 1)*exp(x), 'lgdegf': (x + 2)/(x + 1)}
+
+
+def test_guess():  # results are lists of formulas; [1, 0, 2] is expected to give []
+    i0, i1 = symbols('i0 i1')
+    assert guess([1, 2, 6, 24, 120], evaluate=False) == [Product(i1 + 1, (i1, 1, i0 - 1))]
+    assert guess([1, 2, 6, 24, 120]) == [RisingFactorial(2, i0 - 1)]
+    assert guess([1, 2, 7, 42, 429, 7436, 218348, 10850216], niter=4) == [
+        2**(i0 - 1)*(Rational(27, 16))**(i0**2/2 - 3*i0/2 +
+        1)*Product(RisingFactorial(Rational(5, 3), i1 - 1)*RisingFactorial(Rational(7, 3), i1
+        - 1)/(RisingFactorial(Rational(3, 2), i1 - 1)*RisingFactorial(Rational(5, 2), i1 -
+        1)), (i1, 1, i0 - 1))]
+    assert guess([1, 0, 2]) == []
+    x, y = symbols('x y')
+    assert guess([1, 2, 6, 24, 120], variables=[x, y]) == [RisingFactorial(2, x - 1)]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_products.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_products.py
new file mode 100644
index 0000000000000000000000000000000000000000..9be053a7040014c6ed38c1279a609fcb2426258e
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_products.py
@@ -0,0 +1,410 @@
+from sympy.concrete.products import (Product, product)
+from sympy.concrete.summations import Sum
+from sympy.core.function import (Derivative, Function, diff)
+from sympy.core.numbers import (Rational, oo, pi)
+from sympy.core.singleton import S
+from sympy.core.symbol import (Dummy, Symbol, symbols)
+from sympy.functions.combinatorial.factorials import (rf, factorial)
+from sympy.functions.elementary.exponential import (exp, log)
+from sympy.functions.elementary.miscellaneous import sqrt
+from sympy.functions.elementary.trigonometric import (cos, sin)
+from sympy.functions.special.tensor_functions import KroneckerDelta
+from sympy.simplify.combsimp import combsimp
+from sympy.simplify.simplify import simplify
+from sympy.testing.pytest import raises
+
+a, k, n, m, x = symbols('a,k,n,m,x', integer=True)  # module-level integer symbols; some tests rebind these names locally
+f = Function('f')  # undefined function f used as a generic factor
+
+
+def test_karr_convention():
+    # Test the Karr product convention that we want to hold.
+    # See his paper "Summation in Finite Terms" for a detailed
+    # reasoning why we really want exactly this definition.
+    # The convention is described for sums on page 309 and
+    # essentially in section 1.4, definition 3. For products
+    # we can find in analogy:
+    #
+    # \prod_{m <= i < n} f(i) 'has the obvious meaning' for m < n
+    # \prod_{m <= i < n} f(i) = 1 for m = n
+    # \prod_{m <= i < n} f(i) = 1 / \prod_{n <= i < m} f(i) for m > n
+    #
+    # It is important to note that he defines all products with
+    # the upper limit being *exclusive*.
+    # In contrast, SymPy and the usual mathematical notation have:
+    #
+    # prod_{i = a}^b f(i) = f(a) * f(a+1) * ... * f(b-1) * f(b)
+    #
+    # with the upper limit *inclusive*. So translating between
+    # the two we find that:
+    #
+    # \prod_{m <= i < n} f(i) = \prod_{i = m}^{n-1} f(i)
+    #
+    # where we intentionally used two different ways to typeset the
+    # products and its limits.
+
+    i = Symbol("i", integer=True)
+    k = Symbol("k", integer=True)
+    j = Symbol("j", integer=True, positive=True)
+
+    # A simple example with concrete factors and symbolic limits.
+
+    # The normal product: m = k and n = k + j and therefore m < n:
+    m = k
+    n = k + j
+
+    a = m
+    b = n - 1
+    S1 = Product(i**2, (i, a, b)).doit()
+
+    # The reversed product: m = k + j and n = k and therefore m > n:
+    m = k + j
+    n = k
+
+    a = m
+    b = n - 1
+    S2 = Product(i**2, (i, a, b)).doit()
+
+    assert S1 * S2 == 1
+
+    # Test the empty product: m = k and n = k and therefore m = n:
+    m = k
+    n = k
+
+    a = m
+    b = n - 1
+    Sz = Product(i**2, (i, a, b)).doit()
+
+    assert Sz == 1
+
+    # Another example this time with an unspecified factor and
+    # numeric limits. (We cannot do both tests in the same example.)
+    f = Function("f")
+
+    # The normal product with m < n:
+    m = 2
+    n = 11
+
+    a = m
+    b = n - 1
+    S1 = Product(f(i), (i, a, b)).doit()
+
+    # The reversed product with m > n:
+    m = 11
+    n = 2
+
+    a = m
+    b = n - 1
+    S2 = Product(f(i), (i, a, b)).doit()
+
+    assert simplify(S1 * S2) == 1
+
+    # Test the empty product with m = n:
+    m = 5
+    n = 5
+
+    a = m
+    b = n - 1
+    Sz = Product(f(i), (i, a, b)).doit()
+
+    assert Sz == 1
+
+
+def test_karr_proposition_2a():
+    # Test Karr, page 309, proposition 2, part a
+    i, u, v = symbols('i u v', integer=True)
+
+    def test_the_product(m, n):
+        # g is a fixed cubic polynomial in i
+        g = i**3 + 2*i**2 - 3*i
+        # f is the ratio g(i + 1)/g(i), the product analogue of Delta g
+        f = simplify(g.subs(i, i+1) / g)
+        # The product, with Karr's exclusive upper limit n mapped to n - 1
+        a = m
+        b = n - 1
+        P = Product(f, (i, a, b)).doit()
+        # Test if Product_{m <= i < n} f(i) = g(n) / g(m)
+        assert combsimp(P / (g.subs(i, n) / g.subs(i, m))) == 1
+
+    # m < n
+    test_the_product(u, u + v)
+    # m = n
+    test_the_product(u, u)
+    # m > n
+    test_the_product(u + v, u)
+
+
+def test_karr_proposition_2b():
+    # Test Karr, page 309, proposition 2, part b
+    i, u, v, w = symbols('i u v w', integer=True)
+
+    def test_the_product(l, n, m):  # NOTE(review): parameters are (l, n, m) but the case labels below read as (l, m, n) - confirm
+        # The factor of all three products (analogue of a summand)
+        s = i**3
+        # First product: from l up to n - 1
+        a = l
+        b = n - 1
+        S1 = Product(s, (i, a, b)).doit()
+        # Second product: from l up to m - 1
+        a = l
+        b = m - 1
+        S2 = Product(s, (i, a, b)).doit()
+        # Third product: from m up to n - 1
+        a = m
+        b = n - 1
+        S3 = Product(s, (i, a, b)).doit()
+        # Test if S1 = S2 * S3 as required
+        assert combsimp(S1 / (S2 * S3)) == 1
+
+    # l < m < n
+    test_the_product(u, u + v, u + v + w)
+    # l < m = n
+    test_the_product(u, u + v, u + v)
+    # l < m > n
+    test_the_product(u, u + v + w, v)
+    # l = m < n
+    test_the_product(u, u, u + v)
+    # l = m = n
+    test_the_product(u, u, u)
+    # l = m > n
+    test_the_product(u + v, u + v, u)
+    # l > m < n
+    test_the_product(u + v, u, u + w)
+    # l > m = n
+    test_the_product(u + v, u, u)
+    # l > m > n
+    test_the_product(u + v + w, u + v, u)
+
+
+def test_simple_products():  # closed forms, malformed-limit errors and infinite constant products
+    assert product(2, (k, a, n)) == 2**(n - a + 1)
+    assert product(k, (k, 1, n)) == factorial(n)
+    assert product(k**3, (k, 1, n)) == factorial(n)**3
+
+    assert product(k + 1, (k, 0, n - 1)) == factorial(n)
+    assert product(k + 1, (k, a, n - 1)) == rf(1 + a, n - a)
+
+    assert product(cos(k), (k, 0, 5)) == cos(1)*cos(2)*cos(3)*cos(4)*cos(5)
+    assert product(cos(k), (k, 3, 5)) == cos(3)*cos(4)*cos(5)
+    assert product(cos(k), (k, 1, Rational(5, 2))) != cos(1)*cos(2)  # non-integer upper limit
+
+    assert isinstance(product(k**k, (k, 1, n)), Product)
+
+    assert Product(x**k, (k, 1, n)).variables == [k]
+
+    raises(ValueError, lambda: Product(n))
+    raises(ValueError, lambda: Product(n, k))
+    raises(ValueError, lambda: Product(n, k, 1))
+    raises(ValueError, lambda: Product(n, k, 1, 10))
+    raises(ValueError, lambda: Product(n, (k, 1)))
+
+    assert product(1, (n, 1, oo)) == 1  # issue 8301
+    assert product(2, (n, 1, oo)) is oo
+    assert product(-1, (n, 1, oo)).func is Product
+
+
+def test_multiple_products():  # products with more than one limit tuple
+    assert product(x, (n, 1, k), (k, 1, m)) == x**(m**2/2 + m/2)
+    assert product(f(n), (
+        n, 1, m), (m, 1, k)) == Product(f(n), (n, 1, m), (m, 1, k)).doit()
+    assert Product(f(n), (m, 1, k), (n, 1, k)).doit() == \
+        Product(Product(f(n), (m, 1, k)), (n, 1, k)).doit() == \
+        product(f(n), (m, 1, k), (n, 1, k)) == \
+        product(product(f(n), (m, 1, k)), (n, 1, k)) == \
+        Product(f(n)**k, (n, 1, k))
+    assert Product(
+        x, (x, 1, k), (k, 1, n)).doit() == Product(factorial(k), (k, 1, n))
+
+    assert Product(x**k, (n, 1, k), (k, 1, m)).variables == [n, k]
+
+
+def test_rational_products():  # rational factor 1 + 1/k gives a ratio of rising factorial and factorial
+    assert product(1 + 1/k, (k, 1, n)) == rf(2, n)/factorial(n)
+
+
+def test_special_products():  # two named products with expected values in rf() and factorial()
+    # Wallis product
+    assert product((4*k)**2 / (4*k**2 - 1), (k, 1, n)) == \
+        4**n*factorial(n)**2/rf(S.Half, n)/rf(Rational(3, 2), n)
+
+    # Euler's product formula for sin
+    assert product(1 + a/k**2, (k, 1, n)) == \
+        rf(1 - sqrt(-a), n)*rf(1 + sqrt(-a), n)/factorial(n)**2
+
+
+def test__eval_product():  # constant factors and powers 2**i with symbolic limits
+    from sympy.abc import i, n  # local i, n shadow the module-level integer symbol n
+    # issue 4809
+    a = Function('a')
+    assert product(2*a(i), (i, 1, n)) == 2**n * Product(a(i), (i, 1, n))
+    # issue 4810
+    assert product(2**i, (i, 1, n)) == 2**(n*(n + 1)/2)
+    k, m = symbols('k m', integer=True)
+    assert product(2**i, (i, k, m)) == 2**(-k**2/2 + k/2 + m**2/2 + m/2)
+    n = Symbol('n', negative=True, integer=True)
+    p = Symbol('p', positive=True, integer=True)
+    assert product(2**i, (i, n, p)) == 2**(-n**2/2 + n/2 + p**2/2 + p/2)
+    assert product(2**i, (i, p, n)) == 2**(n**2/2 + n/2 - p**2/2 + p/2)  # lower limit p above upper limit n
+
+
+def test_product_pow():  # a product of powers of 2 is expected as 2 raised to a Sum of the exponents
+    # issue 4817
+    assert product(2**f(k), (k, 1, n)) == 2**Sum(f(k), (k, 1, n))
+    assert product(2**(2*f(k)), (k, 1, n)) == 2**Sum(2*f(k), (k, 1, n))
+
+
+def test_infinite_product():  # constructing the infinite product must give a Product instance
+    # issue 5737
+    assert isinstance(Product(2**(1/factorial(n)), (n, 0, oo)), Product)
+
+
+def test_conjugate_transpose():  # adjoint/conjugate/transpose must commute with doit()
+    p = Product(x**k, (k, 1, 3))
+    assert p.adjoint().doit() == p.doit().adjoint()
+    assert p.conjugate().doit() == p.doit().conjugate()
+    assert p.transpose().doit() == p.doit().transpose()
+
+    A, B = symbols("A B", commutative=False)
+    p = Product(A*B**k, (k, 1, 3))  # non-commutative factor, A on the left
+    assert p.adjoint().doit() == p.doit().adjoint()
+    assert p.conjugate().doit() == p.doit().conjugate()
+    assert p.transpose().doit() == p.doit().transpose()
+
+    p = Product(B**k*A, (k, 1, 3))  # same factors with A on the right
+    assert p.adjoint().doit() == p.doit().adjoint()
+    assert p.conjugate().doit() == p.doit().conjugate()
+    assert p.transpose().doit() == p.doit().transpose()
+
+
+def test_simplify_prod():  # uses simplify(e, doit=False); every expected result is still built from Product objects
+    y, t, b, c, v, d = symbols('y, t, b, c, v, d', integer = True)
+
+    _simplify = lambda e: simplify(e, doit=False)
+    assert _simplify(Product(x*y, (x, n, m), (y, a, k)) * \
+        Product(y, (x, n, m), (y, a, k))) == \
+        Product(x*y**2, (x, n, m), (y, a, k))
+    assert _simplify(3 * y* Product(x, (x, n, m)) * Product(x, (x, m + 1, a))) \
+        == 3 * y * Product(x, (x, n, a))
+    assert _simplify(Product(x, (x, k + 1, a)) * Product(x, (x, n, k))) == \
+        Product(x, (x, n, a))
+    assert _simplify(Product(x, (x, k + 1, a)) * Product(x + 1, (x, n, k))) == \
+        Product(x, (x, k + 1, a)) * Product(x + 1, (x, n, k))
+    assert _simplify(Product(x, (t, a, b)) * Product(y, (t, a, b)) * \
+        Product(x, (t, b+1, c))) == Product(x*y, (t, a, b)) * \
+        Product(x, (t, b+1, c))
+    assert _simplify(Product(x, (t, a, b)) * Product(x, (t, b+1, c)) * \
+        Product(y, (t, a, b))) == Product(x*y, (t, a, b)) * \
+        Product(x, (t, b+1, c))
+    assert _simplify(Product(sin(t)**2 + cos(t)**2 + 1, (t, a, b))) == \
+        Product(2, (t, a, b))
+    assert _simplify(Product(sin(t)**2 + cos(t)**2 - 1, (t, a, b))) == \
+        Product(0, (t, a, b))
+    assert _simplify(Product(v*Product(sin(t)**2 + cos(t)**2, (t, a, b)),
+        (v, c, d))) == Product(v*Product(1, (t, a, b)), (v, c, d))
+
+
+def test_change_index():
+ b, y, c, d, z = symbols('b, y, c, d, z', integer = True)
+
+ assert Product(x, (x, a, b)).change_index(x, x + 1, y) == \
+ Product(y - 1, (y, a + 1, b + 1))
+ assert Product(x**2, (x, a, b)).change_index(x, x - 1) == \
+ Product((x + 1)**2, (x, a - 1, b - 1))
+ assert Product(x**2, (x, a, b)).change_index(x, -x, y) == \
+ Product((-y)**2, (y, -b, -a))
+ assert Product(x, (x, a, b)).change_index(x, -x - 1) == \
+ Product(-x - 1, (x, - b - 1, -a - 1))
+ assert Product(x*y, (x, a, b), (y, c, d)).change_index(x, x - 1, z) == \
+ Product((z + 1)*y, (z, a - 1, b - 1), (y, c, d))
+
+
+def test_reorder():
+ b, y, c, d, z = symbols('b, y, c, d, z', integer = True)
+
+ assert Product(x*y, (x, a, b), (y, c, d)).reorder((0, 1)) == \
+ Product(x*y, (y, c, d), (x, a, b))
+ assert Product(x, (x, a, b), (x, c, d)).reorder((0, 1)) == \
+ Product(x, (x, c, d), (x, a, b))
+ assert Product(x*y + z, (x, a, b), (z, m, n), (y, c, d)).reorder(\
+ (2, 0), (0, 1)) == Product(x*y + z, (z, m, n), (y, c, d), (x, a, b))
+ assert Product(x*y*z, (x, a, b), (y, c, d), (z, m, n)).reorder(\
+ (0, 1), (1, 2), (0, 2)) == \
+ Product(x*y*z, (x, a, b), (z, m, n), (y, c, d))
+ assert Product(x*y*z, (x, a, b), (y, c, d), (z, m, n)).reorder(\
+ (x, y), (y, z), (x, z)) == \
+ Product(x*y*z, (x, a, b), (z, m, n), (y, c, d))
+ assert Product(x*y, (x, a, b), (y, c, d)).reorder((x, 1)) == \
+ Product(x*y, (y, c, d), (x, a, b))
+ assert Product(x*y, (x, a, b), (y, c, d)).reorder((y, x)) == \
+ Product(x*y, (y, c, d), (x, a, b))
+
+
+def test_Product_is_convergent():
+ assert Product(1/n**2, (n, 1, oo)).is_convergent() is S.false
+ assert Product(exp(1/n**2), (n, 1, oo)).is_convergent() is S.true
+ assert Product(1/n, (n, 1, oo)).is_convergent() is S.false
+ assert Product(1 + 1/n, (n, 1, oo)).is_convergent() is S.false
+ assert Product(1 + 1/n**2, (n, 1, oo)).is_convergent() is S.true
+
+
+def test_reverse_order():
+ x, y, a, b, c, d= symbols('x, y, a, b, c, d', integer = True)
+
+ assert Product(x, (x, 0, 3)).reverse_order(0) == Product(1/x, (x, 4, -1))
+ assert Product(x*y, (x, 1, 5), (y, 0, 6)).reverse_order(0, 1) == \
+ Product(x*y, (x, 6, 0), (y, 7, -1))
+ assert Product(x, (x, 1, 2)).reverse_order(0) == Product(1/x, (x, 3, 0))
+ assert Product(x, (x, 1, 3)).reverse_order(0) == Product(1/x, (x, 4, 0))
+ assert Product(x, (x, 1, a)).reverse_order(0) == Product(1/x, (x, a + 1, 0))
+ assert Product(x, (x, a, 5)).reverse_order(0) == Product(1/x, (x, 6, a - 1))
+ assert Product(x, (x, a + 1, a + 5)).reverse_order(0) == \
+ Product(1/x, (x, a + 6, a))
+ assert Product(x, (x, a + 1, a + 2)).reverse_order(0) == \
+ Product(1/x, (x, a + 3, a))
+ assert Product(x, (x, a + 1, a + 1)).reverse_order(0) == \
+ Product(1/x, (x, a + 2, a))
+ assert Product(x, (x, a, b)).reverse_order(0) == Product(1/x, (x, b + 1, a - 1))
+ assert Product(x, (x, a, b)).reverse_order(x) == Product(1/x, (x, b + 1, a - 1))
+ assert Product(x*y, (x, a, b), (y, 2, 5)).reverse_order(x, 1) == \
+ Product(x*y, (x, b + 1, a - 1), (y, 6, 1))
+ assert Product(x*y, (x, a, b), (y, 2, 5)).reverse_order(y, x) == \
+ Product(x*y, (x, b + 1, a - 1), (y, 6, 1))
+
+
+def test_issue_9983():
+ n = Symbol('n', integer=True, positive=True)
+ p = Product(1 + 1/n**Rational(2, 3), (n, 1, oo))
+ assert p.is_convergent() is S.false
+ assert product(1 + 1/n**Rational(2, 3), (n, 1, oo)) == p.doit()
+
+
+def test_issue_13546():
+ n = Symbol('n')
+ k = Symbol('k')
+ p = Product(n + 1 / 2**k, (k, 0, n-1)).doit()
+ assert p.subs(n, 2).doit() == Rational(15, 2)
+
+
+def test_issue_14036():
+ a, n = symbols('a n')
+ assert product(1 - a**2 / (n*pi)**2, [n, 1, oo]) != 0
+
+
+def test_rewrite_Sum():
+ assert Product(1 - S.Half**2/k**2, (k, 1, oo)).rewrite(Sum) == \
+ exp(Sum(log(1 - 1/(4*k**2)), (k, 1, oo)))
+
+
+def test_KroneckerDelta_Product():
+ y = Symbol('y')
+ assert Product(x*KroneckerDelta(x, y), (x, 0, 1)).doit() == 0
+
+
+def test_issue_20848():
+ _i = Dummy('i')
+ t, y, z = symbols('t y z')
+ assert diff(Product(x, (y, 1, z)), x).as_dummy() == Sum(Product(x, (y, 1, _i - 1))*Product(x, (y, _i + 1, z)), (_i, 1, z)).as_dummy()
+ assert diff(Product(x, (y, 1, z)), x).doit() == x**(z - 1)*z
+ assert diff(Product(x, (y, x, z)), x) == Derivative(Product(x, (y, x, z)), x)
+ assert diff(Product(t, (x, 1, z)), x) == S(0)
+ assert Product(sin(n*x), (n, -1, 1)).diff(x).doit() == S(0)
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_sums_products.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_sums_products.py
new file mode 100644
index 0000000000000000000000000000000000000000..b190afe0bd403819d3525453879d7d5d39e20a56
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/concrete/tests/test_sums_products.py
@@ -0,0 +1,1676 @@
+from math import prod
+
+from sympy.concrete.expr_with_intlimits import ReorderError
+from sympy.concrete.products import (Product, product)
+from sympy.concrete.summations import (Sum, summation, telescopic,
+ eval_sum_residue, _dummy_with_inherited_properties_concrete)
+from sympy.core.function import (Derivative, Function)
+from sympy.core import (Catalan, EulerGamma)
+from sympy.core.facts import InconsistentAssumptions
+from sympy.core.mod import Mod
+from sympy.core.numbers import (E, I, Rational, nan, oo, pi)
+from sympy.core.relational import Eq, Ne
+from sympy.core.numbers import Float
+from sympy.core.singleton import S
+from sympy.core.symbol import (Dummy, Symbol, symbols)
+from sympy.core.sympify import sympify
+from sympy.functions.combinatorial.factorials import (rf, binomial, factorial)
+from sympy.functions.combinatorial.numbers import harmonic
+from sympy.functions.elementary.complexes import Abs, re
+from sympy.functions.elementary.exponential import (exp, log)
+from sympy.functions.elementary.hyperbolic import (sinh, tanh)
+from sympy.functions.elementary.integers import floor
+from sympy.functions.elementary.miscellaneous import sqrt
+from sympy.functions.elementary.piecewise import Piecewise
+from sympy.functions.elementary.trigonometric import (cos, sin, atan)
+from sympy.functions.special.gamma_functions import (gamma, lowergamma)
+from sympy.functions.special.tensor_functions import KroneckerDelta
+from sympy.functions.special.zeta_functions import zeta
+from sympy.integrals.integrals import Integral
+from sympy.logic.boolalg import And, Or
+from sympy.matrices.expressions.matexpr import MatrixSymbol
+from sympy.matrices.expressions.special import Identity
+from sympy.matrices import (Matrix, SparseMatrix,
+ ImmutableDenseMatrix, ImmutableSparseMatrix, diag)
+from sympy.sets.contains import Contains
+from sympy.sets.fancysets import Range
+from sympy.sets.sets import Interval
+from sympy.simplify.combsimp import combsimp
+from sympy.simplify.simplify import simplify
+from sympy.tensor.indexed import (Idx, Indexed, IndexedBase)
+from sympy.testing.pytest import XFAIL, raises, slow
+from sympy.abc import a, b, c, d, k, m, x, y, z
+
+n = Symbol('n', integer=True)
+f, g = symbols('f g', cls=Function)
+
+def test_karr_convention():
+ # Test the Karr summation convention that we want to hold.
+ # See his paper "Summation in Finite Terms" for a detailed
+ # reasoning why we really want exactly this definition.
+ # The convention is described on page 309 and essentially
+ # in section 1.4, definition 3:
+ #
+ # \sum_{m <= i < n} f(i) 'has the obvious meaning' for m < n
+ # \sum_{m <= i < n} f(i) = 0 for m = n
+ # \sum_{m <= i < n} f(i) = - \sum_{n <= i < m} f(i) for m > n
+ #
+ # It is important to note that he defines all sums with
+ # the upper limit being *exclusive*.
+ # In contrast, SymPy and the usual mathematical notation have:
+ #
+ # sum_{i = a}^b f(i) = f(a) + f(a+1) + ... + f(b-1) + f(b)
+ #
+ # with the upper limit *inclusive*. So translating between
+ # the two we find that:
+ #
+ # \sum_{m <= i < n} f(i) = \sum_{i = m}^{n-1} f(i)
+ #
+ # where we intentionally used two different ways to typeset the
+ # sum and its limits.
+
+ i = Symbol("i", integer=True)
+ k = Symbol("k", integer=True)
+ j = Symbol("j", integer=True)
+
+ # A simple example with a concrete summand and symbolic limits.
+
+ # The normal sum: m = k and n = k + j and therefore m < n:
+ m = k
+ n = k + j
+
+ a = m
+ b = n - 1
+ S1 = Sum(i**2, (i, a, b)).doit()
+
+ # The reversed sum: m = k + j and n = k and therefore m > n:
+ m = k + j
+ n = k
+
+ a = m
+ b = n - 1
+ S2 = Sum(i**2, (i, a, b)).doit()
+
+ assert simplify(S1 + S2) == 0
+
+ # Test the empty sum: m = k and n = k and therefore m = n:
+ m = k
+ n = k
+
+ a = m
+ b = n - 1
+ Sz = Sum(i**2, (i, a, b)).doit()
+
+ assert Sz == 0
+
+ # Another example this time with an unspecified summand and
+ # numeric limits. (We cannot do both tests in the same example.)
+
+ # The normal sum with m < n:
+ m = 2
+ n = 11
+
+ a = m
+ b = n - 1
+ S1 = Sum(f(i), (i, a, b)).doit()
+
+ # The reversed sum with m > n:
+ m = 11
+ n = 2
+
+ a = m
+ b = n - 1
+ S2 = Sum(f(i), (i, a, b)).doit()
+
+ assert simplify(S1 + S2) == 0
+
+ # Test the empty sum with m = n:
+ m = 5
+ n = 5
+
+ a = m
+ b = n - 1
+ Sz = Sum(f(i), (i, a, b)).doit()
+
+ assert Sz == 0
+
+ e = Piecewise((exp(-i), Mod(i, 2) > 0), (0, True))
+ s = Sum(e, (i, 0, 11))
+ assert s.n(3) == s.doit().n(3)
+
+ # issue #27893
+ n = Symbol('n', integer=True)
+ assert Sum(1/(x**2 + 1), (x, oo, 0)).doit(deep=False) == Rational(-1, 2) + pi / (2 * tanh(pi))
+ assert Sum(c**x/factorial(x), (x, oo, 0)).doit(deep=False).simplify() == exp(c) - 1 # exponential series
+ assert Sum((-1)**x/x, (x, oo,0)).doit() == -log(2) # alternating harmonic series
+ assert Sum((1/2)**x,(x, oo, -1)).doit() == S(2) # geometric series
+ assert Sum(1/x, (x, oo, 0)).doit() == oo # harmonic series, divergent
+ assert Sum((-1)**x/(2*x+1), (x, oo, -1)).doit() == pi/4 # leibniz series
+ assert Sum((((-1)**x) * c**(2*x+1)) / factorial(2*x+1), (x, oo, -1)).doit() == sin(c) # sinusoidal series
+ assert Sum((((-1)**x) * c**(2*x+1)) / (2*x+1), (x, 0, oo)).doit() \
+ == Piecewise((atan(c), Ne(c**2, -1) & (Abs(c**2) <= 1)), \
+ (Sum((-1)**x*c**(2*x + 1)/(2*x + 1), (x, 0, oo)), True)) # arctangent series
+ assert Sum(binomial(n, x) * c**x, (x, 0, oo)).doit() \
+ == Piecewise(((c + 1)**n, \
+ ((n <= -1) & (Abs(c) < 1)) \
+ | ((n > 0) & (Abs(c) <= 1)) \
+ | ((n <= 0) & (n > -1) & Ne(c, -1) & (Abs(c) <= 1))), \
+ (Sum(c**x*binomial(n, x), (x, 0, oo)), True)) # binomial series
+ assert Sum(1/x**n, (x, oo, 0)).doit() \
+ == Piecewise((zeta(n), n > 1), (Sum(x**(-n), (x, oo, 0)), True)) # Euler's zeta function
+
+def test_karr_proposition_2a():
+ # Test Karr, page 309, proposition 2, part a
+ i = Symbol("i", integer=True)
+ u = Symbol("u", integer=True)
+ v = Symbol("v", integer=True)
+
+ def test_the_sum(m, n):
+ # g
+ g = i**3 + 2*i**2 - 3*i
+ # f = Delta g
+ f = simplify(g.subs(i, i+1) - g)
+ # The sum
+ a = m
+ b = n - 1
+ S = Sum(f, (i, a, b)).doit()
+ # Test if Sum_{m <= i < n} f(i) = g(n) - g(m)
+ assert simplify(S - (g.subs(i, n) - g.subs(i, m))) == 0
+
+ # m < n
+ test_the_sum(u, u+v)
+ # m = n
+ test_the_sum(u, u )
+ # m > n
+ test_the_sum(u+v, u )
+
+
+def test_karr_proposition_2b():
+ # Test Karr, page 309, proposition 2, part b
+ i = Symbol("i", integer=True)
+ u = Symbol("u", integer=True)
+ v = Symbol("v", integer=True)
+ w = Symbol("w", integer=True)
+
+ def test_the_sum(l, n, m):
+ # Summand
+ s = i**3
+ # First sum
+ a = l
+ b = n - 1
+ S1 = Sum(s, (i, a, b)).doit()
+ # Second sum
+ a = l
+ b = m - 1
+ S2 = Sum(s, (i, a, b)).doit()
+ # Third sum
+ a = m
+ b = n - 1
+ S3 = Sum(s, (i, a, b)).doit()
+ # Test if S1 = S2 + S3 as required
+ assert S1 - (S2 + S3) == 0
+
+ # l < m < n
+ test_the_sum(u, u+v, u+v+w)
+ # l < m = n
+ test_the_sum(u, u+v, u+v )
+ # l < m > n
+ test_the_sum(u, u+v+w, v )
+ # l = m < n
+ test_the_sum(u, u, u+v )
+ # l = m = n
+ test_the_sum(u, u, u )
+ # l = m > n
+ test_the_sum(u+v, u+v, u )
+ # l > m < n
+ test_the_sum(u+v, u, u+w )
+ # l > m = n
+ test_the_sum(u+v, u, u )
+ # l > m > n
+ test_the_sum(u+v+w, u+v, u )
+
+
+def test_arithmetic_sums():
+ assert summation(1, (n, a, b)) == b - a + 1
+ assert Sum(S.NaN, (n, a, b)) is S.NaN
+ assert Sum(x, (n, a, a)).doit() == x
+ assert Sum(x, (x, a, a)).doit() == a
+ assert Sum(x, (n, 1, a)).doit() == a*x
+ assert Sum(x, (x, Range(1, 11))).doit() == 55
+ assert Sum(x, (x, Range(1, 11, 2))).doit() == 25
+ assert Sum(x, (x, Range(1, 10, 2))) == Sum(x, (x, Range(9, 0, -2)))
+ lo, hi = 1, 2
+ s1 = Sum(n, (n, lo, hi))
+ s2 = Sum(n, (n, hi, lo))
+ assert s1 != s2
+ assert s1.doit() == 3 and s2.doit() == 0
+ lo, hi = x, x + 1
+ s1 = Sum(n, (n, lo, hi))
+ s2 = Sum(n, (n, hi, lo))
+ assert s1 != s2
+ assert s1.doit() == 2*x + 1 and s2.doit() == 0
+ assert Sum(Integral(x, (x, 1, y)) + x, (x, 1, 2)).doit() == \
+ y**2 + 2
+ assert summation(1, (n, 1, 10)) == 10
+ assert summation(2*n, (n, 0, 10**10)) == 100000000010000000000
+ assert summation(4*n*m, (n, a, 1), (m, 1, d)).expand() == \
+ 2*d + 2*d**2 + a*d + a*d**2 - d*a**2 - a**2*d**2
+ assert summation(cos(n), (n, -2, 1)) == cos(-2) + cos(-1) + cos(0) + cos(1)
+ assert summation(cos(n), (n, x, x + 2)) == cos(x) + cos(x + 1) + cos(x + 2)
+ assert isinstance(summation(cos(n), (n, x, x + S.Half)), Sum)
+ assert summation(k, (k, 0, oo)) is oo
+ assert summation(k, (k, Range(1, 11))) == 55
+
+
+def test_polynomial_sums():
+ assert summation(n**2, (n, 3, 8)) == 199
+ assert summation(n, (n, a, b)) == \
+ ((a + b)*(b - a + 1)/2).expand()
+ assert summation(n**2, (n, 1, b)) == \
+ ((2*b**3 + 3*b**2 + b)/6).expand()
+ assert summation(n**3, (n, 1, b)) == \
+ ((b**4 + 2*b**3 + b**2)/4).expand()
+ assert summation(n**6, (n, 1, b)) == \
+ ((6*b**7 + 21*b**6 + 21*b**5 - 7*b**3 + b)/42).expand()
+
+
+def test_geometric_sums():
+ assert summation(pi**n, (n, 0, b)) == (1 - pi**(b + 1)) / (1 - pi)
+ assert summation(2 * 3**n, (n, 0, b)) == 3**(b + 1) - 1
+ assert summation(S.Half**n, (n, 1, oo)) == 1
+ assert summation(2**n, (n, 0, b)) == 2**(b + 1) - 1
+ assert summation(2**n, (n, 1, oo)) is oo
+ assert summation(2**(-n), (n, 1, oo)) == 1
+ assert summation(3**(-n), (n, 4, oo)) == Rational(1, 54)
+ assert summation(2**(-4*n + 3), (n, 1, oo)) == Rational(8, 15)
+ assert summation(2**(n + 1), (n, 1, b)).expand() == 4*(2**b - 1)
+
+ # issue 6664:
+ assert summation(x**n, (n, 0, oo)) == \
+ Piecewise((1/(-x + 1), Abs(x) < 1), (Sum(x**n, (n, 0, oo)), True))
+
+ assert summation(-2**n, (n, 0, oo)) is -oo
+ assert summation(I**n, (n, 0, oo)) == Sum(I**n, (n, 0, oo))
+
+ # issue 6802:
+ assert summation((-1)**(2*x + 2), (x, 0, n)) == n + 1
+ assert summation((-2)**(2*x + 2), (x, 0, n)) == 4*4**(n + 1)/S(3) - Rational(4, 3)
+ assert summation((-1)**x, (x, 0, n)) == -(-1)**(n + 1)/S(2) + S.Half
+ assert summation(y**x, (x, a, b)) == \
+ Piecewise((-a + b + 1, Eq(y, 1)), ((y**a - y**(b + 1))/(-y + 1), True))
+ assert summation((-2)**(y*x + 2), (x, 0, n)) == \
+ 4*Piecewise((n + 1, Eq((-2)**y, 1)),
+ ((-(-2)**(y*(n + 1)) + 1)/(-(-2)**y + 1), True))
+
+ # issue 8251:
+ assert summation((1/(n + 1)**2)*n**2, (n, 0, oo)) is oo
+
+ #issue 9908:
+ assert Sum(1/(n**3 - 1), (n, -oo, -2)).doit() == summation(1/(n**3 - 1), (n, -oo, -2))
+
+ #issue 11642:
+ result = Sum(0.5**n, (n, 1, oo)).doit()
+ assert result == 1.0
+ assert result.is_Float
+
+ result = Sum(0.25**n, (n, 1, oo)).doit()
+ assert result == 1/3.
+ assert result.is_Float
+
+ result = Sum(0.99999**n, (n, 1, oo)).doit()
+ assert result == 99999.0
+ assert result.is_Float
+
+ result = Sum(S.Half**n, (n, 1, oo)).doit()
+ assert result == 1
+ assert not result.is_Float
+
+ result = Sum(Rational(3, 5)**n, (n, 1, oo)).doit()
+ assert result == Rational(3, 2)
+ assert not result.is_Float
+
+ assert Sum(1.0**n, (n, 1, oo)).doit() is oo
+ assert Sum(2.43**n, (n, 1, oo)).doit() is oo
+
+ # Issue 13979
+ i, k, q = symbols('i k q', integer=True)
+ result = summation(
+ exp(-2*I*pi*k*i/n) * exp(2*I*pi*q*i/n) / n, (i, 0, n - 1)
+ )
+ assert result.simplify() == Piecewise(
+ (1, Eq(exp(-2*I*pi*(k - q)/n), 1)), (0, True)
+ )
+
+ #Issue 23491
+ assert Sum(1/(n**2 + 1), (n, 1, oo)).doit() == S(-1)/2 + pi/(2*tanh(pi))
+
+def test_harmonic_sums():
+ assert summation(1/k, (k, 0, n)) == Sum(1/k, (k, 0, n))
+ assert summation(1/k, (k, 1, n)) == harmonic(n)
+ assert summation(n/k, (k, 1, n)) == n*harmonic(n)
+ assert summation(1/k, (k, 5, n)) == harmonic(n) - harmonic(4)
+
+
+def test_composite_sums():
+ f = S.Half*(7 - 6*n + Rational(1, 7)*n**3)
+ s = summation(f, (n, a, b))
+ assert not isinstance(s, Sum)
+ A = 0
+ for i in range(-3, 5):
+ A += f.subs(n, i)
+ B = s.subs(a, -3).subs(b, 4)
+ assert A == B
+
+
+def test_hypergeometric_sums():
+ assert summation(
+ binomial(2*k, k)/4**k, (k, 0, n)) == (1 + 2*n)*binomial(2*n, n)/4**n
+ assert summation(binomial(2*k, k)/5**k, (k, -oo, oo)) == sqrt(5)
+
+
+def test_other_sums():
+ f = m**2 + m*exp(m)
+ g = 3*exp(Rational(3, 2))/2 + exp(S.Half)/2 - exp(Rational(-1, 2))/2 - 3*exp(Rational(-3, 2))/2 + 5
+
+ assert summation(f, (m, Rational(-3, 2), Rational(3, 2))) == g
+ assert summation(f, (m, -1.5, 1.5)).evalf().epsilon_eq(g.evalf(), 1e-10)
+
+fac = factorial
+
+
+def NS(e, n=15, **options):
+ return str(sympify(e).evalf(n, **options))
+
+
+def test_evalf_fast_series():
+ # Euler transformed series for sqrt(1+x)
+ assert NS(Sum(
+ fac(2*n + 1)/fac(n)**2/2**(3*n + 1), (n, 0, oo)), 100) == NS(sqrt(2), 100)
+
+ # Some series for exp(1)
+ estr = NS(E, 100)
+ assert NS(Sum(1/fac(n), (n, 0, oo)), 100) == estr
+ assert NS(1/Sum((1 - 2*n)/fac(2*n), (n, 0, oo)), 100) == estr
+ assert NS(Sum((2*n + 1)/fac(2*n), (n, 0, oo)), 100) == estr
+ assert NS(Sum((4*n + 3)/2**(2*n + 1)/fac(2*n + 1), (n, 0, oo))**2, 100) == estr
+
+ pistr = NS(pi, 100)
+ # Ramanujan series for pi
+ assert NS(9801/sqrt(8)/Sum(fac(
+ 4*n)*(1103 + 26390*n)/fac(n)**4/396**(4*n), (n, 0, oo)), 100) == pistr
+ assert NS(1/Sum(
+ binomial(2*n, n)**3 * (42*n + 5)/2**(12*n + 4), (n, 0, oo)), 100) == pistr
+ # Machin's formula for pi
+ assert NS(16*Sum((-1)**n/(2*n + 1)/5**(2*n + 1), (n, 0, oo)) -
+ 4*Sum((-1)**n/(2*n + 1)/239**(2*n + 1), (n, 0, oo)), 100) == pistr
+
+ # Apery's constant
+ astr = NS(zeta(3), 100)
+ P = 126392*n**5 + 412708*n**4 + 531578*n**3 + 336367*n**2 + 104000* \
+ n + 12463
+ assert NS(Sum((-1)**n * P / 24 * (fac(2*n + 1)*fac(2*n)*fac(
+ n))**3 / fac(3*n + 2) / fac(4*n + 3)**3, (n, 0, oo)), 100) == astr
+ assert NS(Sum((-1)**n * (205*n**2 + 250*n + 77)/64 * fac(n)**10 /
+ fac(2*n + 1)**5, (n, 0, oo)), 100) == astr
+
+
+def test_evalf_fast_series_issue_4021():
+ # Catalan's constant
+ assert NS(Sum((-1)**(n - 1)*2**(8*n)*(40*n**2 - 24*n + 3)*fac(2*n)**3*
+ fac(n)**2/n**3/(2*n - 1)/fac(4*n)**2, (n, 1, oo))/64, 100) == \
+ NS(Catalan, 100)
+ astr = NS(zeta(3), 100)
+ assert NS(5*Sum(
+ (-1)**(n - 1)*fac(n)**2 / n**3 / fac(2*n), (n, 1, oo))/2, 100) == astr
+ assert NS(Sum((-1)**(n - 1)*(56*n**2 - 32*n + 5) / (2*n - 1)**2 * fac(n - 1)
+ **3 / fac(3*n), (n, 1, oo))/4, 100) == astr
+
+
+def test_evalf_slow_series():
+ assert NS(Sum((-1)**n / n, (n, 1, oo)), 15) == NS(-log(2), 15)
+ assert NS(Sum((-1)**n / n, (n, 1, oo)), 50) == NS(-log(2), 50)
+ assert NS(Sum(1/n**2, (n, 1, oo)), 15) == NS(pi**2/6, 15)
+ assert NS(Sum(1/n**2, (n, 1, oo)), 100) == NS(pi**2/6, 100)
+ assert NS(Sum(1/n**2, (n, 1, oo)), 500) == NS(pi**2/6, 500)
+ assert NS(Sum((-1)**n / (2*n + 1)**3, (n, 0, oo)), 15) == NS(pi**3/32, 15)
+ assert NS(Sum((-1)**n / (2*n + 1)**3, (n, 0, oo)), 50) == NS(pi**3/32, 50)
+
+
+def test_evalf_oo_to_oo():
+ # There used to be an error in certain cases
+ # Does not evaluate, but at least does not throw an error
+ # Evaluates symbolically to 0, which is not correct
+ assert Sum(1/(n**2+1), (n, -oo, oo)).evalf() == Sum(1/(n**2+1), (n, -oo, oo))
+ # This evaluates if summed from 1 to oo, and symbolically
+ assert Sum(1/(factorial(abs(n))), (n, -oo, -1)).evalf() == Sum(1/(factorial(abs(n))), (n, -oo, -1))
+
+
+def test_euler_maclaurin():
+ # Exact polynomial sums with E-M
+ def check_exact(f, a, b, m, n):
+ A = Sum(f, (k, a, b))
+ s, e = A.euler_maclaurin(m, n)
+ assert (e == 0) and (s.expand() == A.doit())
+ check_exact(k**4, a, b, 0, 2)
+ check_exact(k**4 + 2*k, a, b, 1, 2)
+ check_exact(k**4 + k**2, a, b, 1, 5)
+ check_exact(k**5, 2, 6, 1, 2)
+ check_exact(k**5, 2, 6, 1, 3)
+ assert Sum(x-1, (x, 0, 2)).euler_maclaurin(m=30, n=30, eps=2**-15) == (0, 0)
+ # Not exact
+ assert Sum(k**6, (k, a, b)).euler_maclaurin(0, 2)[1] != 0
+ # Numerical test
+ for mi, ni in [(2, 4), (2, 20), (10, 20), (18, 20)]:
+ A = Sum(1/k**3, (k, 1, oo))
+ s, e = A.euler_maclaurin(mi, ni)
+ assert abs((s - zeta(3)).evalf()) < e.evalf()
+
+ raises(ValueError, lambda: Sum(1, (x, 0, 1), (k, 0, 1)).euler_maclaurin())
+
+
+@slow
+def test_evalf_euler_maclaurin():
+ assert NS(Sum(1/k**k, (k, 1, oo)), 15) == '1.29128599706266'
+ assert NS(Sum(1/k**k, (k, 1, oo)),
+ 50) == '1.2912859970626635404072825905956005414986193682745'
+ assert NS(Sum(1/k - log(1 + 1/k), (k, 1, oo)), 15) == NS(EulerGamma, 15)
+ assert NS(Sum(1/k - log(1 + 1/k), (k, 1, oo)), 50) == NS(EulerGamma, 50)
+ assert NS(Sum(log(k)/k**2, (k, 1, oo)), 15) == '0.937548254315844'
+ assert NS(Sum(log(k)/k**2, (k, 1, oo)),
+ 50) == '0.93754825431584375370257409456786497789786028861483'
+ assert NS(Sum(1/k, (k, 1000000, 2000000)), 15) == '0.693147930560008'
+ assert NS(Sum(1/k, (k, 1000000, 2000000)),
+ 50) == '0.69314793056000780941723211364567656807940638436025'
+
+
+def test_evalf_symbolic():
+ # issue 6328
+ expr = Sum(f(x), (x, 1, 3)) + Sum(g(x), (x, 1, 3))
+ assert expr.evalf() == expr
+
+
+def test_evalf_issue_3273():
+ assert Sum(0, (k, 1, oo)).evalf() == 0
+
+
+def test_simple_products():
+ assert Product(S.NaN, (x, 1, 3)) is S.NaN
+ assert product(S.NaN, (x, 1, 3)) is S.NaN
+ assert Product(x, (n, a, a)).doit() == x
+ assert Product(x, (x, a, a)).doit() == a
+ assert Product(x, (y, 1, a)).doit() == x**a
+
+ lo, hi = 1, 2
+ s1 = Product(n, (n, lo, hi))
+ s2 = Product(n, (n, hi, lo))
+ assert s1 != s2
+ # This IS correct according to Karr product convention
+ assert s1.doit() == 2
+ assert s2.doit() == 1
+
+ lo, hi = x, x + 1
+ s1 = Product(n, (n, lo, hi))
+ s2 = Product(n, (n, hi, lo))
+ s3 = 1 / Product(n, (n, hi + 1, lo - 1))
+ assert s1 != s2
+ # This IS correct according to Karr product convention
+ assert s1.doit() == x*(x + 1)
+ assert s2.doit() == 1
+ assert s3.doit() == x*(x + 1)
+
+ assert Product(Integral(2*x, (x, 1, y)) + 2*x, (x, 1, 2)).doit() == \
+ (y**2 + 1)*(y**2 + 3)
+ assert product(2, (n, a, b)) == 2**(b - a + 1)
+ assert product(n, (n, 1, b)) == factorial(b)
+ assert product(n**3, (n, 1, b)) == factorial(b)**3
+ assert product(3**(2 + n), (n, a, b)) \
+ == 3**(2*(1 - a + b) + b/2 + (b**2)/2 + a/2 - (a**2)/2)
+ assert product(cos(n), (n, 3, 5)) == cos(3)*cos(4)*cos(5)
+ assert product(cos(n), (n, x, x + 2)) == cos(x)*cos(x + 1)*cos(x + 2)
+ assert isinstance(product(cos(n), (n, x, x + S.Half)), Product)
+ # If Product managed to evaluate this one, it most likely got it wrong!
+ assert isinstance(Product(n**n, (n, 1, b)), Product)
+
+
+def test_rational_products():
+ assert combsimp(product(1 + 1/n, (n, a, b))) == (1 + b)/a
+ assert combsimp(product(n + 1, (n, a, b))) == gamma(2 + b)/gamma(1 + a)
+ assert combsimp(product((n + 1)/(n - 1), (n, a, b))) == b*(1 + b)/(a*(a - 1))
+ assert combsimp(product(n/(n + 1)/(n + 2), (n, a, b))) == \
+ a*gamma(a + 2)/(b + 1)/gamma(b + 3)
+ assert combsimp(product(n*(n + 1)/(n - 1)/(n - 2), (n, a, b))) == \
+ b**2*(b - 1)*(1 + b)/(a - 1)**2/(a*(a - 2))
+
+
+def test_wallis_product():
+ # Wallis product, given in two different forms to ensure that Product
+ # can factor simple rational expressions
+ A = Product(4*n**2 / (4*n**2 - 1), (n, 1, b))
+ B = Product((2*n)*(2*n)/(2*n - 1)/(2*n + 1), (n, 1, b))
+ R = pi*gamma(b + 1)**2/(2*gamma(b + S.Half)*gamma(b + Rational(3, 2)))
+ assert simplify(A.doit()) == R
+ assert simplify(B.doit()) == R
+ # This one should eventually also be doable (Euler's product formula for sin)
+ # assert Product(1+x/n**2, (n, 1, b)) == ...
+
+
+def test_telescopic_sums():
+ # also checks input 2 of comment 1 of issue 4127
+ assert Sum(1/k - 1/(k + 1), (k, 1, n)).doit() == 1 - 1/(1 + n)
+ assert Sum(
+ f(k) - f(k + 2), (k, m, n)).doit() == -f(1 + n) - f(2 + n) + f(m) + f(1 + m)
+ assert Sum(cos(k) - cos(k + 3), (k, 1, n)).doit() == -cos(1 + n) - \
+ cos(2 + n) - cos(3 + n) + cos(1) + cos(2) + cos(3)
+
+ # dummy variable shouldn't matter
+ assert telescopic(1/m, -m/(1 + m), (m, n - 1, n)) == \
+ telescopic(1/k, -k/(1 + k), (k, n - 1, n))
+
+ assert Sum(1/x/(x - 1), (x, a, b)).doit() == 1/(a - 1) - 1/b
+ eq = 1/((5*n + 2)*(5*(n + 1) + 2))
+ assert Sum(eq, (n, 0, oo)).doit() == S(1)/10
+ nz = symbols('nz', nonzero=True)
+ v = Sum(eq.subs(5, nz), (n, 0, oo)).doit()
+ assert v.subs(nz, 5).simplify() == S(1)/10
+ # check that apart is being used in non-symbolic case
+ s = Sum(eq, (n, 0, k)).doit()
+ v = Sum(eq, (n, 0, 10**100)).doit()
+ assert v == s.subs(k, 10**100)
+
+
+def test_sum_reconstruct():
+ s = Sum(n**2, (n, -1, 1))
+ assert s == Sum(*s.args)
+ raises(ValueError, lambda: Sum(x, x))
+ raises(ValueError, lambda: Sum(x, (x, 1)))
+
+
+def test_limit_subs():
+ for F in (Sum, Product, Integral):
+ assert F(a*exp(a), (a, -2, 2)) == F(a*exp(a), (a, -b, b)).subs(b, 2)
+ assert F(a, (a, F(b, (b, 1, 2)), 4)).subs(F(b, (b, 1, 2)), c) == \
+ F(a, (a, c, 4))
+ assert F(x, (x, 1, x + y)).subs(x, 1) == F(x, (x, 1, y + 1))
+
+
+def test_function_subs():
+ S = Sum(x*f(y),(x,0,oo),(y,0,oo))
+ assert S.subs(f(y),y) == Sum(x*y,(x,0,oo),(y,0,oo))
+ assert S.subs(f(x),x) == S
+ raises(ValueError, lambda: S.subs(f(y),x+y) )
+ S = Sum(x*log(y),(x,0,oo),(y,0,oo))
+ assert S.subs(log(y),y) == S
+ S = Sum(x*f(y),(x,0,oo),(y,0,oo))
+ assert S.subs(f(y),y) == Sum(x*y,(x,0,oo),(y,0,oo))
+
+
+def test_equality():
+ # if this fails remove special handling below
+ raises(ValueError, lambda: Sum(x, x))
+ r = symbols('x', real=True)
+ for F in (Sum, Product, Integral):
+ try:
+ assert F(x, x) != F(y, y)
+ assert F(x, (x, 1, 2)) != F(x, x)
+ assert F(x, (x, x)) != F(x, x) # or else they print the same
+ assert F(1, x) != F(1, y)
+ except ValueError:
+ pass
+ assert F(a, (x, 1, 2)) != F(a, (x, 1, 3)) # diff limit
+ assert F(a, (x, 1, x)) != F(a, (y, 1, y))
+ assert F(a, (x, 1, 2)) != F(b, (x, 1, 2)) # diff expression
+ assert F(x, (x, 1, 2)) != F(r, (r, 1, 2)) # diff assumptions
+ assert F(1, (x, 1, x)) != F(1, (y, 1, x)) # only dummy is diff
+ assert F(1, (x, 1, x)).dummy_eq(F(1, (y, 1, x)))
+
+ # issue 5265
+ assert Sum(x, (x, 1, x)).subs(x, a) == Sum(x, (x, 1, a))
+
+
+def test_Sum_doit():
+ assert Sum(n*Integral(a**2), (n, 0, 2)).doit() == a**3
+ assert Sum(n*Integral(a**2), (n, 0, 2)).doit(deep=False) == \
+ 3*Integral(a**2)
+ assert summation(n*Integral(a**2), (n, 0, 2)) == 3*Integral(a**2)
+
+ # test nested sum evaluation
+ s = Sum( Sum( Sum(2,(z,1,n+1)), (y,x+1,n)), (x,1,n))
+ assert 0 == (s.doit() - n*(n+1)*(n-1)).factor()
+
+ # Integer assumes finite
+ assert Sum(KroneckerDelta(x, y), (x, -oo, oo)).doit() == Piecewise((1, And(-oo < y, y < oo)), (0, True))
+ assert Sum(KroneckerDelta(m, n), (m, -oo, oo)).doit() == 1
+ assert Sum(m*KroneckerDelta(x, y), (x, -oo, oo)).doit() == Piecewise((m, And(-oo < y, y < oo)), (0, True))
+ assert Sum(x*KroneckerDelta(m, n), (m, -oo, oo)).doit() == x
+ assert Sum(Sum(KroneckerDelta(m, n), (m, 1, 3)), (n, 1, 3)).doit() == 3
+ assert Sum(Sum(KroneckerDelta(k, m), (m, 1, 3)), (n, 1, 3)).doit() == \
+ 3 * Piecewise((1, And(1 <= k, k <= 3)), (0, True))
+ assert Sum(f(n) * Sum(KroneckerDelta(m, n), (m, 0, oo)), (n, 1, 3)).doit() == \
+ f(1) + f(2) + f(3)
+ assert Sum(f(n) * Sum(KroneckerDelta(m, n), (m, 0, oo)), (n, 1, oo)).doit() == \
+ Sum(f(n), (n, 1, oo))
+
+ # issue 2597
+ nmax = symbols('N', integer=True, positive=True)
+ pw = Piecewise((1, And(1 <= n, n <= nmax)), (0, True))
+ assert Sum(pw, (n, 1, nmax)).doit() == Sum(Piecewise((1, nmax >= n),
+ (0, True)), (n, 1, nmax))
+
+ q, s = symbols('q, s')
+ assert summation(1/n**(2*s), (n, 1, oo)) == Piecewise((zeta(2*s), 2*re(s) > 1),
+ (Sum(n**(-2*s), (n, 1, oo)), True))
+ assert summation(1/(n+1)**s, (n, 0, oo)) == Piecewise((zeta(s), re(s) > 1),
+ (Sum((n + 1)**(-s), (n, 0, oo)), True))
+ assert summation(1/(n+q)**s, (n, 0, oo)) == Piecewise(
+ (zeta(s, q), And(~Contains(-q, S.Naturals0), re(s) > 1)),
+ (Sum((n + q)**(-s), (n, 0, oo)), True))
+ assert summation(1/(n+q)**s, (n, q, oo)) == Piecewise(
+ (zeta(s, 2*q), And(~Contains(-2*q, S.Naturals0), re(s) > 1)),
+ (Sum((n + q)**(-s), (n, q, oo)), True))
+ assert summation(1/n**2, (n, 1, oo)) == zeta(2)
+ assert summation(1/n**s, (n, 0, oo)) == Sum(n**(-s), (n, 0, oo))
+ assert summation(1/(n+1)**(2+I), (n, 0, oo)) == zeta(2+I)
+ t = symbols('t', real=True, positive=True)
+ assert summation(1/(n+I)**(t+1), (n, 0, oo)) == zeta(t+1, I)
+
+
+def test_Product_doit():
+ assert Product(n*Integral(a**2), (n, 1, 3)).doit() == 2 * a**9 / 9
+ assert Product(n*Integral(a**2), (n, 1, 3)).doit(deep=False) == \
+ 6*Integral(a**2)**3
+ assert product(n*Integral(a**2), (n, 1, 3)) == 6*Integral(a**2)**3
+
+
+def test_Sum_interface():
+ assert isinstance(Sum(0, (n, 0, 2)), Sum)
+ assert Sum(nan, (n, 0, 2)) is nan
+ assert Sum(nan, (n, 0, oo)) is nan
+ assert Sum(0, (n, 0, 2)).doit() == 0
+ assert isinstance(Sum(0, (n, 0, oo)), Sum)
+ assert Sum(0, (n, 0, oo)).doit() == 0
+ raises(ValueError, lambda: Sum(1))
+ raises(ValueError, lambda: summation(1))
+
+
+def test_diff():
+ assert Sum(x, (x, 1, 2)).diff(x) == 0
+ assert Sum(x*y, (x, 1, 2)).diff(x) == 0
+ assert Sum(x*y, (y, 1, 2)).diff(x) == Sum(y, (y, 1, 2))
+ e = Sum(x*y, (x, 1, a))
+ assert e.diff(a) == Derivative(e, a)
+ assert Sum(x*y, (x, 1, 3), (a, 2, 5)).diff(y).doit() == \
+ Sum(x*y, (x, 1, 3), (a, 2, 5)).doit().diff(y) == 24
+ assert Sum(x, (x, 1, 2)).diff(y) == 0
+
+
+def test_hypersum():
+ assert simplify(summation(x**n/fac(n), (n, 1, oo))) == -1 + exp(x)
+ assert summation((-1)**n * x**(2*n) / fac(2*n), (n, 0, oo)) == cos(x)
+ assert simplify(summation((-1)**n*x**(2*n + 1) /
+ factorial(2*n + 1), (n, 3, oo))) == -x + sin(x) + x**3/6 - x**5/120
+
+ assert summation(1/(n + 2)**3, (n, 1, oo)) == Rational(-9, 8) + zeta(3)
+ assert summation(1/n**4, (n, 1, oo)) == pi**4/90
+
+ s = summation(x**n*n, (n, -oo, 0))
+ assert s.is_Piecewise
+ assert s.args[0].args[0] == -1/(x*(1 - 1/x)**2)
+ assert s.args[0].args[1] == (abs(1/x) < 1)
+
+ m = Symbol('n', integer=True, positive=True)
+ assert summation(binomial(m, k), (k, 0, m)) == 2**m
+
+
+def test_issue_4170():
+ assert summation(1/factorial(k), (k, 0, oo)) == E
+
+
+def test_is_commutative():
+ from sympy.physics.secondquant import NO, F, Fd
+ m = Symbol('m', commutative=False)
+ for f in (Sum, Product, Integral):
+ assert f(z, (z, 1, 1)).is_commutative is True
+ assert f(z*y, (z, 1, 6)).is_commutative is True
+ assert f(m*x, (x, 1, 2)).is_commutative is False
+
+ assert f(NO(Fd(x)*F(y))*z, (z, 1, 2)).is_commutative is False
+
+
+def test_is_zero():
+ for func in [Sum, Product]:
+ assert func(0, (x, 1, 1)).is_zero is True
+ assert func(x, (x, 1, 1)).is_zero is None
+
+ assert Sum(0, (x, 1, 0)).is_zero is True
+ assert Product(0, (x, 1, 0)).is_zero is False
+
+
+def test_is_number():
+ # is_number should not rely on evaluation or assumptions,
+ # it should be equivalent to `not foo.free_symbols`
+ assert Sum(1, (x, 1, 1)).is_number is True
+ assert Sum(1, (x, 1, x)).is_number is False
+ assert Sum(0, (x, y, z)).is_number is False
+ assert Sum(x, (y, 1, 2)).is_number is False
+ assert Sum(x, (y, 1, 1)).is_number is False
+ assert Sum(x, (x, 1, 2)).is_number is True
+ assert Sum(x*y, (x, 1, 2), (y, 1, 3)).is_number is True
+
+ assert Product(2, (x, 1, 1)).is_number is True
+ assert Product(2, (x, 1, y)).is_number is False
+ assert Product(0, (x, y, z)).is_number is False
+ assert Product(1, (x, y, z)).is_number is False
+ assert Product(x, (y, 1, x)).is_number is False
+ assert Product(x, (y, 1, 2)).is_number is False
+ assert Product(x, (y, 1, 1)).is_number is False
+ assert Product(x, (x, 1, 2)).is_number is True
+
+
+def test_free_symbols():
+ for func in [Sum, Product]:
+ assert func(1, (x, 1, 2)).free_symbols == set()
+ assert func(0, (x, 1, y)).free_symbols == {y}
+ assert func(2, (x, 1, y)).free_symbols == {y}
+ assert func(x, (x, 1, 2)).free_symbols == set()
+ assert func(x, (x, 1, y)).free_symbols == {y}
+ assert func(x, (y, 1, y)).free_symbols == {x, y}
+ assert func(x, (y, 1, 2)).free_symbols == {x}
+ assert func(x, (y, 1, 1)).free_symbols == {x}
+ assert func(x, (y, 1, z)).free_symbols == {x, z}
+ assert func(x, (x, 1, y), (y, 1, 2)).free_symbols == set()
+ assert func(x, (x, 1, y), (y, 1, z)).free_symbols == {z}
+ assert func(x, (x, 1, y), (y, 1, y)).free_symbols == {y}
+ assert func(x, (y, 1, y), (y, 1, z)).free_symbols == {x, z}
+ assert Sum(1, (x, 1, y)).free_symbols == {y}
+ # free_symbols answers whether the object *as written* has free symbols,
+ # not whether the evaluated expression has free symbols
+ assert Product(1, (x, 1, y)).free_symbols == {y}
+ # don't count free symbols that are not independent of integration
+ # variable(s)
+ assert func(f(x), (f(x), 1, 2)).free_symbols == set()
+ assert func(f(x), (f(x), 1, x)).free_symbols == {x}
+ assert func(f(x), (f(x), 1, y)).free_symbols == {y}
+ assert func(f(x), (z, 1, y)).free_symbols == {x, y}
+
+
+def test_conjugate_transpose():
+ A, B = symbols("A B", commutative=False)
+ p = Sum(A*B**n, (n, 1, 3))
+ assert p.adjoint().doit() == p.doit().adjoint()
+ assert p.conjugate().doit() == p.doit().conjugate()
+ assert p.transpose().doit() == p.doit().transpose()
+
+ p = Sum(B**n*A, (n, 1, 3))
+ assert p.adjoint().doit() == p.doit().adjoint()
+ assert p.conjugate().doit() == p.doit().conjugate()
+ assert p.transpose().doit() == p.doit().transpose()
+
+
+def test_noncommutativity_honoured():
+ A, B = symbols("A B", commutative=False)
+ M = symbols('M', integer=True, positive=True)
+ p = Sum(A*B**n, (n, 1, M))
+ assert p.doit() == A*Piecewise((M, Eq(B, 1)),
+ ((B - B**(M + 1))*(1 - B)**(-1), True))
+
+ p = Sum(B**n*A, (n, 1, M))
+ assert p.doit() == Piecewise((M, Eq(B, 1)),
+ ((B - B**(M + 1))*(1 - B)**(-1), True))*A
+
+ p = Sum(B**n*A*B**n, (n, 1, M))
+ assert p.doit() == p
+
+
+def test_issue_4171():
+ assert summation(factorial(2*k + 1)/factorial(2*k), (k, 0, oo)) is oo
+ assert summation(2*k + 1, (k, 0, oo)) is oo
+
+
+def test_issue_6273():
+ assert Sum(x, (x, 1, n)).n(2, subs={n: 1}) == Float(1, 2)
+
+
+def test_issue_6274():
+ assert Sum(x, (x, 1, 0)).doit() == 0
+ assert NS(Sum(x, (x, 1, 0))) == '0'
+ assert Sum(n, (n, 10, 5)).doit() == -30
+ assert NS(Sum(n, (n, 10, 5))) == '-30.0000000000000'
+
+
+def test_simplify_sum():
+ y, t, v = symbols('y, t, v')
+
+ _simplify = lambda e: simplify(e, doit=False)
+ assert _simplify(Sum(x*y, (x, n, m), (y, a, k)) + \
+ Sum(y, (x, n, m), (y, a, k))) == Sum(y * (x + 1), (x, n, m), (y, a, k))
+ assert _simplify(Sum(x, (x, n, m)) + Sum(x, (x, m + 1, a))) == \
+ Sum(x, (x, n, a))
+ assert _simplify(Sum(x, (x, k + 1, a)) + Sum(x, (x, n, k))) == \
+ Sum(x, (x, n, a))
+ assert _simplify(Sum(x, (x, k + 1, a)) + Sum(x + 1, (x, n, k))) == \
+ Sum(x, (x, n, a)) + Sum(1, (x, n, k))
+ assert _simplify(Sum(x, (x, 0, 3)) * 3 + 3 * Sum(x, (x, 4, 6)) + \
+ 4 * Sum(z, (z, 0, 1))) == 4*Sum(z, (z, 0, 1)) + 3*Sum(x, (x, 0, 6))
+ assert _simplify(3*Sum(x**2, (x, a, b)) + Sum(x, (x, a, b))) == \
+ Sum(x*(3*x + 1), (x, a, b))
+ assert _simplify(Sum(x**3, (x, n, k)) * 3 + 3 * Sum(x, (x, n, k)) + \
+ 4 * y * Sum(z, (z, n, k))) + 1 == \
+ 4*y*Sum(z, (z, n, k)) + 3*Sum(x**3 + x, (x, n, k)) + 1
+ assert _simplify(Sum(x, (x, a, b)) + 1 + Sum(x, (x, b + 1, c))) == \
+ 1 + Sum(x, (x, a, c))
+ assert _simplify(Sum(x, (t, a, b)) + Sum(y, (t, a, b)) + \
+ Sum(x, (t, b+1, c))) == x * Sum(1, (t, a, c)) + y * Sum(1, (t, a, b))
+ assert _simplify(Sum(x, (t, a, b)) + Sum(x, (t, b+1, c)) + \
+ Sum(y, (t, a, b))) == x * Sum(1, (t, a, c)) + y * Sum(1, (t, a, b))
+ assert _simplify(Sum(x, (t, a, b)) + 2 * Sum(x, (t, b+1, c))) == \
+ _simplify(Sum(x, (t, a, b)) + Sum(x, (t, b+1, c)) + Sum(x, (t, b+1, c)))
+ assert _simplify(Sum(x, (x, a, b))*Sum(x**2, (x, a, b))) == \
+ Sum(x, (x, a, b)) * Sum(x**2, (x, a, b))
+ assert _simplify(Sum(x, (t, a, b)) + Sum(y, (t, a, b)) + Sum(z, (t, a, b))) \
+ == (x + y + z) * Sum(1, (t, a, b)) # issue 8596
+ assert _simplify(Sum(x, (t, a, b)) + Sum(y, (t, a, b)) + Sum(z, (t, a, b)) + \
+ Sum(v, (t, a, b))) == (x + y + z + v) * Sum(1, (t, a, b)) # issue 8596
+ assert _simplify(Sum(x * y, (x, a, b)) / (3 * y)) == \
+ (Sum(x, (x, a, b)) / 3)
+ assert _simplify(Sum(f(x) * y * z, (x, a, b)) / (y * z)) \
+ == Sum(f(x), (x, a, b))
+ assert _simplify(Sum(c * x, (x, a, b)) - c * Sum(x, (x, a, b))) == 0
+ assert _simplify(c * (Sum(x, (x, a, b)) + y)) == c * (y + Sum(x, (x, a, b)))
+ assert _simplify(c * (Sum(x, (x, a, b)) + y * Sum(x, (x, a, b)))) == \
+ c * (y + 1) * Sum(x, (x, a, b))
+ assert _simplify(Sum(Sum(c * x, (x, a, b)), (y, a, b))) == \
+ c * Sum(x, (x, a, b), (y, a, b))
+ assert _simplify(Sum((3 + y) * Sum(c * x, (x, a, b)), (y, a, b))) == \
+ c * Sum((3 + y), (y, a, b)) * Sum(x, (x, a, b))
+ assert _simplify(Sum((3 + t) * Sum(c * t, (x, a, b)), (y, a, b))) == \
+ c*t*(t + 3)*Sum(1, (x, a, b))*Sum(1, (y, a, b))
+ assert _simplify(Sum(Sum(d * t, (x, a, b - 1)) + \
+ Sum(d * t, (x, b, c)), (t, a, b))) == \
+ d * Sum(1, (x, a, c)) * Sum(t, (t, a, b))
+ assert _simplify(Sum(sin(t)**2 + cos(t)**2 + 1, (t, a, b))) == \
+ 2 * Sum(1, (t, a, b))
+
+
+def test_change_index():
+ b, v, w = symbols('b, v, w', integer = True)
+
+ assert Sum(x, (x, a, b)).change_index(x, x + 1, y) == \
+ Sum(y - 1, (y, a + 1, b + 1))
+ assert Sum(x**2, (x, a, b)).change_index( x, x - 1) == \
+ Sum((x+1)**2, (x, a - 1, b - 1))
+ assert Sum(x**2, (x, a, b)).change_index( x, -x, y) == \
+ Sum((-y)**2, (y, -b, -a))
+ assert Sum(x, (x, a, b)).change_index( x, -x - 1) == \
+ Sum(-x - 1, (x, -b - 1, -a - 1))
+ assert Sum(x*y, (x, a, b), (y, c, d)).change_index( x, x - 1, z) == \
+ Sum((z + 1)*y, (z, a - 1, b - 1), (y, c, d))
+ assert Sum(x, (x, a, b)).change_index( x, x + v) == \
+ Sum(-v + x, (x, a + v, b + v))
+ assert Sum(x, (x, a, b)).change_index( x, -x - v) == \
+ Sum(-v - x, (x, -b - v, -a - v))
+ assert Sum(x, (x, a, b)).change_index(x, w*x, v) == \
+ Sum(v/w, (v, b*w, a*w))
+ raises(ValueError, lambda: Sum(x, (x, a, b)).change_index(x, 2*x))
+
+
+def test_reorder():
+ b, y, c, d, z = symbols('b, y, c, d, z', integer = True)
+
+ assert Sum(x*y, (x, a, b), (y, c, d)).reorder((0, 1)) == \
+ Sum(x*y, (y, c, d), (x, a, b))
+ assert Sum(x, (x, a, b), (x, c, d)).reorder((0, 1)) == \
+ Sum(x, (x, c, d), (x, a, b))
+ assert Sum(x*y + z, (x, a, b), (z, m, n), (y, c, d)).reorder(\
+ (2, 0), (0, 1)) == Sum(x*y + z, (z, m, n), (y, c, d), (x, a, b))
+ assert Sum(x*y*z, (x, a, b), (y, c, d), (z, m, n)).reorder(\
+ (0, 1), (1, 2), (0, 2)) == Sum(x*y*z, (x, a, b), (z, m, n), (y, c, d))
+ assert Sum(x*y*z, (x, a, b), (y, c, d), (z, m, n)).reorder(\
+ (x, y), (y, z), (x, z)) == Sum(x*y*z, (x, a, b), (z, m, n), (y, c, d))
+ assert Sum(x*y, (x, a, b), (y, c, d)).reorder((x, 1)) == \
+ Sum(x*y, (y, c, d), (x, a, b))
+ assert Sum(x*y, (x, a, b), (y, c, d)).reorder((y, x)) == \
+ Sum(x*y, (y, c, d), (x, a, b))
+
+
+def test_reverse_order():
+ assert Sum(x, (x, 0, 3)).reverse_order(0) == Sum(-x, (x, 4, -1))
+ assert Sum(x*y, (x, 1, 5), (y, 0, 6)).reverse_order(0, 1) == \
+ Sum(x*y, (x, 6, 0), (y, 7, -1))
+ assert Sum(x, (x, 1, 2)).reverse_order(0) == Sum(-x, (x, 3, 0))
+ assert Sum(x, (x, 1, 3)).reverse_order(0) == Sum(-x, (x, 4, 0))
+ assert Sum(x, (x, 1, a)).reverse_order(0) == Sum(-x, (x, a + 1, 0))
+ assert Sum(x, (x, a, 5)).reverse_order(0) == Sum(-x, (x, 6, a - 1))
+ assert Sum(x, (x, a + 1, a + 5)).reverse_order(0) == \
+ Sum(-x, (x, a + 6, a))
+ assert Sum(x, (x, a + 1, a + 2)).reverse_order(0) == \
+ Sum(-x, (x, a + 3, a))
+ assert Sum(x, (x, a + 1, a + 1)).reverse_order(0) == \
+ Sum(-x, (x, a + 2, a))
+ assert Sum(x, (x, a, b)).reverse_order(0) == Sum(-x, (x, b + 1, a - 1))
+ assert Sum(x, (x, a, b)).reverse_order(x) == Sum(-x, (x, b + 1, a - 1))
+ assert Sum(x*y, (x, a, b), (y, 2, 5)).reverse_order(x, 1) == \
+ Sum(x*y, (x, b + 1, a - 1), (y, 6, 1))
+ assert Sum(x*y, (x, a, b), (y, 2, 5)).reverse_order(y, x) == \
+ Sum(x*y, (x, b + 1, a - 1), (y, 6, 1))
+
+
+def test_issue_7097():
+ assert sum(x**n/n for n in range(1, 401)) == summation(x**n/n, (n, 1, 400))
+
+
+def test_factor_expand_subs():
+ # test factoring
+ assert Sum(4 * x, (x, 1, y)).factor() == 4 * Sum(x, (x, 1, y))
+ assert Sum(x * a, (x, 1, y)).factor() == a * Sum(x, (x, 1, y))
+ assert Sum(4 * x * a, (x, 1, y)).factor() == 4 * a * Sum(x, (x, 1, y))
+ assert Sum(4 * x * y, (x, 1, y)).factor() == 4 * y * Sum(x, (x, 1, y))
+
+ # test expand
+ _x = Symbol('x', zero=False)
+ assert Sum(x+1,(x,1,y)).expand() == Sum(x,(x,1,y)) + Sum(1,(x,1,y))
+ assert Sum(x+a*x**2,(x,1,y)).expand() == Sum(x,(x,1,y)) + Sum(a*x**2,(x,1,y))
+ assert Sum(_x**(n + 1)*(n + 1), (n, -1, oo)).expand() \
+ == Sum(n*_x*_x**n + _x*_x**n, (n, -1, oo))
+ assert Sum(x**(n + 1)*(n + 1), (n, -1, oo)).expand(power_exp=False) \
+ == Sum(n*x**(n + 1) + x**(n + 1), (n, -1, oo))
+ assert Sum(x**(n + 1)*(n + 1), (n, -1, oo)).expand(force=True) \
+ == Sum(x*x**n, (n, -1, oo)) + Sum(n*x*x**n, (n, -1, oo))
+ assert Sum(a*n+a*n**2,(n,0,4)).expand() \
+ == Sum(a*n,(n,0,4)) + Sum(a*n**2,(n,0,4))
+ assert Sum(_x**a*_x**n,(x,0,3)) \
+ == Sum(_x**(a+n),(x,0,3)).expand(power_exp=True)
+ _a, _n = symbols('a n', positive=True)
+ assert Sum(x**(_a+_n),(x,0,3)).expand(power_exp=True) \
+ == Sum(x**_a*x**_n, (x, 0, 3))
+ assert Sum(x**(_a-_n),(x,0,3)).expand(power_exp=True) \
+ == Sum(x**(_a-_n),(x,0,3)).expand(power_exp=False)
+
+ # test subs
+ assert Sum(1/(1+a*x**2),(x,0,3)).subs([(a,3)]) == Sum(1/(1+3*x**2),(x,0,3))
+ assert Sum(x*y,(x,0,y),(y,0,x)).subs([(x,3)]) == Sum(x*y,(x,0,y),(y,0,3))
+ assert Sum(x,(x,1,10)).subs([(x,y-2)]) == Sum(x,(x,1,10))
+ assert Sum(1/x,(x,1,10)).subs([(x,(3+n)**3)]) == Sum(1/x,(x,1,10))
+ assert Sum(1/x,(x,1,10)).subs([(x,3*x-2)]) == Sum(1/x,(x,1,10))
+
+
+def test_distribution_over_equality():
+ assert Product(Eq(x*2, f(x)), (x, 1, 3)).doit() == Eq(48, f(1)*f(2)*f(3))
+ assert Sum(Eq(f(x), x**2), (x, 0, y)) == \
+ Eq(Sum(f(x), (x, 0, y)), Sum(x**2, (x, 0, y)))
+
+
+def test_issue_2787():
+ n, k = symbols('n k', positive=True, integer=True)
+ p = symbols('p', positive=True)
+ binomial_dist = binomial(n, k)*p**k*(1 - p)**(n - k)
+ s = Sum(binomial_dist*k, (k, 0, n))
+ res = s.doit().simplify()
+ ans = Piecewise(
+ (n*p, x),
+ (Sum(k*p**k*binomial(n, k)*(1 - p)**(n - k), (k, 0, n)),
+ True)).subs(x, (Eq(n, 1) | (n > 1)) & (p/Abs(p - 1) <= 1))
+ ans2 = Piecewise(
+ (n*p, x),
+ (factorial(n)*Sum(p**k*(1 - p)**(-k + n)/
+ (factorial(-k + n)*factorial(k - 1)), (k, 0, n)),
+ True)).subs(x, (Eq(n, 1) | (n > 1)) & (p/Abs(p - 1) <= 1))
+ assert res in [ans, ans2] # XXX system dependent
+ # Issue #17165: make sure that another simplify does not complicate
+ # the result by much. Why didn't first simplify replace
+ # Eq(n, 1) | (n > 1) with True?
+ assert res.simplify().count_ops() <= res.count_ops() + 2
+
+
+def test_issue_4668():
+ assert summation(1/n, (n, 2, oo)) is oo
+
+
+def test_matrix_sum():
+ A = Matrix([[0, 1], [n, 0]])
+
+ result = Sum(A, (n, 0, 3)).doit()
+ assert result == Matrix([[0, 4], [6, 0]])
+ assert result.__class__ == ImmutableDenseMatrix
+
+ A = SparseMatrix([[0, 1], [n, 0]])
+
+ result = Sum(A, (n, 0, 3)).doit()
+ assert result.__class__ == ImmutableSparseMatrix
+
+
+def test_failing_matrix_sum():
+ n = Symbol('n')
+ # TODO Implement matrix geometric series summation.
+ A = Matrix([[0, 1, 0], [-1, 0, 0], [0, 0, 0]])
+ assert Sum(A ** n, (n, 1, 4)).doit() == \
+ Matrix([[0, 0, 0], [0, 0, 0], [0, 0, 0]])
+ # issue sympy/sympy#16989
+ assert summation(A**n, (n, 1, 1)) == A
+
+
+def test_indexed_idx_sum():
+ i = symbols('i', cls=Idx)
+ r = Indexed('r', i)
+ assert Sum(r, (i, 0, 3)).doit() == sum(r.xreplace({i: j}) for j in range(4))
+ assert Product(r, (i, 0, 3)).doit() == prod([r.xreplace({i: j}) for j in range(4)])
+
+ j = symbols('j', integer=True)
+ assert Sum(r, (i, j, j+2)).doit() == sum(r.xreplace({i: j+k}) for k in range(3))
+ assert Product(r, (i, j, j+2)).doit() == prod([r.xreplace({i: j+k}) for k in range(3)])
+
+ k = Idx('k', range=(1, 3))
+ A = IndexedBase('A')
+ assert Sum(A[k], k).doit() == sum(A[Idx(j, (1, 3))] for j in range(1, 4))
+ assert Product(A[k], k).doit() == prod([A[Idx(j, (1, 3))] for j in range(1, 4)])
+
+ raises(ValueError, lambda: Sum(A[k], (k, 1, 4)))
+ raises(ValueError, lambda: Sum(A[k], (k, 0, 3)))
+ raises(ValueError, lambda: Sum(A[k], (k, 2, oo)))
+
+ raises(ValueError, lambda: Product(A[k], (k, 1, 4)))
+ raises(ValueError, lambda: Product(A[k], (k, 0, 3)))
+ raises(ValueError, lambda: Product(A[k], (k, 2, oo)))
+
+
+@slow
+def test_is_convergent():
+ # divergence tests --
+ assert Sum(n/(2*n + 1), (n, 1, oo)).is_convergent() is S.false
+ assert Sum(factorial(n)/5**n, (n, 1, oo)).is_convergent() is S.false
+ assert Sum(3**(-2*n - 1)*n**n, (n, 1, oo)).is_convergent() is S.false
+ assert Sum((-1)**n*n, (n, 3, oo)).is_convergent() is S.false
+ assert Sum((-1)**n, (n, 1, oo)).is_convergent() is S.false
+ assert Sum(log(1/n), (n, 2, oo)).is_convergent() is S.false
+ assert Sum(sin(n), (n, 1, oo)).is_convergent() is S.false
+
+ # Raabe's test --
+ assert Sum(Product((3*m),(m,1,n))/Product((3*m+4),(m,1,n)),(n,1,oo)).is_convergent() is S.true
+
+ # root test --
+ assert Sum((-12)**n/n, (n, 1, oo)).is_convergent() is S.false
+
+ # integral test --
+
+ # p-series test --
+ assert Sum(1/(n**2 + 1), (n, 1, oo)).is_convergent() is S.true
+ assert Sum(1/n**Rational(6, 5), (n, 1, oo)).is_convergent() is S.true
+ assert Sum(2/(n*sqrt(n - 1)), (n, 2, oo)).is_convergent() is S.true
+ assert Sum(1/(sqrt(n)*sqrt(n)), (n, 2, oo)).is_convergent() is S.false
+ assert Sum(factorial(n) / factorial(n+2), (n, 1, oo)).is_convergent() is S.true
+ assert Sum(rf(5,n)/rf(7,n),(n,1,oo)).is_convergent() is S.true
+ assert Sum((rf(1, n)*rf(2, n))/(rf(3, n)*factorial(n)),(n,1,oo)).is_convergent() is S.false
+
+ # comparison test --
+ assert Sum(1/(n + log(n)), (n, 1, oo)).is_convergent() is S.false
+ assert Sum(1/(n**2*log(n)), (n, 2, oo)).is_convergent() is S.true
+ assert Sum(1/(n*log(n)), (n, 2, oo)).is_convergent() is S.false
+ assert Sum(2/(n*log(n)*log(log(n))**2), (n, 5, oo)).is_convergent() is S.true
+ assert Sum(2/(n*log(n)**2), (n, 2, oo)).is_convergent() is S.true
+ assert Sum((n - 1)/(n**2*log(n)**3), (n, 2, oo)).is_convergent() is S.true
+ assert Sum(1/(n*log(n)*log(log(n))), (n, 5, oo)).is_convergent() is S.false
+ assert Sum((n - 1)/(n*log(n)**3), (n, 3, oo)).is_convergent() is S.false
+ assert Sum(2/(n**2*log(n)), (n, 2, oo)).is_convergent() is S.true
+ assert Sum(1/(n*sqrt(log(n))*log(log(n))), (n, 100, oo)).is_convergent() is S.false
+ assert Sum(log(log(n))/(n*log(n)**2), (n, 100, oo)).is_convergent() is S.true
+ assert Sum(log(n)/n**2, (n, 5, oo)).is_convergent() is S.true
+
+ # alternating series tests --
+ assert Sum((-1)**(n - 1)/(n**2 - 1), (n, 3, oo)).is_convergent() is S.true
+
+ # with -negativeInfinite Limits
+ assert Sum(1/(n**2 + 1), (n, -oo, 1)).is_convergent() is S.true
+ assert Sum(1/(n - 1), (n, -oo, -1)).is_convergent() is S.false
+ assert Sum(1/(n**2 - 1), (n, -oo, -5)).is_convergent() is S.true
+ assert Sum(1/(n**2 - 1), (n, -oo, 2)).is_convergent() is S.true
+ assert Sum(1/(n**2 - 1), (n, -oo, oo)).is_convergent() is S.true
+
+ # piecewise functions
+ f = Piecewise((n**(-2), n <= 1), (n**2, n > 1))
+ assert Sum(f, (n, 1, oo)).is_convergent() is S.false
+ assert Sum(f, (n, -oo, oo)).is_convergent() is S.false
+ assert Sum(f, (n, 1, 100)).is_convergent() is S.true
+ #assert Sum(f, (n, -oo, 1)).is_convergent() is S.true
+
+ # integral test
+
+ assert Sum(log(n)/n**3, (n, 1, oo)).is_convergent() is S.true
+ assert Sum(-log(n)/n**3, (n, 1, oo)).is_convergent() is S.true
+ # the following function has maxima located at (x, y) =
+ # (1.2, 0.43), (3.0, -0.25) and (6.8, 0.050)
+ eq = (x - 2)*(x**2 - 6*x + 4)*exp(-x)
+ assert Sum(eq, (x, 1, oo)).is_convergent() is S.true
+ assert Sum(eq, (x, 1, 2)).is_convergent() is S.true
+ assert Sum(1/(x**3), (x, 1, oo)).is_convergent() is S.true
+ assert Sum(1/(x**S.Half), (x, 1, oo)).is_convergent() is S.false
+
+ # issue 19545
+ assert Sum(1/n - 3/(3*n +2), (n, 1, oo)).is_convergent() is S.true
+
+ # issue 19836
+ assert Sum(4/(n + 2) - 5/(n + 1) + 1/n,(n, 7, oo)).is_convergent() is S.true
+
+
+def test_is_absolutely_convergent():
+ assert Sum((-1)**n, (n, 1, oo)).is_absolutely_convergent() is S.false
+ assert Sum((-1)**n/n**2, (n, 1, oo)).is_absolutely_convergent() is S.true
+
+
+@XFAIL
+def test_convergent_failing():
+ # dirichlet tests
+ assert Sum(sin(n)/n, (n, 1, oo)).is_convergent() is S.true
+ assert Sum(sin(2*n)/n, (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_6966():
+ i, k, m = symbols('i k m', integer=True)
+ z_i, q_i = symbols('z_i q_i')
+ a_k = Sum(-q_i*z_i/k,(i,1,m))
+ b_k = a_k.diff(z_i)
+ assert isinstance(b_k, Sum)
+ assert b_k == Sum(-q_i/k,(i,1,m))
+
+
+def test_issue_10156():
+ cx = Sum(2*y**2*x, (x, 1,3))
+ e = 2*y*Sum(2*cx*x**2, (x, 1, 9))
+ assert e.factor() == \
+ 8*y**3*Sum(x, (x, 1, 3))*Sum(x**2, (x, 1, 9))
+
+
+def test_issue_10973():
+ assert Sum((-n + (n**3 + 1)**(S(1)/3))/log(n), (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_14103():
+ assert Sum(sin(n)**2 + cos(n)**2 - 1, (n, 1, oo)).is_convergent() is S.true
+ assert Sum(sin(pi*n), (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_14129():
+ x = Symbol('x', zero=False)
+ assert Sum( k*x**k, (k, 0, n-1)).doit() == \
+ Piecewise((n**2/2 - n/2, Eq(x, 1)), ((n*x*x**n -
+ n*x**n - x*x**n + x)/(x - 1)**2, True))
+ assert Sum( x**k, (k, 0, n-1)).doit() == \
+ Piecewise((n, Eq(x, 1)), ((-x**n + 1)/(-x + 1), True))
+ assert Sum( k*(x/y+x)**k, (k, 0, n-1)).doit() == \
+ Piecewise((n*(n - 1)/2, Eq(x, y/(y + 1))),
+ (x*(y + 1)*(n*x*y*(x + x/y)**(n - 1) +
+ n*x*(x + x/y)**(n - 1) - n*y*(x + x/y)**(n - 1) -
+ x*y*(x + x/y)**(n - 1) - x*(x + x/y)**(n - 1) + y)/
+ (x*y + x - y)**2, True))
+
+
+def test_issue_14112():
+ assert Sum((-1)**n/sqrt(n), (n, 1, oo)).is_absolutely_convergent() is S.false
+ assert Sum((-1)**(2*n)/n, (n, 1, oo)).is_convergent() is S.false
+ assert Sum((-2)**n + (-3)**n, (n, 1, oo)).is_convergent() is S.false
+
+
+def test_issue_14219():
+ A = diag(0, 2, -3)
+ res = diag(1, 15, -20)
+ assert Sum(A**n, (n, 0, 3)).doit() == res
+
+
+def test_sin_times_absolutely_convergent():
+ assert Sum(sin(n) / n**3, (n, 1, oo)).is_convergent() is S.true
+ assert Sum(sin(n) * log(n) / n**3, (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_14111():
+ assert Sum(1/log(log(n)), (n, 22, oo)).is_convergent() is S.false
+
+
+def test_issue_14484():
+ assert Sum(sin(n)/log(log(n)), (n, 22, oo)).is_convergent() is S.false
+
+
+def test_issue_14640():
+ i, n = symbols("i n", integer=True)
+ a, b, c = symbols("a b c", zero=False)
+
+ assert Sum(a**-i/(a - b), (i, 0, n)).doit() == Sum(
+ 1/(a*a**i - a**i*b), (i, 0, n)).doit() == Piecewise(
+ (n + 1, Eq(1/a, 1)),
+ ((-a**(-n - 1) + 1)/(1 - 1/a), True))/(a - b)
+
+ assert Sum((b*a**i - c*a**i)**-2, (i, 0, n)).doit() == Piecewise(
+ (n + 1, Eq(a**(-2), 1)),
+ ((-a**(-2*n - 2) + 1)/(1 - 1/a**2), True))/(b - c)**2
+
+ s = Sum(i*(a**(n - i) - b**(n - i))/(a - b), (i, 0, n)).doit()
+ assert not s.has(Sum)
+ assert s.subs({a: 2, b: 3, n: 5}) == 122
+
+
+def test_issue_15943():
+ s = Sum(binomial(n, k)*factorial(n - k), (k, 0, n)).doit().rewrite(gamma)
+ assert s == -E*(n + 1)*gamma(n + 1)*lowergamma(n + 1, 1)/gamma(n + 2
+ ) + E*gamma(n + 1)
+ assert s.simplify() == E*(factorial(n) - lowergamma(n + 1, 1))
+
+
+def test_Sum_dummy_eq():
+ assert not Sum(x, (x, a, b)).dummy_eq(1)
+ assert not Sum(x, (x, a, b)).dummy_eq(Sum(x, (x, a, b), (a, 1, 2)))
+ assert not Sum(x, (x, a, b)).dummy_eq(Sum(x, (x, a, c)))
+ assert Sum(x, (x, a, b)).dummy_eq(Sum(x, (x, a, b)))
+ d = Dummy()
+ assert Sum(x, (x, a, d)).dummy_eq(Sum(x, (x, a, c)), c)
+ assert not Sum(x, (x, a, d)).dummy_eq(Sum(x, (x, a, c)))
+ assert Sum(x, (x, a, c)).dummy_eq(Sum(y, (y, a, c)))
+ assert Sum(x, (x, a, d)).dummy_eq(Sum(y, (y, a, c)), c)
+ assert not Sum(x, (x, a, d)).dummy_eq(Sum(y, (y, a, c)))
+
+
+def test_issue_15852():
+ assert summation(x**y*y, (y, -oo, oo)).doit() == Sum(x**y*y, (y, -oo, oo))
+
+
+def test_exceptions():
+ S = Sum(x, (x, a, b))
+ raises(ValueError, lambda: S.change_index(x, x**2, y))
+ S = Sum(x, (x, a, b), (x, 1, 4))
+ raises(ValueError, lambda: S.index(x))
+ S = Sum(x, (x, a, b), (y, 1, 4))
+ raises(ValueError, lambda: S.reorder([x]))
+ S = Sum(x, (x, y, b), (y, 1, 4))
+ raises(ReorderError, lambda: S.reorder_limit(0, 1))
+ S = Sum(x*y, (x, a, b), (y, 1, 4))
+ raises(NotImplementedError, lambda: S.is_convergent())
+
+
+def test_sumproducts_assumptions():
+ M = Symbol('M', integer=True, positive=True)
+
+ m = Symbol('m', integer=True)
+ for func in [Sum, Product]:
+ assert func(m, (m, -M, M)).is_positive is None
+ assert func(m, (m, -M, M)).is_nonpositive is None
+ assert func(m, (m, -M, M)).is_negative is None
+ assert func(m, (m, -M, M)).is_nonnegative is None
+ assert func(m, (m, -M, M)).is_finite is True
+
+ m = Symbol('m', integer=True, nonnegative=True)
+ for func in [Sum, Product]:
+ assert func(m, (m, 0, M)).is_positive is None
+ assert func(m, (m, 0, M)).is_nonpositive is None
+ assert func(m, (m, 0, M)).is_negative is False
+ assert func(m, (m, 0, M)).is_nonnegative is True
+ assert func(m, (m, 0, M)).is_finite is True
+
+ m = Symbol('m', integer=True, positive=True)
+ for func in [Sum, Product]:
+ assert func(m, (m, 1, M)).is_positive is True
+ assert func(m, (m, 1, M)).is_nonpositive is False
+ assert func(m, (m, 1, M)).is_negative is False
+ assert func(m, (m, 1, M)).is_nonnegative is True
+ assert func(m, (m, 1, M)).is_finite is True
+
+ m = Symbol('m', integer=True, negative=True)
+ assert Sum(m, (m, -M, -1)).is_positive is False
+ assert Sum(m, (m, -M, -1)).is_nonpositive is True
+ assert Sum(m, (m, -M, -1)).is_negative is True
+ assert Sum(m, (m, -M, -1)).is_nonnegative is False
+ assert Sum(m, (m, -M, -1)).is_finite is True
+ assert Product(m, (m, -M, -1)).is_positive is None
+ assert Product(m, (m, -M, -1)).is_nonpositive is None
+ assert Product(m, (m, -M, -1)).is_negative is None
+ assert Product(m, (m, -M, -1)).is_nonnegative is None
+ assert Product(m, (m, -M, -1)).is_finite is True
+
+ m = Symbol('m', integer=True, nonpositive=True)
+ assert Sum(m, (m, -M, 0)).is_positive is False
+ assert Sum(m, (m, -M, 0)).is_nonpositive is True
+ assert Sum(m, (m, -M, 0)).is_negative is None
+ assert Sum(m, (m, -M, 0)).is_nonnegative is None
+ assert Sum(m, (m, -M, 0)).is_finite is True
+ assert Product(m, (m, -M, 0)).is_positive is None
+ assert Product(m, (m, -M, 0)).is_nonpositive is None
+ assert Product(m, (m, -M, 0)).is_negative is None
+ assert Product(m, (m, -M, 0)).is_nonnegative is None
+ assert Product(m, (m, -M, 0)).is_finite is True
+
+ m = Symbol('m', integer=True)
+ assert Sum(2, (m, 0, oo)).is_positive is None
+ assert Sum(2, (m, 0, oo)).is_nonpositive is None
+ assert Sum(2, (m, 0, oo)).is_negative is None
+ assert Sum(2, (m, 0, oo)).is_nonnegative is None
+ assert Sum(2, (m, 0, oo)).is_finite is None
+
+ assert Product(2, (m, 0, oo)).is_positive is None
+ assert Product(2, (m, 0, oo)).is_nonpositive is None
+ assert Product(2, (m, 0, oo)).is_negative is False
+ assert Product(2, (m, 0, oo)).is_nonnegative is None
+ assert Product(2, (m, 0, oo)).is_finite is None
+
+ assert Product(0, (x, M, M-1)).is_positive is True
+ assert Product(0, (x, M, M-1)).is_finite is True
+
+
+def test_expand_with_assumptions():
+ M = Symbol('M', integer=True, positive=True)
+ x = Symbol('x', positive=True)
+ m = Symbol('m', nonnegative=True)
+ assert log(Product(x**m, (m, 0, M))).expand() == Sum(m*log(x), (m, 0, M))
+ assert log(Product(exp(x**m), (m, 0, M))).expand() == Sum(x**m, (m, 0, M))
+ assert log(Product(x**m, (m, 0, M))).rewrite(Sum).expand() == Sum(m*log(x), (m, 0, M))
+ assert log(Product(exp(x**m), (m, 0, M))).rewrite(Sum).expand() == Sum(x**m, (m, 0, M))
+
+ n = Symbol('n', nonnegative=True)
+ i, j = symbols('i,j', positive=True, integer=True)
+ x, y = symbols('x,y', positive=True)
+ assert log(Product(x**i*y**j, (i, 1, n), (j, 1, m))).expand() \
+ == Sum(i*log(x) + j*log(y), (i, 1, n), (j, 1, m))
+
+ m = Symbol('m', nonnegative=True, integer=True)
+ s = Sum(x**m, (m, 0, M))
+ s_as_product = s.rewrite(Product)
+ assert s_as_product.has(Product)
+ assert s_as_product == log(Product(exp(x**m), (m, 0, M)))
+ assert s_as_product.expand() == s
+ s5 = s.subs(M, 5)
+ s5_as_product = s5.rewrite(Product)
+ assert s5_as_product.has(Product)
+ assert s5_as_product.doit().expand() == s5.doit()
+
+
+def test_has_finite_limits():
+ x = Symbol('x')
+ assert Sum(1, (x, 1, 9)).has_finite_limits is True
+ assert Sum(1, (x, 1, oo)).has_finite_limits is False
+ M = Symbol('M')
+ assert Sum(1, (x, 1, M)).has_finite_limits is None
+ M = Symbol('M', positive=True)
+ assert Sum(1, (x, 1, M)).has_finite_limits is True
+ x = Symbol('x', positive=True)
+ M = Symbol('M')
+ assert Sum(1, (x, 1, M)).has_finite_limits is True
+
+ assert Sum(1, (x, 1, M), (y, -oo, oo)).has_finite_limits is False
+
+def test_has_reversed_limits():
+ assert Sum(1, (x, 1, 1)).has_reversed_limits is False
+ assert Sum(1, (x, 1, 9)).has_reversed_limits is False
+ assert Sum(1, (x, 1, -9)).has_reversed_limits is True
+ assert Sum(1, (x, 1, 0)).has_reversed_limits is True
+ assert Sum(1, (x, 1, oo)).has_reversed_limits is False
+ M = Symbol('M')
+ assert Sum(1, (x, 1, M)).has_reversed_limits is None
+ M = Symbol('M', positive=True, integer=True)
+ assert Sum(1, (x, 1, M)).has_reversed_limits is False
+ assert Sum(1, (x, 1, M), (y, -oo, oo)).has_reversed_limits is False
+ M = Symbol('M', negative=True)
+ assert Sum(1, (x, 1, M)).has_reversed_limits is True
+
+ assert Sum(1, (x, 1, M), (y, -oo, oo)).has_reversed_limits is True
+ assert Sum(1, (x, oo, oo)).has_reversed_limits is None
+
+
+def test_has_empty_sequence():
+ assert Sum(1, (x, 1, 1)).has_empty_sequence is False
+ assert Sum(1, (x, 1, 9)).has_empty_sequence is False
+ assert Sum(1, (x, 1, -9)).has_empty_sequence is False
+ assert Sum(1, (x, 1, 0)).has_empty_sequence is True
+ assert Sum(1, (x, y, y - 1)).has_empty_sequence is True
+ assert Sum(1, (x, 3, 2), (y, -oo, oo)).has_empty_sequence is True
+ assert Sum(1, (y, -oo, oo), (x, 3, 2)).has_empty_sequence is True
+ assert Sum(1, (x, oo, oo)).has_empty_sequence is False
+
+
+def test_empty_sequence():
+ assert Product(x*y, (x, -oo, oo), (y, 1, 0)).doit() == 1
+ assert Product(x*y, (y, 1, 0), (x, -oo, oo)).doit() == 1
+ assert Sum(x, (x, -oo, oo), (y, 1, 0)).doit() == 0
+ assert Sum(x, (y, 1, 0), (x, -oo, oo)).doit() == 0
+
+
+def test_issue_8016():
+ k = Symbol('k', integer=True)
+ n, m = symbols('n, m', integer=True, positive=True)
+ s = Sum(binomial(m, k)*binomial(m, n - k)*(-1)**k, (k, 0, n))
+ assert s.doit().simplify() == \
+ cos(pi*n/2)*gamma(m + 1)/gamma(n/2 + 1)/gamma(m - n/2 + 1)
+
+
+def test_issue_14313():
+ assert Sum(S.Half**floor(n/2), (n, 1, oo)).is_convergent()
+
+
+def test_issue_14563():
+ # The assertion was failing due to no assumptions methods in Sums and Product
+ assert 1 % Sum(1, (x, 0, 1)) == 1
+
+
+def test_issue_16735():
+ assert Sum(5**n/gamma(n+1), (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_14871():
+ assert Sum((Rational(1, 10))**n*rf(0, n)/factorial(n), (n, 0, oo)).rewrite(factorial).doit() == 1
+
+
+def test_issue_17165():
+ n = symbols("n", integer=True)
+ x = symbols('x')
+ s = (x*Sum(x**n, (n, -1, oo)))
+ ssimp = s.doit().simplify()
+
+ assert ssimp == Piecewise((-1/(x - 1), (x > -1) & (x < 1)),
+ (x*Sum(x**n, (n, -1, oo)), True)), ssimp
+ assert ssimp.simplify() == ssimp
+
+
+def test_issue_19379():
+ assert Sum(factorial(n)/factorial(n + 2), (n, 1, oo)).is_convergent() is S.true
+
+
+def test_issue_20777():
+ assert Sum(exp(x*sin(n/m)), (n, 1, m)).doit() == Sum(exp(x*sin(n/m)), (n, 1, m))
+
+
+def test__dummy_with_inherited_properties_concrete():
+ x = Symbol('x')
+
+ from sympy.core.containers import Tuple
+ d = _dummy_with_inherited_properties_concrete(Tuple(x, 0, 5))
+ assert d.is_real
+ assert d.is_integer
+ assert d.is_nonnegative
+ assert d.is_extended_nonnegative
+
+ d = _dummy_with_inherited_properties_concrete(Tuple(x, 1, 9))
+ assert d.is_real
+ assert d.is_integer
+ assert d.is_positive
+ assert d.is_odd is None
+
+ d = _dummy_with_inherited_properties_concrete(Tuple(x, -5, 5))
+ assert d.is_real
+ assert d.is_integer
+ assert d.is_positive is None
+ assert d.is_extended_nonnegative is None
+ assert d.is_odd is None
+
+ d = _dummy_with_inherited_properties_concrete(Tuple(x, -1.5, 1.5))
+ assert d.is_real
+ assert d.is_integer is None
+ assert d.is_positive is None
+ assert d.is_extended_nonnegative is None
+
+ N = Symbol('N', integer=True, positive=True)
+ d = _dummy_with_inherited_properties_concrete(Tuple(x, 2, N))
+ assert d.is_real
+ assert d.is_positive
+ assert d.is_integer
+
+ # Return None if no assumptions are added
+ N = Symbol('N', integer=True, positive=True)
+ d = _dummy_with_inherited_properties_concrete(Tuple(N, 2, 4))
+ assert d is None
+
+ x = Symbol('x', negative=True)
+ raises(InconsistentAssumptions,
+ lambda: _dummy_with_inherited_properties_concrete(Tuple(x, 1, 5)))
+
+
+def test_matrixsymbol_summation_numerical_limits():
+ A = MatrixSymbol('A', 3, 3)
+ n = Symbol('n', integer=True)
+
+ assert Sum(A**n, (n, 0, 2)).doit() == Identity(3) + A + A**2
+ assert Sum(A, (n, 0, 2)).doit() == 3*A
+ assert Sum(n*A, (n, 0, 2)).doit() == 3*A
+
+ B = Matrix([[0, n, 0], [-1, 0, 0], [0, 0, 2]])
+ ans = Matrix([[0, 6, 0], [-4, 0, 0], [0, 0, 8]]) + 4*A
+ assert Sum(A+B, (n, 0, 3)).doit() == ans
+ ans = A*Matrix([[0, 6, 0], [-4, 0, 0], [0, 0, 8]])
+ assert Sum(A*B, (n, 0, 3)).doit() == ans
+
+ ans = (A**2*Matrix([[-2, 0, 0], [0,-2, 0], [0, 0, 4]]) +
+ A**3*Matrix([[0, -9, 0], [3, 0, 0], [0, 0, 8]]) +
+ A*Matrix([[0, 1, 0], [-1, 0, 0], [0, 0, 2]]))
+ assert Sum(A**n*B**n, (n, 1, 3)).doit() == ans
+
+
+def test_issue_21651():
+ i = Symbol('i')
+ a = Sum(floor(2*2**(-i)), (i, S.One, 2))
+ assert a.doit() == S.One
+
+
+@XFAIL
+def test_matrixsymbol_summation_symbolic_limits():
+ N = Symbol('N', integer=True, positive=True)
+
+ A = MatrixSymbol('A', 3, 3)
+ n = Symbol('n', integer=True)
+ assert Sum(A, (n, 0, N)).doit() == (N+1)*A
+ assert Sum(n*A, (n, 0, N)).doit() == (N**2/2+N/2)*A
+
+
+def test_summation_by_residues():
+ x = Symbol('x')
+
+ # Examples from Nakhle H. Asmar, Loukas Grafakos,
+ # Complex Analysis with Applications
+ assert eval_sum_residue(1 / (x**2 + 1), (x, -oo, oo)) == pi/tanh(pi)
+ assert eval_sum_residue(1 / x**6, (x, S(1), oo)) == pi**6/945
+ assert eval_sum_residue(1 / (x**2 + 9), (x, -oo, oo)) == pi/(3*tanh(3*pi))
+ assert eval_sum_residue(1 / (x**2 + 1)**2, (x, -oo, oo)).cancel() == \
+ (-pi**2*tanh(pi)**2 + pi*tanh(pi) + pi**2)/(2*tanh(pi)**2)
+ assert eval_sum_residue(x**2 / (x**2 + 1)**2, (x, -oo, oo)).cancel() == \
+ (-pi**2 + pi*tanh(pi) + pi**2*tanh(pi)**2)/(2*tanh(pi)**2)
+ assert eval_sum_residue(1 / (4*x**2 - 1), (x, -oo, oo)) == 0
+ assert eval_sum_residue(x**2 / (x**2 - S(1)/4)**2, (x, -oo, oo)) == pi**2/2
+ assert eval_sum_residue(1 / (4*x**2 - 1)**2, (x, -oo, oo)) == pi**2/8
+ assert eval_sum_residue(1 / ((x - S(1)/2)**2 + 1), (x, -oo, oo)) == pi*tanh(pi)
+ assert eval_sum_residue(1 / x**2, (x, S(1), oo)) == pi**2/6
+ assert eval_sum_residue(1 / x**4, (x, S(1), oo)) == pi**4/90
+ assert eval_sum_residue(1 / x**2 / (x**2 + 4), (x, S(1), oo)) == \
+ -pi*(-pi/12 - 1/(16*pi) + 1/(8*tanh(2*pi)))/2
+
+ # Some examples made from 1 / (x**2 + 1)
+ assert eval_sum_residue(1 / (x**2 + 1), (x, S(0), oo)) == \
+ S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue(1 / (x**2 + 1), (x, S(1), oo)) == \
+ -S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue(1 / (x**2 + 1), (x, S(-1), oo)) == \
+ 1 + pi/(2*tanh(pi))
+ assert eval_sum_residue((-1)**x / (x**2 + 1), (x, -oo, oo)) == \
+ pi/sinh(pi)
+ assert eval_sum_residue((-1)**x / (x**2 + 1), (x, S(0), oo)) == \
+ pi/(2*sinh(pi)) + S(1)/2
+ assert eval_sum_residue((-1)**x / (x**2 + 1), (x, S(1), oo)) == \
+ -S(1)/2 + pi/(2*sinh(pi))
+ assert eval_sum_residue((-1)**x / (x**2 + 1), (x, S(-1), oo)) == \
+ pi/(2*sinh(pi))
+
+ # Some examples made from shifting of 1 / (x**2 + 1)
+ assert eval_sum_residue(1 / (x**2 + 2*x + 2), (x, S(-1), oo)) == S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue(1 / (x**2 + 4*x + 5), (x, S(-2), oo)) == S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue(1 / (x**2 - 2*x + 2), (x, S(1), oo)) == S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue(1 / (x**2 - 4*x + 5), (x, S(2), oo)) == S(1)/2 + pi/(2*tanh(pi))
+ assert eval_sum_residue((-1)**x * -1 / (x**2 + 2*x + 2), (x, S(-1), oo)) == S(1)/2 + pi/(2*sinh(pi))
+ assert eval_sum_residue((-1)**x * -1 / (x**2 -2*x + 2), (x, S(1), oo)) == S(1)/2 + pi/(2*sinh(pi))
+
+ # Some examples made from 1 / x**2
+ assert eval_sum_residue(1 / x**2, (x, S(2), oo)) == -1 + pi**2/6
+ assert eval_sum_residue(1 / x**2, (x, S(3), oo)) == -S(5)/4 + pi**2/6
+ assert eval_sum_residue((-1)**x / x**2, (x, S(1), oo)) == -pi**2/12
+ assert eval_sum_residue((-1)**x / x**2, (x, S(2), oo)) == 1 - pi**2/12
+
+
+@slow
+def test_summation_by_residues_failing():
+ x = Symbol('x')
+
+ # Failing because of the bug in residue computation
+ assert eval_sum_residue(x**2 / (x**4 + 1), (x, S(1), oo))
+ assert eval_sum_residue(1 / ((x - 1)*(x - 2) + 1), (x, -oo, oo)) != 0
+
+
+def test_process_limits():
+ from sympy.concrete.expr_with_limits import _process_limits
+
+ # these should be (x, Range(3)) not Range(3)
+ raises(ValueError, lambda: _process_limits(
+ Range(3), discrete=True))
+ raises(ValueError, lambda: _process_limits(
+ Range(3), discrete=False))
+ # these should be (x, union) not union
+ # (but then we would get a TypeError because we don't
+ # handle non-contiguous sets: see below use of `union`)
+ union = Or(x < 1, x > 3).as_set()
+ raises(ValueError, lambda: _process_limits(
+ union, discrete=True))
+ raises(ValueError, lambda: _process_limits(
+ union, discrete=False))
+
+ # error not triggered if not needed
+ assert _process_limits((x, 1, 2)) == ([(x, 1, 2)], 1)
+
+ # this equivalence is used to detect Reals in _process_limits
+ assert isinstance(S.Reals, Interval)
+
+ C = Integral # continuous limits
+ assert C(x, x >= 5) == C(x, (x, 5, oo))
+ assert C(x, x < 3) == C(x, (x, -oo, 3))
+ ans = C(x, (x, 0, 3))
+ assert C(x, And(x >= 0, x < 3)) == ans
+ assert C(x, (x, Interval.Ropen(0, 3))) == ans
+ raises(TypeError, lambda: C(x, (x, Range(3))))
+
+ # discrete limits
+ for D in (Sum, Product):
+ r, ans = Range(3, 10, 2), D(2*x + 3, (x, 0, 3))
+ assert D(x, (x, r)) == ans
+ assert D(x, (x, r.reversed)) == ans
+ r, ans = Range(3, oo, 2), D(2*x + 3, (x, 0, oo))
+ assert D(x, (x, r)) == ans
+ assert D(x, (x, r.reversed)) == ans
+ r, ans = Range(-oo, 5, 2), D(3 - 2*x, (x, 0, oo))
+ assert D(x, (x, r)) == ans
+ assert D(x, (x, r.reversed)) == ans
+ raises(TypeError, lambda: D(x, x > 0))
+ raises(ValueError, lambda: D(x, Interval(1, 3)))
+ raises(NotImplementedError, lambda: D(x, (x, union)))
+
+
+def test_pr_22677():
+ b = Symbol('b', integer=True, positive=True)
+ assert Sum(1/x**2,(x, 0, b)).doit() == Sum(x**(-2), (x, 0, b))
+ assert Sum(1/(x - b)**2,(x, 0, b-1)).doit() == Sum(
+ (-b + x)**(-2), (x, 0, b - 1))
+
+
+def test_issue_23952():
+ p, q = symbols("p q", real=True, nonnegative=True)
+ k1, k2 = symbols("k1 k2", integer=True, nonnegative=True)
+ n = Symbol("n", integer=True, positive=True)
+ expr = Sum(abs(k1 - k2)*p**k1 *(1 - q)**(n - k2),
+ (k1, 0, n), (k2, 0, n))
+ assert expr.subs(p,0).subs(q,1).subs(n, 3).doit() == 3
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0467010b7d9ec5d021a33774bd78b2a833851c2e
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/holonomicerrors.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/holonomicerrors.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..9da71fc479a69e65e3a2cb1d7f4e956c27c24665
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/holonomicerrors.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/numerical.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/numerical.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4279402e1a55c5a8168b913e2e62b99180c2c800
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/numerical.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/recurrence.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/recurrence.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d7bc8008c3b45325615d6f8d525b10111abf52ac
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/__pycache__/recurrence.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fc1c565101efcf29acb4801b64572d66aefc6b9d
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_holonomic.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_holonomic.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..72757185f43e76f87021d5d7bf5e0a019e8bafec
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_holonomic.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_recurrence.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_recurrence.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..b45b0b21317ca86657250c69b5d6acc4a2333c07
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/__pycache__/test_recurrence.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_holonomic.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_holonomic.py
new file mode 100644
index 0000000000000000000000000000000000000000..49956419e917b3bc81a163d29862c539f33f6284
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_holonomic.py
@@ -0,0 +1,851 @@
+from sympy.holonomic import (DifferentialOperator, HolonomicFunction,
+ DifferentialOperators, from_hyper,
+ from_meijerg, expr_to_holonomic)
+from sympy.holonomic.recurrence import RecurrenceOperators, HolonomicSequence
+from sympy.core import EulerGamma
+from sympy.core.numbers import (I, Rational, pi)
+from sympy.core.singleton import S
+from sympy.core.symbol import (Symbol, symbols)
+from sympy.functions.elementary.exponential import (exp, log)
+from sympy.functions.elementary.hyperbolic import (asinh, cosh)
+from sympy.functions.elementary.miscellaneous import sqrt
+from sympy.functions.elementary.trigonometric import (cos, sin)
+from sympy.functions.special.bessel import besselj
+from sympy.functions.special.beta_functions import beta
+from sympy.functions.special.error_functions import (Ci, Si, erf, erfc)
+from sympy.functions.special.gamma_functions import gamma
+from sympy.functions.special.hyper import (hyper, meijerg)
+from sympy.printing.str import sstr
+from sympy.series.order import O
+from sympy.simplify.hyperexpand import hyperexpand
+from sympy.polys.domains.integerring import ZZ
+from sympy.polys.domains.rationalfield import QQ
+from sympy.polys.domains.realfield import RR
+
+
+def test_DifferentialOperator():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ assert Dx == R.derivative_operator
+ assert Dx == DifferentialOperator([R.base.zero, R.base.one], R)
+ assert x * Dx + x**2 * Dx**2 == DifferentialOperator([0, x, x**2], R)
+ assert (x**2 + 1) + Dx + x * \
+ Dx**5 == DifferentialOperator([x**2 + 1, 1, 0, 0, 0, x], R)
+ assert (x * Dx + x**2 + 1 - Dx * (x**3 + x))**3 == (-48 * x**6) + \
+ (-57 * x**7) * Dx + (-15 * x**8) * Dx**2 + (-x**9) * Dx**3
+ p = (x * Dx**2 + (x**2 + 3) * Dx**5) * (Dx + x**2)
+ q = (2 * x) + (4 * x**2) * Dx + (x**3) * Dx**2 + \
+ (20 * x**2 + x + 60) * Dx**3 + (10 * x**3 + 30 * x) * Dx**4 + \
+ (x**4 + 3 * x**2) * Dx**5 + (x**2 + 3) * Dx**6
+ assert p == q
+
+
+def test_HolonomicFunction_addition():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx**2 * x, x)
+ q = HolonomicFunction((2) * Dx + (x) * Dx**2, x)
+ assert p == q
+ p = HolonomicFunction(x * Dx + 1, x)
+ q = HolonomicFunction(Dx + 1, x)
+ r = HolonomicFunction((x - 2) + (x**2 - 2) * Dx + (x**2 - x) * Dx**2, x)
+ assert p + q == r
+ p = HolonomicFunction(x * Dx + Dx**2 * (x**2 + 2), x)
+ q = HolonomicFunction(Dx - 3, x)
+ r = HolonomicFunction((-54 * x**2 - 126 * x - 150) + (-135 * x**3 - 252 * x**2 - 270 * x + 140) * Dx +\
+ (-27 * x**4 - 24 * x**2 + 14 * x - 150) * Dx**2 + \
+ (9 * x**4 + 15 * x**3 + 38 * x**2 + 30 * x +40) * Dx**3, x)
+ assert p + q == r
+ p = HolonomicFunction(Dx**5 - 1, x)
+ q = HolonomicFunction(x**3 + Dx, x)
+ r = HolonomicFunction((-x**18 + 45*x**14 - 525*x**10 + 1575*x**6 - x**3 - 630*x**2) + \
+ (-x**15 + 30*x**11 - 195*x**7 + 210*x**3 - 1)*Dx + (x**18 - 45*x**14 + 525*x**10 - \
+ 1575*x**6 + x**3 + 630*x**2)*Dx**5 + (x**15 - 30*x**11 + 195*x**7 - 210*x**3 + \
+ 1)*Dx**6, x)
+ assert p+q == r
+
+ p = x**2 + 3*x + 8
+ q = x**3 - 7*x + 5
+ p = p*Dx - p.diff()
+ q = q*Dx - q.diff()
+ r = HolonomicFunction(p, x) + HolonomicFunction(q, x)
+ s = HolonomicFunction((6*x**2 + 18*x + 14) + (-4*x**3 - 18*x**2 - 62*x + 10)*Dx +\
+ (x**4 + 6*x**3 + 31*x**2 - 10*x - 71)*Dx**2, x)
+ assert r == s
+
+
+def test_HolonomicFunction_multiplication():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx+x+x*Dx**2, x)
+ q = HolonomicFunction(x*Dx+Dx*x+Dx**2, x)
+ r = HolonomicFunction((8*x**6 + 4*x**4 + 6*x**2 + 3) + (24*x**5 - 4*x**3 + 24*x)*Dx + \
+ (8*x**6 + 20*x**4 + 12*x**2 + 2)*Dx**2 + (8*x**5 + 4*x**3 + 4*x)*Dx**3 + \
+ (2*x**4 + x**2)*Dx**4, x)
+ assert p*q == r
+ p = HolonomicFunction(Dx**2+1, x)
+ q = HolonomicFunction(Dx-1, x)
+ r = HolonomicFunction((2) + (-2)*Dx + (1)*Dx**2, x)
+ assert p*q == r
+ p = HolonomicFunction(Dx**2+1+x+Dx, x)
+ q = HolonomicFunction((Dx*x-1)**2, x)
+ r = HolonomicFunction((4*x**7 + 11*x**6 + 16*x**5 + 4*x**4 - 6*x**3 - 7*x**2 - 8*x - 2) + \
+ (8*x**6 + 26*x**5 + 24*x**4 - 3*x**3 - 11*x**2 - 6*x - 2)*Dx + \
+ (8*x**6 + 18*x**5 + 15*x**4 - 3*x**3 - 6*x**2 - 6*x - 2)*Dx**2 + (8*x**5 + \
+ 10*x**4 + 6*x**3 - 2*x**2 - 4*x)*Dx**3 + (4*x**5 + 3*x**4 - x**2)*Dx**4, x)
+ assert p*q == r
+ p = HolonomicFunction(x*Dx**2-1, x)
+ q = HolonomicFunction(Dx*x-x, x)
+ r = HolonomicFunction((x - 3) + (-2*x + 2)*Dx + (x)*Dx**2, x)
+ assert p*q == r
+
+
+def test_HolonomicFunction_power():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx+x+x*Dx**2, x)
+ a = HolonomicFunction(Dx, x)
+ for n in range(10):
+ assert a == p**n
+ a *= p
+
+
+def test_addition_initial_condition():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx-1, x, 0, [3])
+ q = HolonomicFunction(Dx**2+1, x, 0, [1, 0])
+ r = HolonomicFunction(-1 + Dx - Dx**2 + Dx**3, x, 0, [4, 3, 2])
+ assert p + q == r
+ p = HolonomicFunction(Dx - x + Dx**2, x, 0, [1, 2])
+ q = HolonomicFunction(Dx**2 + x, x, 0, [1, 0])
+ r = HolonomicFunction((-x**4 - x**3/4 - x**2 + Rational(1, 4)) + (x**3 + x**2/4 + x*Rational(3, 4) + 1)*Dx + \
+ (x*Rational(-3, 2) + Rational(7, 4))*Dx**2 + (x**2 - x*Rational(7, 4) + Rational(1, 4))*Dx**3 + (x**2 + x/4 + S.Half)*Dx**4, x, 0, [2, 2, -2, 2])
+ assert p + q == r
+ p = HolonomicFunction(Dx**2 + 4*x*Dx + x**2, x, 0, [3, 4])
+ q = HolonomicFunction(Dx**2 + 1, x, 0, [1, 1])
+ r = HolonomicFunction((x**6 + 2*x**4 - 5*x**2 - 6) + (4*x**5 + 36*x**3 - 32*x)*Dx + \
+ (x**6 + 3*x**4 + 5*x**2 - 9)*Dx**2 + (4*x**5 + 36*x**3 - 32*x)*Dx**3 + (x**4 + \
+ 10*x**2 - 3)*Dx**4, x, 0, [4, 5, -1, -17])
+ assert p + q == r
+ q = HolonomicFunction(Dx**3 + x, x, 2, [3, 0, 1])
+ p = HolonomicFunction(Dx - 1, x, 2, [1])
+ r = HolonomicFunction((-x**2 - x + 1) + (x**2 + x)*Dx + (-x - 2)*Dx**3 + \
+ (x + 1)*Dx**4, x, 2, [4, 1, 2, -5 ])
+ assert p + q == r
+ p = expr_to_holonomic(sin(x))
+ q = expr_to_holonomic(1/x, x0=1)
+ r = HolonomicFunction((x**2 + 6) + (x**3 + 2*x)*Dx + (x**2 + 6)*Dx**2 + (x**3 + 2*x)*Dx**3, \
+ x, 1, [sin(1) + 1, -1 + cos(1), -sin(1) + 2])
+ assert p + q == r
+ C_1 = symbols('C_1')
+ p = expr_to_holonomic(sqrt(x))
+ q = expr_to_holonomic(sqrt(x**2-x))
+ r = (p + q).to_expr().subs(C_1, -I/2).expand()
+ assert r == I*sqrt(x)*sqrt(-x + 1) + sqrt(x)
+
+
+def test_multiplication_initial_condition():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx**2 + x*Dx - 1, x, 0, [3, 1])
+ q = HolonomicFunction(Dx**2 + 1, x, 0, [1, 1])
+ r = HolonomicFunction((x**4 + 14*x**2 + 60) + 4*x*Dx + (x**4 + 9*x**2 + 20)*Dx**2 + \
+ (2*x**3 + 18*x)*Dx**3 + (x**2 + 10)*Dx**4, x, 0, [3, 4, 2, 3])
+ assert p * q == r
+ p = HolonomicFunction(Dx**2 + x, x, 0, [1, 0])
+ q = HolonomicFunction(Dx**3 - x**2, x, 0, [3, 3, 3])
+ r = HolonomicFunction((x**8 - 37*x**7/27 - 10*x**6/27 - 164*x**5/9 - 184*x**4/9 + \
+ 160*x**3/27 + 404*x**2/9 + 8*x + Rational(40, 3)) + (6*x**7 - 128*x**6/9 - 98*x**5/9 - 28*x**4/9 + \
+ 8*x**3/9 + 28*x**2 + x*Rational(40, 9) - 40)*Dx + (3*x**6 - 82*x**5/9 + 76*x**4/9 + 4*x**3/3 + \
+ 220*x**2/9 - x*Rational(80, 3))*Dx**2 + (-2*x**6 + 128*x**5/27 - 2*x**4/3 -80*x**2/9 + Rational(200, 9))*Dx**3 + \
+ (3*x**5 - 64*x**4/9 - 28*x**3/9 + 6*x**2 - x*Rational(20, 9) - Rational(20, 3))*Dx**4 + (-4*x**3 + 64*x**2/9 + \
+ x*Rational(8, 3))*Dx**5 + (x**4 - 64*x**3/27 - 4*x**2/3 + Rational(20, 9))*Dx**6, x, 0, [3, 3, 3, -3, -12, -24])
+ assert p * q == r
+ p = HolonomicFunction(Dx - 1, x, 0, [2])
+ q = HolonomicFunction(Dx**2 + 1, x, 0, [0, 1])
+ r = HolonomicFunction(2 -2*Dx + Dx**2, x, 0, [0, 2])
+ assert p * q == r
+ q = HolonomicFunction(x*Dx**2 + 1 + 2*Dx, x, 0,[0, 1])
+ r = HolonomicFunction((x - 1) + (-2*x + 2)*Dx + x*Dx**2, x, 0, [0, 2])
+ assert p * q == r
+ p = HolonomicFunction(Dx**2 - 1, x, 0, [1, 3])
+ q = HolonomicFunction(Dx**3 + 1, x, 0, [1, 2, 1])
+ r = HolonomicFunction(6*Dx + 3*Dx**2 + 2*Dx**3 - 3*Dx**4 + Dx**6, x, 0, [1, 5, 14, 17, 17, 2])
+ assert p * q == r
+ p = expr_to_holonomic(sin(x))
+ q = expr_to_holonomic(1/x, x0=1)
+ r = HolonomicFunction(x + 2*Dx + x*Dx**2, x, 1, [sin(1), -sin(1) + cos(1)])
+ assert p * q == r
+ p = expr_to_holonomic(sqrt(x))
+ q = expr_to_holonomic(sqrt(x**2-x))
+ r = (p * q).to_expr()
+ assert r == I*x*sqrt(-x + 1)
+
+
+def test_HolonomicFunction_composition():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx-1, x).composition(x**2+x)
+ r = HolonomicFunction((-2*x - 1) + Dx, x)
+ assert p == r
+ p = HolonomicFunction(Dx**2+1, x).composition(x**5+x**2+1)
+ r = HolonomicFunction((125*x**12 + 150*x**9 + 60*x**6 + 8*x**3) + (-20*x**3 - 2)*Dx + \
+ (5*x**4 + 2*x)*Dx**2, x)
+ assert p == r
+ p = HolonomicFunction(Dx**2*x+x, x).composition(2*x**3+x**2+1)
+ r = HolonomicFunction((216*x**9 + 324*x**8 + 180*x**7 + 152*x**6 + 112*x**5 + \
+ 36*x**4 + 4*x**3) + (24*x**4 + 16*x**3 + 3*x**2 - 6*x - 1)*Dx + (6*x**5 + 5*x**4 + \
+ x**3 + 3*x**2 + x)*Dx**2, x)
+ assert p == r
+ p = HolonomicFunction(Dx**2+1, x).composition(1-x**2)
+ r = HolonomicFunction((4*x**3) - Dx + x*Dx**2, x)
+ assert p == r
+ p = HolonomicFunction(Dx**2+1, x).composition(x - 2/(x**2 + 1))
+ r = HolonomicFunction((x**12 + 6*x**10 + 12*x**9 + 15*x**8 + 48*x**7 + 68*x**6 + \
+ 72*x**5 + 111*x**4 + 112*x**3 + 54*x**2 + 12*x + 1) + (12*x**8 + 32*x**6 + \
+ 24*x**4 - 4)*Dx + (x**12 + 6*x**10 + 4*x**9 + 15*x**8 + 16*x**7 + 20*x**6 + 24*x**5+ \
+ 15*x**4 + 16*x**3 + 6*x**2 + 4*x + 1)*Dx**2, x)
+ assert p == r
+
+
+def test_from_hyper():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = hyper([1, 1], [Rational(3, 2)], x**2/4)
+ q = HolonomicFunction((4*x) + (5*x**2 - 8)*Dx + (x**3 - 4*x)*Dx**2, x, 1, [2*sqrt(3)*pi/9, -4*sqrt(3)*pi/27 + Rational(4, 3)])
+ r = from_hyper(p)
+ assert r == q
+ p = from_hyper(hyper([1], [Rational(3, 2)], x**2/4))
+ q = HolonomicFunction(-x + (-x**2/2 + 2)*Dx + x*Dx**2, x)
+ # x0 = 1
+ y0 = '[sqrt(pi)*exp(1/4)*erf(1/2), -sqrt(pi)*exp(1/4)*erf(1/2)/2 + 1]'
+ assert sstr(p.y0) == y0
+ assert q.annihilator == p.annihilator
+
+
+def test_from_meijerg():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = from_meijerg(meijerg(([], [Rational(3, 2)]), ([S.Half], [S.Half, 1]), x))
+ q = HolonomicFunction(x/2 - Rational(1, 4) + (-x**2 + x/4)*Dx + x**2*Dx**2 + x**3*Dx**3, x, 1, \
+ [1/sqrt(pi), 1/(2*sqrt(pi)), -1/(4*sqrt(pi))])
+ assert p == q
+ p = from_meijerg(meijerg(([], []), ([0], []), x))
+ q = HolonomicFunction(1 + Dx, x, 0, [1])
+ assert p == q
+ p = from_meijerg(meijerg(([1], []), ([S.Half], [0]), x))
+ q = HolonomicFunction((x + S.Half)*Dx + x*Dx**2, x, 1, [sqrt(pi)*erf(1), exp(-1)])
+ assert p == q
+ p = from_meijerg(meijerg(([0], [1]), ([0], []), 2*x**2))
+ q = HolonomicFunction((3*x**2 - 1)*Dx + x**3*Dx**2, x, 1, [-exp(Rational(-1, 2)) + 1, -exp(Rational(-1, 2))])
+ assert p == q
+
+
+def test_to_Sequence():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ n = symbols('n', integer=True)
+ _, Sn = RecurrenceOperators(ZZ.old_poly_ring(n), 'Sn')
+ p = HolonomicFunction(x**2*Dx**4 + x + Dx, x).to_sequence()
+ q = [(HolonomicSequence(1 + (n + 2)*Sn**2 + (n**4 + 6*n**3 + 11*n**2 + 6*n)*Sn**3), 0, 1)]
+ assert p == q
+ p = HolonomicFunction(x**2*Dx**4 + x**3 + Dx**2, x).to_sequence()
+ q = [(HolonomicSequence(1 + (n**4 + 14*n**3 + 72*n**2 + 163*n + 140)*Sn**5), 0, 0)]
+ assert p == q
+ p = HolonomicFunction(x**3*Dx**4 + 1 + Dx**2, x).to_sequence()
+ q = [(HolonomicSequence(1 + (n**4 - 2*n**3 - n**2 + 2*n)*Sn + (n**2 + 3*n + 2)*Sn**2), 0, 0)]
+ assert p == q
+ p = HolonomicFunction(3*x**3*Dx**4 + 2*x*Dx + x*Dx**3, x).to_sequence()
+ q = [(HolonomicSequence(2*n + (3*n**4 - 6*n**3 - 3*n**2 + 6*n)*Sn + (n**3 + 3*n**2 + 2*n)*Sn**2), 0, 1)]
+ assert p == q
+
+
+def test_to_Sequence_Initial_Coniditons():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ n = symbols('n', integer=True)
+ _, Sn = RecurrenceOperators(QQ.old_poly_ring(n), 'Sn')
+ p = HolonomicFunction(Dx - 1, x, 0, [1]).to_sequence()
+ q = [(HolonomicSequence(-1 + (n + 1)*Sn, 1), 0)]
+ assert p == q
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [0, 1]).to_sequence()
+ q = [(HolonomicSequence(1 + (n**2 + 3*n + 2)*Sn**2, [0, 1]), 0)]
+ assert p == q
+ p = HolonomicFunction(Dx**2 + 1 + x**3*Dx, x, 0, [2, 3]).to_sequence()
+ q = [(HolonomicSequence(n + Sn**2 + (n**2 + 7*n + 12)*Sn**4, [2, 3, -1, Rational(-1, 2), Rational(1, 12)]), 1)]
+ assert p == q
+ p = HolonomicFunction(x**3*Dx**5 + 1 + Dx, x).to_sequence()
+ q = [(HolonomicSequence(1 + (n + 1)*Sn + (n**5 - 5*n**3 + 4*n)*Sn**2), 0, 3)]
+ assert p == q
+ C_0, C_1, C_2, C_3 = symbols('C_0, C_1, C_2, C_3')
+ p = expr_to_holonomic(log(1+x**2))
+ q = [(HolonomicSequence(n**2 + (n**2 + 2*n)*Sn**2, [0, 0, C_2]), 0, 1)]
+ assert p.to_sequence() == q
+ p = p.diff()
+ q = [(HolonomicSequence((n + 2) + (n + 2)*Sn**2, [C_0, 0]), 1, 0)]
+ assert p.to_sequence() == q
+ p = expr_to_holonomic(erf(x) + x).to_sequence()
+ q = [(HolonomicSequence((2*n**2 - 2*n) + (n**3 + 2*n**2 - n - 2)*Sn**2, [0, 1 + 2/sqrt(pi), 0, C_3]), 0, 2)]
+ assert p == q
+
+def test_series():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx**2 + 2*x*Dx, x, 0, [0, 1]).series(n=10)
+ q = x - x**3/3 + x**5/10 - x**7/42 + x**9/216 + O(x**10)
+ assert p == q
+ p = HolonomicFunction(Dx - 1, x).composition(x**2, 0, [1]) # e^(x**2)
+ q = HolonomicFunction(Dx**2 + 1, x, 0, [1, 0]) # cos(x)
+ r = (p * q).series(n=10) # expansion of cos(x) * exp(x**2)
+ s = 1 + x**2/2 + x**4/24 - 31*x**6/720 - 179*x**8/8064 + O(x**10)
+ assert r == s
+ t = HolonomicFunction((1 + x)*Dx**2 + Dx, x, 0, [0, 1]) # log(1 + x)
+ r = (p * t + q).series(n=10)
+ s = 1 + x - x**2 + 4*x**3/3 - 17*x**4/24 + 31*x**5/30 - 481*x**6/720 +\
+ 71*x**7/105 - 20159*x**8/40320 + 379*x**9/840 + O(x**10)
+ assert r == s
+ p = HolonomicFunction((6+6*x-3*x**2) - (10*x-3*x**2-3*x**3)*Dx + \
+ (4-6*x**3+2*x**4)*Dx**2, x, 0, [0, 1]).series(n=7)
+ q = x + x**3/6 - 3*x**4/16 + x**5/20 - 23*x**6/960 + O(x**7)
+ assert p == q
+ p = HolonomicFunction((6+6*x-3*x**2) - (10*x-3*x**2-3*x**3)*Dx + \
+ (4-6*x**3+2*x**4)*Dx**2, x, 0, [1, 0]).series(n=7)
+ q = 1 - 3*x**2/4 - x**3/4 - 5*x**4/32 - 3*x**5/40 - 17*x**6/384 + O(x**7)
+ assert p == q
+ p = expr_to_holonomic(erf(x) + x).series(n=10)
+ C_3 = symbols('C_3')
+ q = (erf(x) + x).series(n=10)
+ assert p.subs(C_3, -2/(3*sqrt(pi))) == q
+ assert expr_to_holonomic(sqrt(x**3 + x)).series(n=10) == sqrt(x**3 + x).series(n=10)
+ assert expr_to_holonomic((2*x - 3*x**2)**Rational(1, 3)).series() == ((2*x - 3*x**2)**Rational(1, 3)).series()
+ assert expr_to_holonomic(sqrt(x**2-x)).series() == (sqrt(x**2-x)).series()
+ assert expr_to_holonomic(cos(x)**2/x**2, y0={-2: [1, 0, -1]}).series(n=10) == (cos(x)**2/x**2).series(n=10)
+ assert expr_to_holonomic(cos(x)**2/x**2, x0=1).series(n=10).together() == (cos(x)**2/x**2).series(n=10, x0=1).together()
+ assert expr_to_holonomic(cos(x-1)**2/(x-1)**2, x0=1, y0={-2: [1, 0, -1]}).series(n=10) \
+ == (cos(x-1)**2/(x-1)**2).series(x0=1, n=10)
+
+def test_evalf_euler():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+
+ # log(1+x)
+ p = HolonomicFunction((1 + x)*Dx**2 + Dx, x, 0, [0, 1])
+
+ # path taken is a straight line from 0 to 1, on the real axis
+ r = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+ s = '0.699525841805253' # approx. equal to log(2) i.e. 0.693147180559945
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # path taken is a triangle 0-->1+i-->2
+ r = [0.1 + 0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1+0.1*I)
+ for i in range(10):
+ r.append(r[-1]+0.1-0.1*I)
+
+ # close to the exact solution 1.09861228866811
+ # imaginary part also close to zero
+ s = '1.07530466271334 - 0.0251200594793912*I'
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # sin(x)
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [0, 1])
+ s = '0.905546532085401 - 6.93889390390723e-18*I'
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # computing sin(pi/2) using this method
+ # using a linear path from 0 to pi/2
+ r = [0.1]
+ for i in range(14):
+ r.append(r[-1] + 0.1)
+ r.append(pi/2)
+ s = '1.08016557252834' # close to 1.0 (exact solution)
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # trying different path, a rectangle (0-->i-->pi/2 + i-->pi/2)
+ # computing the same value sin(pi/2) using different path
+ r = [0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1*I)
+ for i in range(15):
+ r.append(r[-1]+0.1)
+ r.append(pi/2+I)
+ for i in range(10):
+ r.append(r[-1]-0.1*I)
+
+ # close to 1.0
+ s = '0.976882381836257 - 1.65557671738537e-16*I'
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # cos(x)
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [1, 0])
+ # compute cos(pi) along 0-->pi
+ r = [0.05]
+ for i in range(61):
+ r.append(r[-1]+0.05)
+ r.append(pi)
+ # close to -1 (exact answer)
+ s = '-1.08140824719196'
+ assert sstr(p.evalf(r, method='Euler')[-1]) == s
+
+ # a rectangular path (0 -> i -> 2+i -> 2)
+ r = [0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1*I)
+ for i in range(20):
+ r.append(r[-1]+0.1)
+ for i in range(10):
+ r.append(r[-1]-0.1*I)
+
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [1,1]).evalf(r, method='Euler')
+ s = '0.501421652861245 - 3.88578058618805e-16*I'
+ assert sstr(p[-1]) == s
+
+def test_evalf_rk4():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+
+ # log(1+x)
+ p = HolonomicFunction((1 + x)*Dx**2 + Dx, x, 0, [0, 1])
+
+ # path taken is a straight line from 0 to 1, on the real axis
+ r = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
+ s = '0.693146363174626' # approx. equal to log(2) i.e. 0.693147180559945
+ assert sstr(p.evalf(r)[-1]) == s
+
+ # path taken is a triangle 0-->1+i-->2
+ r = [0.1 + 0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1+0.1*I)
+ for i in range(10):
+ r.append(r[-1]+0.1-0.1*I)
+
+ # close to the exact solution 1.09861228866811
+ # imaginary part also close to zero
+ s = '1.098616 + 1.36083e-7*I'
+ assert sstr(p.evalf(r)[-1].n(7)) == s
+
+ # sin(x)
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [0, 1])
+ s = '0.90929463522785 + 1.52655665885959e-16*I'
+ assert sstr(p.evalf(r)[-1]) == s
+
+ # computing sin(pi/2) using this method
+ # using a linear path from 0 to pi/2
+ r = [0.1]
+ for i in range(14):
+ r.append(r[-1] + 0.1)
+ r.append(pi/2)
+ s = '0.999999895088917' # close to 1.0 (exact solution)
+ assert sstr(p.evalf(r)[-1]) == s
+
+ # trying different path, a rectangle (0-->i-->pi/2 + i-->pi/2)
+ # computing the same value sin(pi/2) using different path
+ r = [0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1*I)
+ for i in range(15):
+ r.append(r[-1]+0.1)
+ r.append(pi/2+I)
+ for i in range(10):
+ r.append(r[-1]-0.1*I)
+
+ # close to 1.0
+ s = '1.00000003415141 + 6.11940487991086e-16*I'
+ assert sstr(p.evalf(r)[-1]) == s
+
+ # cos(x)
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [1, 0])
+ # compute cos(pi) along 0-->pi
+ r = [0.05]
+ for i in range(61):
+ r.append(r[-1]+0.05)
+ r.append(pi)
+ # close to -1 (exact answer)
+ s = '-0.999999993238714'
+ assert sstr(p.evalf(r)[-1]) == s
+
+ # a rectangular path (0 -> i -> 2+i -> 2)
+ r = [0.1*I]
+ for i in range(9):
+ r.append(r[-1]+0.1*I)
+ for i in range(20):
+ r.append(r[-1]+0.1)
+ for i in range(10):
+ r.append(r[-1]-0.1*I)
+
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [1,1]).evalf(r)
+ s = '0.493152791638442 - 1.41553435639707e-15*I'
+ assert sstr(p[-1]) == s
+
+
+def test_expr_to_holonomic():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = expr_to_holonomic((sin(x)/x)**2)
+ q = HolonomicFunction(8*x + (4*x**2 + 6)*Dx + 6*x*Dx**2 + x**2*Dx**3, x, 0, \
+ [1, 0, Rational(-2, 3)])
+ assert p == q
+ p = expr_to_holonomic(1/(1+x**2)**2)
+ q = HolonomicFunction(4*x + (x**2 + 1)*Dx, x, 0, [1])
+ assert p == q
+ p = expr_to_holonomic(exp(x)*sin(x)+x*log(1+x))
+ q = HolonomicFunction((2*x**3 + 10*x**2 + 20*x + 18) + (-2*x**4 - 10*x**3 - 20*x**2 \
+ - 18*x)*Dx + (2*x**5 + 6*x**4 + 7*x**3 + 8*x**2 + 10*x - 4)*Dx**2 + \
+ (-2*x**5 - 5*x**4 - 2*x**3 + 2*x**2 - x + 4)*Dx**3 + (x**5 + 2*x**4 - x**3 - \
+ 7*x**2/2 + x + Rational(5, 2))*Dx**4, x, 0, [0, 1, 4, -1])
+ assert p == q
+ p = expr_to_holonomic(x*exp(x)+cos(x)+1)
+ q = HolonomicFunction((-x - 3)*Dx + (x + 2)*Dx**2 + (-x - 3)*Dx**3 + (x + 2)*Dx**4, x, \
+ 0, [2, 1, 1, 3])
+ assert p == q
+ assert (x*exp(x)+cos(x)+1).series(n=10) == p.series(n=10)
+ p = expr_to_holonomic(log(1 + x)**2 + 1)
+ q = HolonomicFunction(Dx + (3*x + 3)*Dx**2 + (x**2 + 2*x + 1)*Dx**3, x, 0, [1, 0, 2])
+ assert p == q
+ p = expr_to_holonomic(erf(x)**2 + x)
+ q = HolonomicFunction((8*x**4 - 2*x**2 + 2)*Dx**2 + (6*x**3 - x/2)*Dx**3 + \
+ (x**2+ Rational(1, 4))*Dx**4, x, 0, [0, 1, 8/pi, 0])
+ assert p == q
+ p = expr_to_holonomic(cosh(x)*x)
+ q = HolonomicFunction((-x**2 + 2) -2*x*Dx + x**2*Dx**2, x, 0, [0, 1])
+ assert p == q
+ p = expr_to_holonomic(besselj(2, x))
+ q = HolonomicFunction((x**2 - 4) + x*Dx + x**2*Dx**2, x, 0, [0, 0])
+ assert p == q
+ p = expr_to_holonomic(besselj(0, x) + exp(x))
+ q = HolonomicFunction((-x**2 - x/2 + S.Half) + (x**2 - x/2 - Rational(3, 2))*Dx + (-x**2 + x/2 + 1)*Dx**2 +\
+ (x**2 + x/2)*Dx**3, x, 0, [2, 1, S.Half])
+ assert p == q
+ p = expr_to_holonomic(sin(x)**2/x)
+ q = HolonomicFunction(4 + 4*x*Dx + 3*Dx**2 + x*Dx**3, x, 0, [0, 1, 0])
+ assert p == q
+ p = expr_to_holonomic(sin(x)**2/x, x0=2)
+ q = HolonomicFunction((4) + (4*x)*Dx + (3)*Dx**2 + (x)*Dx**3, x, 2, [sin(2)**2/2,
+ sin(2)*cos(2) - sin(2)**2/4, -3*sin(2)**2/4 + cos(2)**2 - sin(2)*cos(2)])
+ assert p == q
+ p = expr_to_holonomic(log(x)/2 - Ci(2*x)/2 + Ci(2)/2)
+ q = HolonomicFunction(4*Dx + 4*x*Dx**2 + 3*Dx**3 + x*Dx**4, x, 0, \
+ [-log(2)/2 - EulerGamma/2 + Ci(2)/2, 0, 1, 0])
+ assert p == q
+ p = p.to_expr()
+ q = log(x)/2 - Ci(2*x)/2 + Ci(2)/2
+ assert p == q
+ p = expr_to_holonomic(x**S.Half, x0=1)
+ q = HolonomicFunction(x*Dx - S.Half, x, 1, [1])
+ assert p == q
+ p = expr_to_holonomic(sqrt(1 + x**2))
+ q = HolonomicFunction((-x) + (x**2 + 1)*Dx, x, 0, [1])
+ assert p == q
+ assert (expr_to_holonomic(sqrt(x) + sqrt(2*x)).to_expr()-\
+ (sqrt(x) + sqrt(2*x))).simplify() == 0
+ assert expr_to_holonomic(3*x+2*sqrt(x)).to_expr() == 3*x+2*sqrt(x)
+ p = expr_to_holonomic((x**4+x**3+5*x**2+3*x+2)/x**2, lenics=3)
+ q = HolonomicFunction((-2*x**4 - x**3 + 3*x + 4) + (x**5 + x**4 + 5*x**3 + 3*x**2 + \
+ 2*x)*Dx, x, 0, {-2: [2, 3, 5]})
+ assert p == q
+ p = expr_to_holonomic(1/(x-1)**2, lenics=3, x0=1)
+ q = HolonomicFunction((2) + (x - 1)*Dx, x, 1, {-2: [1, 0, 0]})
+ assert p == q
+ a = symbols("a")
+ p = expr_to_holonomic(sqrt(a*x), x=x)
+ assert p.to_expr() == sqrt(a)*sqrt(x)
+
+def test_to_hyper():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx - 2, x, 0, [3]).to_hyper()
+ q = 3 * hyper([], [], 2*x)
+ assert p == q
+ p = hyperexpand(HolonomicFunction((1 + x) * Dx - 3, x, 0, [2]).to_hyper()).expand()
+ q = 2*x**3 + 6*x**2 + 6*x + 2
+ assert p == q
+ p = HolonomicFunction((1 + x)*Dx**2 + Dx, x, 0, [0, 1]).to_hyper()
+ q = -x**2*hyper((2, 2, 1), (3, 2), -x)/2 + x
+ assert p == q
+ p = HolonomicFunction(2*x*Dx + Dx**2, x, 0, [0, 2/sqrt(pi)]).to_hyper()
+ q = 2*x*hyper((S.Half,), (Rational(3, 2),), -x**2)/sqrt(pi)
+ assert p == q
+ p = hyperexpand(HolonomicFunction(2*x*Dx + Dx**2, x, 0, [1, -2/sqrt(pi)]).to_hyper())
+ q = erfc(x)
+ assert p.rewrite(erfc) == q
+ p = hyperexpand(HolonomicFunction((x**2 - 1) + x*Dx + x**2*Dx**2,
+ x, 0, [0, S.Half]).to_hyper())
+ q = besselj(1, x)
+ assert p == q
+ p = hyperexpand(HolonomicFunction(x*Dx**2 + Dx + x, x, 0, [1, 0]).to_hyper())
+ q = besselj(0, x)
+ assert p == q
+
+def test_to_expr():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(Dx - 1, x, 0, [1]).to_expr()
+ q = exp(x)
+ assert p == q
+ p = HolonomicFunction(Dx**2 + 1, x, 0, [1, 0]).to_expr()
+ q = cos(x)
+ assert p == q
+ p = HolonomicFunction(Dx**2 - 1, x, 0, [1, 0]).to_expr()
+ q = cosh(x)
+ assert p == q
+ p = HolonomicFunction(2 + (4*x - 1)*Dx + \
+ (x**2 - x)*Dx**2, x, 0, [1, 2]).to_expr().expand()
+ q = 1/(x**2 - 2*x + 1)
+ assert p == q
+ p = expr_to_holonomic(sin(x)**2/x).integrate((x, 0, x)).to_expr()
+ q = (sin(x)**2/x).integrate((x, 0, x))
+ assert p == q
+ C_0, C_1, C_2, C_3 = symbols('C_0, C_1, C_2, C_3')
+ p = expr_to_holonomic(log(1+x**2)).to_expr()
+ q = C_2*log(x**2 + 1)
+ assert p == q
+ p = expr_to_holonomic(log(1+x**2)).diff().to_expr()
+ q = C_0*x/(x**2 + 1)
+ assert p == q
+ p = expr_to_holonomic(erf(x) + x).to_expr()
+ q = 3*C_3*x - 3*sqrt(pi)*C_3*erf(x)/2 + x + 2*x/sqrt(pi)
+ assert p == q
+ p = expr_to_holonomic(sqrt(x), x0=1).to_expr()
+ assert p == sqrt(x)
+ assert expr_to_holonomic(sqrt(x)).to_expr() == sqrt(x)
+ p = expr_to_holonomic(sqrt(1 + x**2)).to_expr()
+ assert p == sqrt(1+x**2)
+ p = expr_to_holonomic((2*x**2 + 1)**Rational(2, 3)).to_expr()
+ assert p == (2*x**2 + 1)**Rational(2, 3)
+ p = expr_to_holonomic(sqrt(-x**2+2*x)).to_expr()
+ assert p == sqrt(x)*sqrt(-x + 2)
+ p = expr_to_holonomic((-2*x**3+7*x)**Rational(2, 3)).to_expr()
+ q = x**Rational(2, 3)*(-2*x**2 + 7)**Rational(2, 3)
+ assert p == q
+ p = from_hyper(hyper((-2, -3), (S.Half, ), x))
+ s = hyperexpand(hyper((-2, -3), (S.Half, ), x))
+ D_0 = Symbol('D_0')
+ C_0 = Symbol('C_0')
+ assert (p.to_expr().subs({C_0:1, D_0:0}) - s).simplify() == 0
+ p.y0 = {0: [1], S.Half: [0]}
+ assert p.to_expr() == s
+ assert expr_to_holonomic(x**5).to_expr() == x**5
+ assert expr_to_holonomic(2*x**3-3*x**2).to_expr().expand() == \
+ 2*x**3-3*x**2
+ a = symbols("a")
+ p = (expr_to_holonomic(1.4*x)*expr_to_holonomic(a*x, x)).to_expr()
+ q = 1.4*a*x**2
+ assert p == q
+ p = (expr_to_holonomic(1.4*x)+expr_to_holonomic(a*x, x)).to_expr()
+ q = x*(a + 1.4)
+ assert p == q
+ p = (expr_to_holonomic(1.4*x)+expr_to_holonomic(x)).to_expr()
+ assert p == 2.4*x
+
+
+def test_integrate():
+ x = symbols('x')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = expr_to_holonomic(sin(x)**2/x, x0=1).integrate((x, 2, 3))
+ q = '0.166270406994788'
+ assert sstr(p) == q
+ p = expr_to_holonomic(sin(x)).integrate((x, 0, x)).to_expr()
+ q = 1 - cos(x)
+ assert p == q
+ p = expr_to_holonomic(sin(x)).integrate((x, 0, 3))
+ q = 1 - cos(3)
+ assert p == q
+ p = expr_to_holonomic(sin(x)/x, x0=1).integrate((x, 1, 2))
+ q = '0.659329913368450'
+ assert sstr(p) == q
+ p = expr_to_holonomic(sin(x)**2/x, x0=1).integrate((x, 1, 0))
+ q = '-0.423690480850035'
+ assert sstr(p) == q
+ p = expr_to_holonomic(sin(x)/x)
+ assert p.integrate(x).to_expr() == Si(x)
+ assert p.integrate((x, 0, 2)) == Si(2)
+ p = expr_to_holonomic(sin(x)**2/x)
+ q = p.to_expr()
+ assert p.integrate(x).to_expr() == q.integrate((x, 0, x))
+ assert p.integrate((x, 0, 1)) == q.integrate((x, 0, 1))
+ assert expr_to_holonomic(1/x, x0=1).integrate(x).to_expr() == log(x)
+ p = expr_to_holonomic((x + 1)**3*exp(-x), x0=-1).integrate(x).to_expr()
+ q = (-x**3 - 6*x**2 - 15*x + 6*exp(x + 1) - 16)*exp(-x)
+ assert p == q
+ p = expr_to_holonomic(cos(x)**2/x**2, y0={-2: [1, 0, -1]}).integrate(x).to_expr()
+ q = -Si(2*x) - cos(x)**2/x
+ assert p == q
+ p = expr_to_holonomic(sqrt(x**2+x)).integrate(x).to_expr()
+ q = (x**Rational(3, 2)*(2*x**2 + 3*x + 1) - x*sqrt(x + 1)*asinh(sqrt(x)))/(4*x*sqrt(x + 1))
+ assert p == q
+ p = expr_to_holonomic(sqrt(x**2+1)).integrate(x).to_expr()
+ q = (sqrt(x**2+1)).integrate(x)
+ assert (p-q).simplify() == 0
+ p = expr_to_holonomic(1/x**2, y0={-2:[1, 0, 0]})
+ r = expr_to_holonomic(1/x**2, lenics=3)
+ assert p == r
+ q = expr_to_holonomic(cos(x)**2)
+ assert (r*q).integrate(x).to_expr() == -Si(2*x) - cos(x)**2/x
+
+
+def test_diff():
+ x, y = symbols('x, y')
+ R, Dx = DifferentialOperators(ZZ.old_poly_ring(x), 'Dx')
+ p = HolonomicFunction(x*Dx**2 + 1, x, 0, [0, 1])
+ assert p.diff().to_expr() == p.to_expr().diff().simplify()
+ p = HolonomicFunction(Dx**2 - 1, x, 0, [1, 0])
+ assert p.diff(x, 2).to_expr() == p.to_expr()
+ p = expr_to_holonomic(Si(x))
+ assert p.diff().to_expr() == sin(x)/x
+ assert p.diff(y) == 0
+ C_0, C_1, C_2, C_3 = symbols('C_0, C_1, C_2, C_3')
+ q = Si(x)
+ assert p.diff(x).to_expr() == q.diff()
+ assert p.diff(x, 2).to_expr().subs(C_0, Rational(-1, 3)).cancel() == q.diff(x, 2).cancel()
+ assert p.diff(x, 3).series().subs({C_3: Rational(-1, 3), C_0: 0}) == q.diff(x, 3).series()
+
+
+def test_extended_domain_in_expr_to_holonomic():
+ x = symbols('x')
+ p = expr_to_holonomic(1.2*cos(3.1*x))
+ assert p.to_expr() == 1.2*cos(3.1*x)
+ assert sstr(p.integrate(x).to_expr()) == '0.387096774193548*sin(3.1*x)'
+ _, Dx = DifferentialOperators(RR.old_poly_ring(x), 'Dx')
+ p = expr_to_holonomic(1.1329138213*x)
+ q = HolonomicFunction((-1.1329138213) + (1.1329138213*x)*Dx, x, 0, {1: [1.1329138213]})
+ assert p == q
+ assert p.to_expr() == 1.1329138213*x
+ assert sstr(p.integrate((x, 1, 2))) == sstr((1.1329138213*x).integrate((x, 1, 2)))
+ y, z = symbols('y, z')
+ p = expr_to_holonomic(sin(x*y*z), x=x)
+ assert p.to_expr() == sin(x*y*z)
+ assert p.integrate(x).to_expr() == (-cos(x*y*z) + 1)/(y*z)
+ p = expr_to_holonomic(sin(x*y + z), x=x).integrate(x).to_expr()
+ q = (cos(z) - cos(x*y + z))/y
+ assert p == q
+ a = symbols('a')
+ p = expr_to_holonomic(a*x, x)
+ assert p.to_expr() == a*x
+ assert p.integrate(x).to_expr() == a*x**2/2
+ D_2, C_1 = symbols("D_2, C_1")
+ p = expr_to_holonomic(x) + expr_to_holonomic(1.2*cos(x))
+ p = p.to_expr().subs(D_2, 0)
+ assert p - x - 1.2*cos(1.0*x) == 0
+ p = expr_to_holonomic(x) * expr_to_holonomic(1.2*cos(x))
+ p = p.to_expr().subs(C_1, 0)
+ assert p - 1.2*x*cos(1.0*x) == 0
+
+
+def test_to_meijerg():
+ x = symbols('x')
+ assert hyperexpand(expr_to_holonomic(sin(x)).to_meijerg()) == sin(x)
+ assert hyperexpand(expr_to_holonomic(cos(x)).to_meijerg()) == cos(x)
+ assert hyperexpand(expr_to_holonomic(exp(x)).to_meijerg()) == exp(x)
+ assert hyperexpand(expr_to_holonomic(log(x)).to_meijerg()).simplify() == log(x)
+ assert expr_to_holonomic(4*x**2/3 + 7).to_meijerg() == 4*x**2/3 + 7
+ assert hyperexpand(expr_to_holonomic(besselj(2, x), lenics=3).to_meijerg()) == besselj(2, x)
+ p = hyper((Rational(-1, 2), -3), (), x)
+ assert from_hyper(p).to_meijerg() == hyperexpand(p)
+ p = hyper((S.One, S(3)), (S(2), ), x)
+ assert (hyperexpand(from_hyper(p).to_meijerg()) - hyperexpand(p)).expand() == 0
+ p = from_hyper(hyper((-2, -3), (S.Half, ), x))
+ s = hyperexpand(hyper((-2, -3), (S.Half, ), x))
+ C_0 = Symbol('C_0')
+ C_1 = Symbol('C_1')
+ D_0 = Symbol('D_0')
+ assert (hyperexpand(p.to_meijerg()).subs({C_0:1, D_0:0}) - s).simplify() == 0
+ p.y0 = {0: [1], S.Half: [0]}
+ assert (hyperexpand(p.to_meijerg()) - s).simplify() == 0
+ p = expr_to_holonomic(besselj(S.Half, x), initcond=False)
+ assert (p.to_expr() - (D_0*sin(x) + C_0*cos(x) + C_1*sin(x))/sqrt(x)).simplify() == 0
+ p = expr_to_holonomic(besselj(S.Half, x), y0={Rational(-1, 2): [sqrt(2)/sqrt(pi), sqrt(2)/sqrt(pi)]})
+ assert (p.to_expr() - besselj(S.Half, x) - besselj(Rational(-1, 2), x)).simplify() == 0
+
+
+def test_gaussian():
+ mu, x = symbols("mu x")
+ sd = symbols("sd", positive=True)
+ Q = QQ[mu, sd].get_field()
+ e = sqrt(2)*exp(-(-mu + x)**2/(2*sd**2))/(2*sqrt(pi)*sd)
+ h1 = expr_to_holonomic(e, x, domain=Q)
+
+ _, Dx = DifferentialOperators(Q.old_poly_ring(x), 'Dx')
+ h2 = HolonomicFunction((-mu/sd**2 + x/sd**2) + (1)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_beta():
+ a, b, x = symbols("a b x", positive=True)
+ e = x**(a - 1)*(-x + 1)**(b - 1)/beta(a, b)
+ Q = QQ[a, b].get_field()
+ h1 = expr_to_holonomic(e, x, domain=Q)
+
+ _, Dx = DifferentialOperators(Q.old_poly_ring(x), 'Dx')
+ h2 = HolonomicFunction((a + x*(-a - b + 2) - 1) + (x**2 - x)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_gamma():
+ a, b, x = symbols("a b x", positive=True)
+ e = b**(-a)*x**(a - 1)*exp(-x/b)/gamma(a)
+ Q = QQ[a, b].get_field()
+ h1 = expr_to_holonomic(e, x, domain=Q)
+
+ _, Dx = DifferentialOperators(Q.old_poly_ring(x), 'Dx')
+ h2 = HolonomicFunction((-a + 1 + x/b) + (x)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_symbolic_power():
+ x, n = symbols("x n")
+ Q = QQ[n].get_field()
+ _, Dx = DifferentialOperators(Q.old_poly_ring(x), 'Dx')
+ h1 = HolonomicFunction((-1) + (x)*Dx, x) ** -n
+ h2 = HolonomicFunction((n) + (x)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_negative_power():
+ x = symbols("x")
+ _, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ h1 = HolonomicFunction((-1) + (x)*Dx, x) ** -2
+ h2 = HolonomicFunction((2) + (x)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_expr_in_power():
+ x, n = symbols("x n")
+ Q = QQ[n].get_field()
+ _, Dx = DifferentialOperators(Q.old_poly_ring(x), 'Dx')
+ h1 = HolonomicFunction((-1) + (x)*Dx, x) ** (n - 3)
+ h2 = HolonomicFunction((-n + 3) + (x)*Dx, x)
+
+ assert h1 == h2
+
+
+def test_DifferentialOperatorEqPoly():
+ x = symbols('x', integer=True)
+ R, Dx = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ do = DifferentialOperator([x**2, R.base.zero, R.base.zero], R)
+ do2 = DifferentialOperator([x**2, 1, x], R)
+ assert not do == do2
+
+ # polynomial comparison issue, see https://github.com/sympy/sympy/pull/15799
+ # should work once that is solved
+ # p = do.listofpoly[0]
+ # assert do == p
+
+ p2 = do2.listofpoly[0]
+ assert not do2 == p2
+
+
+def test_DifferentialOperatorPow():
+ x = symbols('x', integer=True)
+ R, _ = DifferentialOperators(QQ.old_poly_ring(x), 'Dx')
+ do = DifferentialOperator([x**2, R.base.zero, R.base.zero], R)
+ a = DifferentialOperator([R.base.one], R)
+ for n in range(10):
+ assert a == do**n
+ a *= do
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_recurrence.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_recurrence.py
new file mode 100644
index 0000000000000000000000000000000000000000..526595e91c5fc507877275e3e53e78c6f3716095
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/holonomic/tests/test_recurrence.py
@@ -0,0 +1,41 @@
+from sympy.holonomic.recurrence import RecurrenceOperators, RecurrenceOperator
+from sympy.core.symbol import symbols
+from sympy.polys.domains.rationalfield import QQ
+
+
+def test_RecurrenceOperator():
+ n = symbols('n', integer=True)
+ R, Sn = RecurrenceOperators(QQ.old_poly_ring(n), 'Sn')
+ assert Sn*n == (n + 1)*Sn
+ assert Sn*n**2 == (n**2+1+2*n)*Sn
+ assert Sn**2*n**2 == (n**2 + 4*n + 4)*Sn**2
+ p = (Sn**3*n**2 + Sn*n)**2
+ q = (n**2 + 3*n + 2)*Sn**2 + (2*n**3 + 19*n**2 + 57*n + 52)*Sn**4 + (n**4 + 18*n**3 + \
+ 117*n**2 + 324*n + 324)*Sn**6
+ assert p == q
+
+
+def test_RecurrenceOperatorEqPoly():
+ n = symbols('n', integer=True)
+ R, Sn = RecurrenceOperators(QQ.old_poly_ring(n), 'Sn')
+ rr = RecurrenceOperator([n**2, 0, 0], R)
+ rr2 = RecurrenceOperator([n**2, 1, n], R)
+ assert not rr == rr2
+
+ # polynomial comparison issue, see https://github.com/sympy/sympy/pull/15799
+ # should work once that is solved
+ # d = rr.listofpoly[0]
+ # assert rr == d
+
+ d2 = rr2.listofpoly[0]
+ assert not rr2 == d2
+
+
+def test_RecurrenceOperatorPow():
+ n = symbols('n', integer=True)
+ R, _ = RecurrenceOperators(QQ.old_poly_ring(n), 'Sn')
+ rr = RecurrenceOperator([n**2, 0, 0], R)
+ a = RecurrenceOperator([R.base.one], R)
+ for m in range(10):
+ assert a == rr**m
+ a *= rr
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..4aba939e255e6ccda77711bc67783587d5cc08f5
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/conflict.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/conflict.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..79fbcaf895f98994722652f6a52547744683e5be
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/conflict.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/core.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/core.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..da20a255ccc927a2a94979ea9db3c131ce84997f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/core.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/dispatcher.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/dispatcher.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..968191e702840de9b7b51b7df764b787ab991dba
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/dispatcher.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/utils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/utils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..19e8fc57bafb70ccaf7ddc1107f4f8d9dbd07bbb
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/__pycache__/utils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/__init__.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/__init__.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8cc75955d033aa8e4fc3e2664a19e3b7a3a23fe7
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/__init__.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_conflict.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_conflict.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..adba3bce217aca3a8a97dd66232277f0d6adf170
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_conflict.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_core.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_core.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..3dfca08bd4854300a5b065650db0dbd16b3e5a05
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_core.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_dispatcher.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_dispatcher.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0a1e2a3b8c94f156d7d0b6b4d33b8a2ffa013f3c
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/__pycache__/test_dispatcher.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_conflict.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_conflict.py
new file mode 100644
index 0000000000000000000000000000000000000000..5d2292c460585ae2a65a01795b38499e67706ff0
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_conflict.py
@@ -0,0 +1,62 @@
+from sympy.multipledispatch.conflict import (supercedes, ordering, ambiguities,
+ ambiguous, super_signature, consistent)
+
+
+class A: pass
+class B(A): pass
+class C: pass
+
+
+def test_supercedes():
+ assert supercedes([B], [A])
+ assert supercedes([B, A], [A, A])
+ assert not supercedes([B, A], [A, B])
+ assert not supercedes([A], [B])
+
+
+def test_consistent():
+ assert consistent([A], [A])
+ assert consistent([B], [B])
+ assert not consistent([A], [C])
+ assert consistent([A, B], [A, B])
+ assert consistent([B, A], [A, B])
+ assert not consistent([B, A], [B])
+ assert not consistent([B, A], [B, C])
+
+
+def test_super_signature():
+ assert super_signature([[A]]) == [A]
+ assert super_signature([[A], [B]]) == [B]
+ assert super_signature([[A, B], [B, A]]) == [B, B]
+ assert super_signature([[A, A, B], [A, B, A], [B, A, A]]) == [B, B, B]
+
+
+def test_ambiguous():
+ assert not ambiguous([A], [A])
+ assert not ambiguous([A], [B])
+ assert not ambiguous([B], [B])
+ assert not ambiguous([A, B], [B, B])
+ assert ambiguous([A, B], [B, A])
+
+
+def test_ambiguities():
+ signatures = [[A], [B], [A, B], [B, A], [A, C]]
+ expected = {((A, B), (B, A))}
+ result = ambiguities(signatures)
+ assert set(map(frozenset, expected)) == set(map(frozenset, result))
+
+ signatures = [[A], [B], [A, B], [B, A], [A, C], [B, B]]
+ expected = set()
+ result = ambiguities(signatures)
+ assert set(map(frozenset, expected)) == set(map(frozenset, result))
+
+
+def test_ordering():
+ signatures = [[A, A], [A, B], [B, A], [B, B], [A, C]]
+ ord = ordering(signatures)
+ assert ord[0] == (B, B) or ord[0] == (A, C)
+ assert ord[-1] == (A, A) or ord[-1] == (A, C)
+
+
+def test_type_mro():
+ assert super_signature([[object], [type]]) == [type]
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_core.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_core.py
new file mode 100644
index 0000000000000000000000000000000000000000..016270fecc8cda644fc71b5c310b1430b50361f6
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_core.py
@@ -0,0 +1,213 @@
+from __future__ import annotations
+from typing import Any
+
+from sympy.multipledispatch import dispatch
+from sympy.multipledispatch.conflict import AmbiguityWarning
+from sympy.testing.pytest import raises, warns
+from functools import partial
+
+test_namespace: dict[str, Any] = {}
+
+orig_dispatch = dispatch
+dispatch = partial(dispatch, namespace=test_namespace)
+
+
+def test_singledispatch():
+ @dispatch(int)
+ def f(x): # noqa:F811
+ return x + 1
+
+ @dispatch(int)
+ def g(x): # noqa:F811
+ return x + 2
+
+ @dispatch(float) # noqa:F811
+ def f(x): # noqa:F811
+ return x - 1
+
+ assert f(1) == 2
+ assert g(1) == 3
+ assert f(1.0) == 0
+
+ assert raises(NotImplementedError, lambda: f('hello'))
+
+
+def test_multipledispatch():
+ @dispatch(int, int)
+ def f(x, y): # noqa:F811
+ return x + y
+
+ @dispatch(float, float) # noqa:F811
+ def f(x, y): # noqa:F811
+ return x - y
+
+ assert f(1, 2) == 3
+ assert f(1.0, 2.0) == -1.0
+
+
+class A: pass
+class B: pass
+class C(A): pass
+class D(C): pass
+class E(C): pass
+
+
+def test_inheritance():
+ @dispatch(A)
+ def f(x): # noqa:F811
+ return 'a'
+
+ @dispatch(B) # noqa:F811
+ def f(x): # noqa:F811
+ return 'b'
+
+ assert f(A()) == 'a'
+ assert f(B()) == 'b'
+ assert f(C()) == 'a'
+
+
+def test_inheritance_and_multiple_dispatch():
+ @dispatch(A, A)
+ def f(x, y): # noqa:F811
+ return type(x), type(y)
+
+ @dispatch(A, B) # noqa:F811
+ def f(x, y): # noqa:F811
+ return 0
+
+ assert f(A(), A()) == (A, A)
+ assert f(A(), C()) == (A, C)
+ assert f(A(), B()) == 0
+ assert f(C(), B()) == 0
+ assert raises(NotImplementedError, lambda: f(B(), B()))
+
+
+def test_competing_solutions():
+ @dispatch(A)
+ def h(x): # noqa:F811
+ return 1
+
+ @dispatch(C) # noqa:F811
+ def h(x): # noqa:F811
+ return 2
+
+ assert h(D()) == 2
+
+
+def test_competing_multiple():
+ @dispatch(A, B)
+ def h(x, y): # noqa:F811
+ return 1
+
+ @dispatch(C, B) # noqa:F811
+ def h(x, y): # noqa:F811
+ return 2
+
+ assert h(D(), B()) == 2
+
+
+def test_competing_ambiguous():
+ test_namespace = {}
+ dispatch = partial(orig_dispatch, namespace=test_namespace)
+
+ @dispatch(A, C)
+ def f(x, y): # noqa:F811
+ return 2
+
+ with warns(AmbiguityWarning, test_stacklevel=False):
+ @dispatch(C, A) # noqa:F811
+ def f(x, y): # noqa:F811
+ return 2
+
+ assert f(A(), C()) == f(C(), A()) == 2
+ # assert raises(Warning, lambda : f(C(), C()))
+
+
+def test_caching_correct_behavior():
+ @dispatch(A)
+ def f(x): # noqa:F811
+ return 1
+
+ assert f(C()) == 1
+
+ @dispatch(C)
+ def f(x): # noqa:F811
+ return 2
+
+ assert f(C()) == 2
+
+
+def test_union_types():
+ @dispatch((A, C))
+ def f(x): # noqa:F811
+ return 1
+
+ assert f(A()) == 1
+ assert f(C()) == 1
+
+
+def test_namespaces():
+ ns1 = {}
+ ns2 = {}
+
+ def foo(x):
+ return 1
+ foo1 = orig_dispatch(int, namespace=ns1)(foo)
+
+ def foo(x):
+ return 2
+ foo2 = orig_dispatch(int, namespace=ns2)(foo)
+
+ assert foo1(0) == 1
+ assert foo2(0) == 2
+
+
+"""
+Fails
+def test_dispatch_on_dispatch():
+ @dispatch(A)
+ @dispatch(C)
+ def q(x): # noqa:F811
+ return 1
+
+ assert q(A()) == 1
+ assert q(C()) == 1
+"""
+
+
+def test_methods():
+ class Foo:
+ @dispatch(float)
+ def f(self, x): # noqa:F811
+ return x - 1
+
+ @dispatch(int) # noqa:F811
+ def f(self, x): # noqa:F811
+ return x + 1
+
+ @dispatch(int)
+ def g(self, x): # noqa:F811
+ return x + 3
+
+
+ foo = Foo()
+ assert foo.f(1) == 2
+ assert foo.f(1.0) == 0.0
+ assert foo.g(1) == 4
+
+
+def test_methods_multiple_dispatch():
+ class Foo:
+ @dispatch(A, A)
+ def f(x, y): # noqa:F811
+ return 1
+
+ @dispatch(A, C) # noqa:F811
+ def f(x, y): # noqa:F811
+ return 2
+
+
+ foo = Foo()
+ assert foo.f(A(), A()) == 1
+ assert foo.f(A(), C()) == 2
+ assert foo.f(C(), C()) == 2
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_dispatcher.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_dispatcher.py
new file mode 100644
index 0000000000000000000000000000000000000000..e31ca8a5486b87eb43fc5e6f887caf50d6bfbe20
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/multipledispatch/tests/test_dispatcher.py
@@ -0,0 +1,284 @@
+from sympy.multipledispatch.dispatcher import (Dispatcher, MDNotImplementedError,
+ MethodDispatcher, halt_ordering,
+ restart_ordering,
+ ambiguity_register_error_ignore_dup)
+from sympy.testing.pytest import raises, warns
+
+
+def identity(x):
+ return x
+
+
+def inc(x):
+ return x + 1
+
+
+def dec(x):
+ return x - 1
+
+
+def test_dispatcher():
+ f = Dispatcher('f')
+ f.add((int,), inc)
+ f.add((float,), dec)
+
+ with warns(DeprecationWarning, test_stacklevel=False):
+ assert f.resolve((int,)) == inc
+ assert f.dispatch(int) is inc
+
+ assert f(1) == 2
+ assert f(1.0) == 0.0
+
+
+def test_union_types():
+ f = Dispatcher('f')
+ f.register((int, float))(inc)
+
+ assert f(1) == 2
+ assert f(1.0) == 2.0
+
+
+def test_dispatcher_as_decorator():
+ f = Dispatcher('f')
+
+ @f.register(int)
+ def inc(x): # noqa:F811
+ return x + 1
+
+ @f.register(float) # noqa:F811
+ def inc(x): # noqa:F811
+ return x - 1
+
+ assert f(1) == 2
+ assert f(1.0) == 0.0
+
+
+def test_register_instance_method():
+
+ class Test:
+ __init__ = MethodDispatcher('f')
+
+ @__init__.register(list)
+ def _init_list(self, data):
+ self.data = data
+
+ @__init__.register(object)
+ def _init_obj(self, datum):
+ self.data = [datum]
+
+ a = Test(3)
+ b = Test([3])
+ assert a.data == b.data
+
+
+def test_on_ambiguity():
+ f = Dispatcher('f')
+
+ def identity(x): return x
+
+ ambiguities = [False]
+
+ def on_ambiguity(dispatcher, amb):
+ ambiguities[0] = True
+
+ f.add((object, object), identity, on_ambiguity=on_ambiguity)
+ assert not ambiguities[0]
+ f.add((object, float), identity, on_ambiguity=on_ambiguity)
+ assert not ambiguities[0]
+ f.add((float, object), identity, on_ambiguity=on_ambiguity)
+ assert ambiguities[0]
+
+
+def test_raise_error_on_non_class():
+ f = Dispatcher('f')
+ assert raises(TypeError, lambda: f.add((1,), inc))
+
+
+def test_docstring():
+
+ def one(x, y):
+ """ Docstring number one """
+ return x + y
+
+ def two(x, y):
+ """ Docstring number two """
+ return x + y
+
+ def three(x, y):
+ return x + y
+
+ master_doc = 'Doc of the multimethod itself'
+
+ f = Dispatcher('f', doc=master_doc)
+ f.add((object, object), one)
+ f.add((int, int), two)
+ f.add((float, float), three)
+
+ assert one.__doc__.strip() in f.__doc__
+ assert two.__doc__.strip() in f.__doc__
+ assert f.__doc__.find(one.__doc__.strip()) < \
+ f.__doc__.find(two.__doc__.strip())
+ assert 'object, object' in f.__doc__
+ assert master_doc in f.__doc__
+
+
+def test_help():
+ def one(x, y):
+ """ Docstring number one """
+ return x + y
+
+ def two(x, y):
+ """ Docstring number two """
+ return x + y
+
+ def three(x, y):
+ """ Docstring number three """
+ return x + y
+
+ master_doc = 'Doc of the multimethod itself'
+
+ f = Dispatcher('f', doc=master_doc)
+ f.add((object, object), one)
+ f.add((int, int), two)
+ f.add((float, float), three)
+
+ assert f._help(1, 1) == two.__doc__
+ assert f._help(1.0, 2.0) == three.__doc__
+
+
+def test_source():
+ def one(x, y):
+ """ Docstring number one """
+ return x + y
+
+ def two(x, y):
+ """ Docstring number two """
+ return x - y
+
+ master_doc = 'Doc of the multimethod itself'
+
+ f = Dispatcher('f', doc=master_doc)
+ f.add((int, int), one)
+ f.add((float, float), two)
+
+ assert 'x + y' in f._source(1, 1)
+ assert 'x - y' in f._source(1.0, 1.0)
+
+
+def test_source_raises_on_missing_function():
+ f = Dispatcher('f')
+
+ assert raises(TypeError, lambda: f.source(1))
+
+
+def test_halt_method_resolution():
+ g = [0]
+
+ def on_ambiguity(a, b):
+ g[0] += 1
+
+ f = Dispatcher('f')
+
+ halt_ordering()
+
+ def func(*args):
+ pass
+
+ f.add((int, object), func)
+ f.add((object, int), func)
+
+ assert g == [0]
+
+ restart_ordering(on_ambiguity=on_ambiguity)
+
+ assert g == [1]
+
+ assert set(f.ordering) == {(int, object), (object, int)}
+
+
+def test_no_implementations():
+ f = Dispatcher('f')
+ assert raises(NotImplementedError, lambda: f('hello'))
+
+
+def test_register_stacking():
+ f = Dispatcher('f')
+
+ @f.register(list)
+ @f.register(tuple)
+ def rev(x):
+ return x[::-1]
+
+ assert f((1, 2, 3)) == (3, 2, 1)
+ assert f([1, 2, 3]) == [3, 2, 1]
+
+ assert raises(NotImplementedError, lambda: f('hello'))
+ assert rev('hello') == 'olleh'
+
+
+def test_dispatch_method():
+ f = Dispatcher('f')
+
+ @f.register(list)
+ def rev(x):
+ return x[::-1]
+
+ @f.register(int, int)
+ def add(x, y):
+ return x + y
+
+ class MyList(list):
+ pass
+
+ assert f.dispatch(list) is rev
+ assert f.dispatch(MyList) is rev
+ assert f.dispatch(int, int) is add
+
+
+def test_not_implemented():
+ f = Dispatcher('f')
+
+ @f.register(object)
+ def _(x):
+ return 'default'
+
+ @f.register(int)
+ def _(x):
+ if x % 2 == 0:
+ return 'even'
+ else:
+ raise MDNotImplementedError()
+
+ assert f('hello') == 'default' # default behavior
+ assert f(2) == 'even' # specialized behavior
+ assert f(3) == 'default' # fall back to default behavior
+ assert raises(NotImplementedError, lambda: f(1, 2))
+
+
+def test_not_implemented_error():
+ f = Dispatcher('f')
+
+ @f.register(float)
+ def _(a):
+ raise MDNotImplementedError()
+
+ assert raises(NotImplementedError, lambda: f(1.0))
+
+def test_ambiguity_register_error_ignore_dup():
+ f = Dispatcher('f')
+
+ class A:
+ pass
+ class B(A):
+ pass
+ class C(A):
+ pass
+
+ # suppress warning for registering ambiguous signatures
+ f.add((A, B), lambda x,y: None, ambiguity_register_error_ignore_dup)
+ f.add((B, A), lambda x,y: None, ambiguity_register_error_ignore_dup)
+ f.add((A, C), lambda x,y: None, ambiguity_register_error_ignore_dup)
+ f.add((C, A), lambda x,y: None, ambiguity_register_error_ignore_dup)
+
+ # raises error if a call matching ambiguous signatures is made
+ assert raises(NotImplementedError, lambda: f(B(), C()))
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/decorator.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/decorator.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..6ea2fc667894fe19a7086708287a5eb7e23c42ca
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/decorator.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/lambdify.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/lambdify.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..55470c984d05c72100e05cdb080f0815557fc7bd
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/lambdify.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/misc.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/misc.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..d592a660351dbcf4e6ddc9153602de59b61f7633
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/misc.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/pkgdata.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/pkgdata.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..7063375bb3ee9d6b8a93ddc663203f2da9e5e86f
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/pkgdata.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/timeutils.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/timeutils.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..83a9d9893f55336eeee62fec086b993ba99c0d4b
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/timeutils.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/tmpfiles.cpython-312.pyc b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/tmpfiles.cpython-312.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e26fb9bec018147d84520a39902e003b2e2522e7
Binary files /dev/null and b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/__pycache__/tmpfiles.cpython-312.pyc differ
diff --git a/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/mathml/__init__.py b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/mathml/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..eded44ee3c0f34ad1324765ba06ee9d6eb5e9899
--- /dev/null
+++ b/URSA/.venv_ursa/lib/python3.12/site-packages/sympy/utilities/mathml/__init__.py
@@ -0,0 +1,122 @@
+"""Module with some functions for MathML, like transforming MathML
+content in MathML presentation.
+
+To use this module, you will need lxml.
+"""
+
+from pathlib import Path
+
+from sympy.utilities.decorator import doctest_depends_on
+
+
+__doctest_requires__ = {('apply_xsl', 'c2p'): ['lxml']}
+
+
+def add_mathml_headers(s):
+ return """"
+
+
+def _read_binary(pkgname, filename):
+ import sys
+
+ if sys.version_info >= (3, 10):
+ # files was added in Python 3.9 but only seems to work here in 3.10+
+ from importlib.resources import files
+ return files(pkgname).joinpath(filename).read_bytes()
+ else:
+ # read_binary was deprecated in Python 3.11
+ from importlib.resources import read_binary
+ return read_binary(pkgname, filename)
+
+
+def _read_xsl(xsl):
+ # Previously these values were allowed:
+ if xsl == 'mathml/data/simple_mmlctop.xsl':
+ xsl = 'simple_mmlctop.xsl'
+ elif xsl == 'mathml/data/mmlctop.xsl':
+ xsl = 'mmlctop.xsl'
+ elif xsl == 'mathml/data/mmltex.xsl':
+ xsl = 'mmltex.xsl'
+
+ if xsl in ['simple_mmlctop.xsl', 'mmlctop.xsl', 'mmltex.xsl']:
+ xslbytes = _read_binary('sympy.utilities.mathml.data', xsl)
+ else:
+ xslbytes = Path(xsl).read_bytes()
+
+ return xslbytes
+
+
+@doctest_depends_on(modules=('lxml',))
+def apply_xsl(mml, xsl):
+ """Apply a xsl to a MathML string.
+
+ Parameters
+ ==========
+
+ mml
+ A string with MathML code.
+ xsl
+ A string giving the name of an xsl (xml stylesheet) file which can be
+ found in sympy/utilities/mathml/data. The following files are supplied
+ with SymPy:
+
+ - mmlctop.xsl
+ - mmltex.xsl
+ - simple_mmlctop.xsl
+
+ Alternatively, a full path to an xsl file can be given.
+
+ Examples
+ ========
+
+ >>> from sympy.utilities.mathml import apply_xsl
+ >>> xsl = 'simple_mmlctop.xsl'
+ >>> mml = ' a b '
+ >>> res = apply_xsl(mml,xsl)
+ >>> print(res)
+
+
+ a
+ +
+ b
+
+ """
+ from lxml import etree
+
+ parser = etree.XMLParser(resolve_entities=False)
+ ac = etree.XSLTAccessControl.DENY_ALL
+
+ s = etree.XML(_read_xsl(xsl), parser=parser)
+ transform = etree.XSLT(s, access_control=ac)
+ doc = etree.XML(mml, parser=parser)
+ result = transform(doc)
+ s = str(result)
+ return s
+
+
+@doctest_depends_on(modules=('lxml',))
+def c2p(mml, simple=False):
+ """Transforms a document in MathML content (like the one that sympy produces)
+ in one document in MathML presentation, more suitable for printing, and more
+ widely accepted
+
+ Examples
+ ========
+
+ >>> from sympy.utilities.mathml import c2p
+ >>> mml = ' 2 '
+ >>> c2p(mml,simple=True) != c2p(mml,simple=False)
+ True
+
+ """
+
+ if not mml.startswith('