diff --git a/.gitattributes b/.gitattributes
index fcfca0322cd34739ebe732a8e6c18311df99e157..072e826a297b9a31120175446b78ad19abc62b8b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -39,3 +39,5 @@ tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/_
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Plex/Scanners.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Plex/DFA.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
 tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FusedNode.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
+tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so filter=lfs diff=lfs merge=lfs -text
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/TestCyCache.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/TestCyCache.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..c006f917f7d924e97f37cb4d6b9200bdc9995745
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/TestCyCache.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..ea4b2e9b3691bdff34529948e170e0fb9acb5824
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Build/Tests/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Annotate.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Annotate.py
new file mode 100644
index 0000000000000000000000000000000000000000..8e8d2c4a8d822c7b5e9fe8d4467716670d8086c2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Annotate.py
@@ -0,0 +1,341 @@
+# Note: Work in progress
+
+from __future__ import absolute_import
+
+import os
+import os.path
+import re
+import codecs
+import textwrap
+from datetime import datetime
+from functools import partial
+from collections import defaultdict
+from xml.sax.saxutils import escape as html_escape
+try:
+    from StringIO import StringIO
+except ImportError:
+    from io import StringIO  # does not support writing 'str' in Py2
+
+from . import Version
+from .Code import CCodeWriter
+from .. import Utils
+
+
+class AnnotationCCodeWriter(CCodeWriter):
+
+    # also used as marker for detection of complete code emission in tests
+    COMPLETE_CODE_TITLE = "Complete cythonized code"
+
+    def __init__(self, create_from=None, buffer=None, copy_formatting=True, show_entire_c_code=False, source_desc=None):
+        CCodeWriter.__init__(self, create_from, buffer, copy_formatting=copy_formatting)
+        self.show_entire_c_code = show_entire_c_code
+        if create_from is None:
+            self.annotation_buffer = StringIO()
+            self.last_annotated_pos = None
+            # annotations[filename][line] -> [(column, AnnotationItem)*]
+            self.annotations = defaultdict(partial(defaultdict, list))
+            # code[filename][line] -> str
+            self.code = defaultdict(partial(defaultdict, str))
+            # scopes[filename][line] -> set(scopes)
+            self.scopes = defaultdict(partial(defaultdict, set))
+        else:
+            # When creating an insertion point, keep references to the same database
+            self.annotation_buffer = create_from.annotation_buffer
+            self.annotations = create_from.annotations
+            self.code = create_from.code
+            self.scopes = create_from.scopes
+            self.last_annotated_pos = create_from.last_annotated_pos
+
+    def create_new(self, create_from, buffer, copy_formatting):
+        return AnnotationCCodeWriter(create_from, buffer, copy_formatting)
+
+    def _write_to_buffer(self, s):
+        self.buffer.write(s)
+        self.annotation_buffer.write(s)
+
+    def mark_pos(self, pos, trace=True):
+        if pos is not None:
+            CCodeWriter.mark_pos(self, pos, trace)
+            if self.funcstate and self.funcstate.scope:
+                # lambdas and genexprs can result in multiple scopes per line => keep them in a set
+                self.scopes[pos[0].filename][pos[1]].add(self.funcstate.scope)
+        if self.last_annotated_pos:
+            source_desc, line, _ = self.last_annotated_pos
+            pos_code = self.code[source_desc.filename]
+            pos_code[line] += self.annotation_buffer.getvalue()
+        self.annotation_buffer = StringIO()
+        self.last_annotated_pos = pos
+
+    def annotate(self, pos, item):  # pos is indexed as (source descriptor, line, column)
+        self.annotations[pos[0].filename][pos[1]].append((pos[2], item))
+
+    def _css(self):
+        """css template will later allow to choose a colormap"""
+        css = [self._css_template]
+        for i in range(255):
+            color = u"FFFF%02x" % int(255.0 // (1.0 + i/10.0))
+            css.append('.cython.score-%d {background-color: #%s;}' % (i, color))
+        try:
+            from pygments.formatters import HtmlFormatter
+        except ImportError:
+            pass
+        else:
+            css.append(HtmlFormatter().get_style_defs('.cython'))
+        return '\n'.join(css)
+
+    _css_template = textwrap.dedent("""
+        body.cython { font-family: courier; font-size: 12; }
+
+        .cython.tag  {  }
+        .cython.line { color: #000000; margin: 0em }
+        .cython.code { font-size: 9; color: #444444; display: none; margin: 0px 0px 0px 8px; border-left: 8px none; }
+
+        .cython.line .run { background-color: #B0FFB0; }
+        .cython.line .mis { background-color: #FFB0B0; }
+        .cython.code.run  { border-left: 8px solid #B0FFB0; }
+        .cython.code.mis  { border-left: 8px solid #FFB0B0; }
+
+        .cython.code .py_c_api  { color: red; }
+        .cython.code .py_macro_api  { color: #FF7000; }
+        .cython.code .pyx_c_api  { color: #FF3000; }
+        .cython.code .pyx_macro_api  { color: #FF7000; }
+        .cython.code .refnanny  { color: #FFA000; }
+        .cython.code .trace  { color: #FFA000; }
+        .cython.code .error_goto  { color: #FFA000; }
+
+        .cython.code .coerce  { color: #008000; border: 1px dotted #008000 }
+        .cython.code .py_attr { color: #FF0000; font-weight: bold; }
+        .cython.code .c_attr  { color: #0000FF; }
+        .cython.code .py_call { color: #FF0000; font-weight: bold; }
+        .cython.code .c_call  { color: #0000FF; }
+    """)  # static base rules; _css() emits them first, followed by the generated rules
+
+    # onclick attribute (with leading space): toggles 'display' of the next sibling element to show/hide C source code
+    _onclick_attr = ' onclick="{0}"'.format((
+        "(function(s){"
+        "    s.display =  s.display === 'block' ? 'none' : 'block'"
+        "})(this.nextElementSibling.style)"
+        ).replace(' ', '')  # poor dev's JS minification
+    )
+
+    def save_annotation(self, source_filename, target_filename, coverage_xml=None):
+        with Utils.open_source_file(source_filename) as f:
+            code = f.read()
+        generated_code = self.code.get(source_filename, {})
+        c_file = Utils.decode_filename(os.path.basename(target_filename))
+        html_filename = os.path.splitext(target_filename)[0] + ".html"
+
+        with codecs.open(html_filename, "w", encoding="UTF-8") as out_buffer:
+            out_buffer.write(self._save_annotation(code, generated_code, c_file, source_filename, coverage_xml))
+
+    def _save_annotation_header(self, c_file, source_filename, coverage_timestamp=None):
+        coverage_info = ''
+        if coverage_timestamp:
+            coverage_info = u' with coverage data from {timestamp}'.format(
+                timestamp=datetime.fromtimestamp(int(coverage_timestamp) // 1000))
+
+        outlist = [
+            textwrap.dedent(u'''\
+            <!DOCTYPE html>
+            <!-- Generated by Cython {watermark} -->
+            <html>
+            <head>
+                <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
+                <title>Cython: {filename}</title>
+                <style type="text/css">
+                {css}
+                </style>
+            </head>
+            <body class="cython">
+            <p><span style="border-bottom: solid 1px grey;">Generated by Cython {watermark}</span>{more_info}</p>
+            <p>
+                <span style="background-color: #FFFF00">Yellow lines</span> hint at Python interaction.<br />
+                Click on a line that starts with a "<code>+</code>" to see the C code that Cython generated for it.
+            </p>
+            ''').format(css=self._css(), watermark=Version.watermark,
+                        filename=os.path.basename(source_filename) if source_filename else '',
+                        more_info=coverage_info)
+        ]
+        if c_file:
+            outlist.append(u'<p>Raw output: <a href="%s">%s</a></p>\n' % (c_file, c_file))
+        return outlist
+
+    def _save_annotation_footer(self):  # fragment(s) closing the <body>/<html> opened by the header
+        return (u'</body></html>\n',)
+
+    def _save_annotation(self, code, generated_code, c_file=None, source_filename=None, coverage_xml=None):
+        """
+        lines : original cython source code split by lines
+        generated_code : generated c code keyed by line number in original file
+        target filename : name of the file in which to store the generated html
+        c_file : filename in which the c_code has been written
+        """
+        if coverage_xml is not None and source_filename:
+            coverage_timestamp = coverage_xml.get('timestamp', '').strip()
+            covered_lines = self._get_line_coverage(coverage_xml, source_filename)
+        else:
+            coverage_timestamp = covered_lines = None
+        annotation_items = dict(self.annotations[source_filename])
+        scopes = dict(self.scopes[source_filename])
+
+        outlist = []
+        outlist.extend(self._save_annotation_header(c_file, source_filename, coverage_timestamp))
+        outlist.extend(self._save_annotation_body(code, generated_code, annotation_items, scopes, covered_lines))
+        outlist.extend(self._save_annotation_footer())
+        return ''.join(outlist)
+
+    def _get_line_coverage(self, coverage_xml, source_filename):
+        coverage_data = None
+        for entry in coverage_xml.iterfind('.//class'):
+            if not entry.get('filename'):
+                continue
+            if (entry.get('filename') == source_filename or
+                    os.path.abspath(entry.get('filename')) == source_filename):
+                coverage_data = entry
+                break
+            elif source_filename.endswith(entry.get('filename')):
+                coverage_data = entry  # but we might still find a better match...
+        if coverage_data is None:
+            return None
+        return dict(
+            (int(line.get('number')), int(line.get('hits')))
+            for line in coverage_data.iterfind('lines/line')
+        )
+
+    def _htmlify_code(self, code, language):
+        try:
+            from pygments import highlight
+            from pygments.lexers import CythonLexer, CppLexer
+            from pygments.formatters import HtmlFormatter
+        except ImportError:
+            # no Pygments, just escape the code
+            return html_escape(code)
+
+        if language == "cython":
+            lexer = CythonLexer(stripnl=False, stripall=False)
+        elif language == "c/cpp":
+            lexer = CppLexer(stripnl=False, stripall=False)
+        else:
+            # unknown language, use fallback
+            return html_escape(code)
+        html_code = highlight(
+            code, lexer,
+            HtmlFormatter(nowrap=True))
+        return html_code
+
+    def _save_annotation_body(self, cython_code, generated_code, annotation_items, scopes, covered_lines=None):  # NOTE: 'annotation_items' and 'scopes' are not read here
+        outlist = [u'<div class="cython">']
+        pos_comment_marker = u'/* \N{HORIZONTAL ELLIPSIS} */\n'
+        new_calls_map = dict(
+            (name, 0) for name in
+            'refnanny trace py_macro_api py_c_api pyx_macro_api pyx_c_api error_goto'.split()
+        ).copy  # bound copy(): every call returns a fresh dict of zeroed counters
+        # Flush the code that is still pending in the annotation buffer.
+        self.mark_pos(None)
+        # Substitution callback for _parse_code(); counts matches in the current line's 'calls' dict.
+        def annotate(match):
+            group_name = match.lastgroup
+            calls[group_name] += 1
+            return u"<span class='%s'>%s</span>" % (
+                group_name, match.group(group_name))
+
+        lines = self._htmlify_code(cython_code, "cython").splitlines()
+        lineno_width = len(str(len(lines)))
+        if not covered_lines:
+            covered_lines = None  # empty coverage data is handled like missing coverage data
+
+        for k, line in enumerate(lines, 1):
+            try:
+                c_code = generated_code[k]
+            except KeyError:
+                c_code = ''
+            else:
+                c_code = _replace_pos_comment(pos_comment_marker, c_code)
+                if c_code.startswith(pos_comment_marker):
+                    c_code = c_code[len(pos_comment_marker):]
+                c_code = html_escape(c_code)
+
+            calls = new_calls_map()  # rebinds the name that annotate() above reads
+            c_code = _parse_code(annotate, c_code)
+            score = (5 * calls['py_c_api'] + 2 * calls['pyx_c_api'] +
+                     calls['py_macro_api'] + calls['pyx_macro_api'])
+
+            if c_code:
+                onclick = self._onclick_attr
+                expandsymbol = '+'
+            else:
+                onclick = ''
+                expandsymbol = '&#xA0;'
+
+            covered = ''
+            if covered_lines is not None and k in covered_lines:
+                hits = covered_lines[k]
+                if hits is not None:
+                    covered = 'run' if hits else 'mis'
+
+            outlist.append(
+                u'<pre class="cython line score-{score}"{onclick}>'
+                # generate the line number with the expand symbol in front,
+                # zero-padded to the right number of digits
+                u'{expandsymbol}<span class="{covered}">{line:0{lineno_width}d}</span>: {code}</pre>\n'.format(
+                    score=score,
+                    expandsymbol=expandsymbol,
+                    covered=covered,
+                    lineno_width=lineno_width,
+                    line=k,
+                    code=line.rstrip(),
+                    onclick=onclick,
+                ))
+            if c_code:
+                outlist.append(u"<pre class='cython code score-{score} {covered}'>{code}</pre>".format(
+                    score=score, covered=covered, code=c_code))
+        outlist.append(u"</div>")
+
+        # now the whole c-code if needed:
+        if self.show_entire_c_code:
+            outlist.append(u'<p><div class="cython">')
+            onclick_title = u"<pre class='cython line'{onclick}>+ {title}</pre>\n"
+            outlist.append(onclick_title.format(
+                              onclick=self._onclick_attr,
+                              title=AnnotationCCodeWriter.COMPLETE_CODE_TITLE,
+                           ))
+            complete_code_as_html = self._htmlify_code(self.buffer.getvalue(), "c/cpp")
+            outlist.append(u"<pre class='cython code'>{code}</pre>".format(code=complete_code_as_html))
+            outlist.append(u"</div></p>")
+
+        return outlist
+
+
+_parse_code = re.compile((  # bound sub(); the named groups are used as CSS class names by the annotate callback
+    br'(?P<refnanny>__Pyx_X?(?:GOT|GIVE)REF|__Pyx_RefNanny[A-Za-z]+)|'
+    br'(?P<trace>__Pyx_Trace[A-Za-z]+)|'
+    br'(?:'
+    br'(?P<pyx_macro_api>__Pyx_[A-Z][A-Z_]+)|'
+    br'(?P<pyx_c_api>(?:__Pyx_[A-Z][a-z_][A-Za-z_]*)|__pyx_convert_[A-Za-z_]*)|'
+    br'(?P<py_macro_api>Py[A-Z][a-z]+_[A-Z][A-Z_]+)|'
+    br'(?P<py_c_api>Py[A-Z][a-z]+_[A-Z][a-z][A-Za-z_]*)'
+    br')(?=\()|'       # look-ahead to exclude subsequent '(' from replacement
+    br'(?P<error_goto>(?:(?<=;) *if [^;]* +)?__PYX_ERR\([^)]+\))'
+).decode('ascii')).sub
+
+
+_replace_pos_comment = re.compile(
+    # this matches what Cython generates as code line marker comment
+    br'^\s*/\*(?:(?:[^*]|\*[^/])*\n)+\s*\*/\s*\n'.decode('ascii'),
+    re.M
+).sub  # bound sub(): called as _replace_pos_comment(replacement, text)
+
+
+class AnnotationItem(object):
+
+    def __init__(self, style, text, tag="", size=0):
+        self.style = style
+        self.text = text
+        self.tag = tag
+        self.size = size
+
+    def start(self):
+        return u"<span class='cython tag %s' title='%s'>%s" % (self.style, self.text, self.tag)
+
+    def end(self):
+        return self.size, u"</span>"
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..77226608999b2870f4a894b91307b3cf3f8a77cb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Code.cpython-311-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:4d946103cb5b89bdb2437bafe9bad2325d62a443935f3674d31ca3c7f1152a39
+size 1342888
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/CythonScope.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/CythonScope.py
new file mode 100644
index 0000000000000000000000000000000000000000..f73be007086bed44b46312ad27687ceaa19c4ed8
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/CythonScope.py
@@ -0,0 +1,181 @@
+from __future__ import absolute_import
+
+from .Symtab import ModuleScope
+from .PyrexTypes import *
+from .UtilityCode import CythonUtilityCode
+from .Errors import error
+from .Scanning import StringSourceDescriptor
+from . import MemoryView
+from .StringEncoding import EncodedString
+
+
+class CythonScope(ModuleScope):
+    is_cython_builtin = 1
+    _cythonscope_initialized = False
+
+    def __init__(self, context):
+        ModuleScope.__init__(self, u'cython', None, None)
+        self.pxd_file_loaded = True
+        self.populate_cython_scope()
+        # The Main.Context object
+        self.context = context
+
+        for fused_type in (cy_integral_type, cy_floating_type, cy_numeric_type):
+            entry = self.declare_typedef(fused_type.name,
+                                         fused_type,
+                                         None,
+                                         cname='<error>')
+            entry.in_cinclude = True
+
+    def is_cpp(self):
+        # Allow C++ utility code in C++ contexts: returns the 'cpp' attribute of the stored context.
+        return self.context.cpp
+
+    def lookup_type(self, name):
+        # This function should go away when types are all first-level objects.
+        type = parse_basic_type(name)
+        if type:
+            return type
+
+        return super(CythonScope, self).lookup_type(name)
+
+    def lookup(self, name):
+        entry = super(CythonScope, self).lookup(name)
+
+        if entry is None and not self._cythonscope_initialized:
+            self.load_cythonscope()
+            entry = super(CythonScope, self).lookup(name)
+
+        return entry
+
+    def find_module(self, module_name, pos):
+        error("cython.%s is not available" % module_name, pos)
+
+    def find_submodule(self, module_name, as_package=False):
+        entry = self.entries.get(module_name, None)
+        if not entry:
+            self.load_cythonscope()
+            entry = self.entries.get(module_name, None)
+
+        if entry and entry.as_module:
+            return entry.as_module
+        else:
+            # TODO: fix find_submodule control flow so that we're not
+            # expected to create a submodule here (to protect CythonScope's
+            # possible immutability). Hack ourselves out of the situation
+            # for now.
+            raise error((StringSourceDescriptor(u"cython", u""), 0, 0),
+                  "cython.%s is not available" % module_name)
+
+    def lookup_qualified_name(self, qname):
+        # ExprNode.as_cython_attribute generates qnames and we untangle it here...
+        name_path = qname.split(u'.')
+        scope = self
+        while len(name_path) > 1:
+            scope = scope.lookup_here(name_path[0])
+            if scope:
+                scope = scope.as_module
+            del name_path[0]
+            if scope is None:
+                return None
+        else:
+            return scope.lookup_here(name_path[0])
+
+    def populate_cython_scope(self):
+        # These are used to optimize isinstance in FinalOptimizePhase
+        type_object = self.declare_typedef(
+            'PyTypeObject',
+            base_type = c_void_type,
+            pos = None,
+            cname = 'PyTypeObject')
+        type_object.is_void = True
+        type_object_type = type_object.type
+
+        self.declare_cfunction(
+            'PyObject_TypeCheck',
+            CFuncType(c_bint_type, [CFuncTypeArg("o", py_object_type, None),
+                                    CFuncTypeArg("t", c_ptr_type(type_object_type), None)]),
+            pos = None,
+            defining = 1,
+            cname = 'PyObject_TypeCheck')
+
+    def load_cythonscope(self):
+        """
+        Creates some entries for testing purposes and entries for
+        cython.array(), for cython.view.* and an empty cython.dataclasses module.
+        """
+        if self._cythonscope_initialized:
+            return
+        # Mark as loaded first: any further call returns immediately (see the check above).
+        self._cythonscope_initialized = True
+        cython_testscope_utility_code.declare_in_scope(
+                                self, cython_scope=self)
+        cython_test_extclass_utility_code.declare_in_scope(
+                                    self, cython_scope=self)
+
+        #
+        # The view sub-scope
+        #
+        self.viewscope = viewscope = ModuleScope(u'view', self, None)
+        self.declare_module('view', viewscope, None).as_module = viewscope
+        viewscope.is_cython_builtin = True
+        viewscope.pxd_file_loaded = True
+
+        cythonview_testscope_utility_code.declare_in_scope(
+                                            viewscope, cython_scope=self)
+
+        view_utility_scope = MemoryView.view_utility_code.declare_in_scope(
+                                            self.viewscope, cython_scope=self,
+                                            allowlist=MemoryView.view_utility_allowlist)
+
+        # Marks the types as being cython_builtin_type so that they can be
+        # extended from without Cython attempting to import cython.view
+        ext_types = [ entry.type
+                         for entry in view_utility_scope.entries.values()
+                         if entry.type.is_extension_type ]
+        for ext_type in ext_types:
+            ext_type.is_cython_builtin_type = 1
+
+        # self.entries["array"] = view_utility_scope.entries.pop("array")
+
+        # dataclasses scope (set up the same way as the view sub-scope above)
+        dc_str = EncodedString(u'dataclasses')
+        dataclassesscope = ModuleScope(dc_str, self, context=None)
+        self.declare_module(dc_str, dataclassesscope, pos=None).as_module = dataclassesscope
+        dataclassesscope.is_cython_builtin = True
+        dataclassesscope.pxd_file_loaded = True
+        # doesn't actually have any contents
+
+
+def create_cython_scope(context):
+    # Returns a new CythonScope on every call.  One could in fact probably make
+    # it a singleton, but not sure yet whether any code mutates it (which would
+    # kill reusing it across different contexts)
+    return CythonScope(context)
+
+# Load test utilities for the cython scope
+
+def load_testscope_utility(cy_util_name, **kwargs):  # loads 'cy_util_name' from "TestCythonScope.pyx"; kwargs are passed through
+    return CythonUtilityCode.load(cy_util_name, "TestCythonScope.pyx", **kwargs)
+
+
+undecorated_methods_protos = UtilityCode(proto=u"""
+    /* These methods are undecorated and have therefore no prototype */
+    static PyObject *__pyx_TestClass_cdef_method(
+            struct __pyx_TestClass_obj *self, int value);
+    static PyObject *__pyx_TestClass_cpdef_method(
+            struct __pyx_TestClass_obj *self, int value, int skip_dispatch);
+    static PyObject *__pyx_TestClass_def_method(
+            PyObject *self, PyObject *value);
+""")  # prototypes only; listed in 'requires' of the "TestClass" utility below
+
+cython_testscope_utility_code = load_testscope_utility("TestScope")  # declared in the cython scope by load_cythonscope()
+
+test_cython_utility_dep = load_testscope_utility("TestDep")
+# "TestClass" requires the prototypes above and the "TestDep" utility.
+cython_test_extclass_utility_code = \
+    load_testscope_utility("TestClass", name="TestClass",
+                           requires=[undecorated_methods_protos,
+                                     test_cython_utility_dep])
+
+cythonview_testscope_utility_code = load_testscope_utility("View.TestScope")  # declared in the 'view' sub-scope
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ExprNodes.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ExprNodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..b4a66334a03b8dee47e16cfb6bd1104f3f376bf0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ExprNodes.py
@@ -0,0 +1,14758 @@
+#
+#   Parse tree nodes for expressions
+#
+
+from __future__ import absolute_import
+
+import cython
+cython.declare(error=object, warning=object, warn_once=object, InternalError=object,
+               CompileError=object, UtilityCode=object, TempitaUtilityCode=object,
+               StringEncoding=object, operator=object, local_errors=object, report_error=object,
+               Naming=object, Nodes=object, PyrexTypes=object, py_object_type=object,
+               list_type=object, tuple_type=object, set_type=object, dict_type=object,
+               unicode_type=object, str_type=object, bytes_type=object, type_type=object,
+               Builtin=object, Symtab=object, Utils=object, find_coercion_error=object,
+               debug_disposal_code=object, debug_temp_alloc=object, debug_coercion=object,
+               bytearray_type=object, slice_type=object, memoryview_type=object,
+               builtin_sequence_types=object, _py_int_types=object,
+               IS_PYTHON3=cython.bint)
+
+import re
+import sys
+import copy
+import os.path
+import operator
+
+from .Errors import (
+    error, warning, InternalError, CompileError, report_error, local_errors,
+    CannotSpecialize, performance_hint)
+from .Code import UtilityCode, TempitaUtilityCode
+from . import StringEncoding
+from . import Naming
+from . import Nodes
+from .Nodes import Node, utility_code_for_imports, SingleAssignmentNode
+from . import PyrexTypes
+from .PyrexTypes import py_object_type, typecast, error_type, \
+    unspecified_type
+from . import TypeSlots
+from .Builtin import (
+    list_type, tuple_type, set_type, dict_type, type_type,
+    unicode_type, str_type, bytes_type, bytearray_type, basestring_type,
+    slice_type, long_type, sequence_types as builtin_sequence_types, memoryview_type,
+)
+from . import Builtin
+from . import Symtab
+from .. import Utils
+from .Annotate import AnnotationItem
+from . import Future
+from ..Debugging import print_call_chain
+from .DebugFlags import debug_disposal_code, debug_coercion
+
+from .Pythran import (to_pythran, is_pythran_supported_type, is_pythran_supported_operation_type,
+     is_pythran_expr, pythran_func_type, pythran_binop_type, pythran_unaryop_type, has_np_pythran,
+     pythran_indexing_code, pythran_indexing_type, is_pythran_supported_node_or_none, pythran_type,
+     pythran_is_numpy_func_supported, pythran_get_func_include_file, pythran_functor)
+from .PyrexTypes import PythranExpr
+
+try:
+    from __builtin__ import basestring
+except ImportError:
+    # Python 3: the __builtin__ import failed, so alias basestring to str.
+    basestring = str
+    any_string_type = (bytes, str)
+else:
+    # Python 2: the import succeeded; pair bytes with the 'unicode' builtin.
+    any_string_type = (bytes, unicode)
+
+
+# Interpreter major-version switch, derived once from sys.version_info.
+IS_PYTHON3 = sys.version_info[0] >= 3
+# On Python 2 the integer types are (int, long).  The conditional expression
+# only evaluates the branch it selects, so the name 'long' is never looked
+# up when running on Python 3.
+_py_int_types = int if IS_PYTHON3 else (int, long)
+
+
+class NotConstant(object):  # singleton: every construction yields the same object
+    _obj = None  # the one shared instance, created on first construction
+
+    def __new__(cls):
+        existing = NotConstant._obj
+        if existing is None:
+            existing = NotConstant._obj = super(NotConstant, cls).__new__(cls)
+        return existing
+
+    def __repr__(self):
+        return "<NOT CONSTANT>"
+
+not_a_constant = NotConstant()  # the shared NotConstant instance
+constant_value_not_set = object()  # unique sentinel; class-level default of ExprNode.constant_result
+
+# Error messages for implicit coercions, keyed by the pair (source type, target type).
+coercion_error_dict = {
+    # string related errors
+    (unicode_type, str_type): ("Cannot convert Unicode string to 'str' implicitly."
+                               " This is not portable and requires explicit encoding."),
+    (unicode_type, bytes_type): "Cannot convert Unicode string to 'bytes' implicitly, encoding required.",
+    (unicode_type, PyrexTypes.c_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
+    (unicode_type, PyrexTypes.c_const_char_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
+    (unicode_type, PyrexTypes.c_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
+    (unicode_type, PyrexTypes.c_const_uchar_ptr_type): "Unicode objects only support coercion to Py_UNICODE*.",
+    (bytes_type, unicode_type): "Cannot convert 'bytes' object to unicode implicitly, decoding required",
+    (bytes_type, str_type): "Cannot convert 'bytes' object to str implicitly. This is not portable to Py3.",
+    (bytes_type, basestring_type): ("Cannot convert 'bytes' object to basestring implicitly."
+                                    " This is not portable to Py3."),
+    (bytes_type, PyrexTypes.c_py_unicode_ptr_type): "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'.",
+    (bytes_type, PyrexTypes.c_const_py_unicode_ptr_type): (
+        "Cannot convert 'bytes' object to Py_UNICODE*, use 'unicode'."),
+    (basestring_type, bytes_type): "Cannot convert 'basestring' object to bytes implicitly. This is not portable.",
+    (str_type, unicode_type): ("str objects do not support coercion to unicode,"
+                               " use a unicode string literal instead (u'')"),
+    (str_type, bytes_type): "Cannot convert 'str' to 'bytes' implicitly. This is not portable.",
+    (str_type, PyrexTypes.c_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
+    (str_type, PyrexTypes.c_const_char_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
+    (str_type, PyrexTypes.c_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
+    (str_type, PyrexTypes.c_const_uchar_ptr_type): "'str' objects do not support coercion to C types (use 'bytes'?).",
+    (str_type, PyrexTypes.c_py_unicode_ptr_type): "'str' objects do not support coercion to C types (use 'unicode'?).",
+    (str_type, PyrexTypes.c_const_py_unicode_ptr_type): (
+        "'str' objects do not support coercion to C types (use 'unicode'?)."),
+    (PyrexTypes.c_char_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
+    (PyrexTypes.c_const_char_ptr_type, unicode_type): (
+        "Cannot convert 'char*' to unicode implicitly, decoding required"),
+    (PyrexTypes.c_uchar_ptr_type, unicode_type): "Cannot convert 'char*' to unicode implicitly, decoding required",
+    (PyrexTypes.c_const_uchar_ptr_type, unicode_type): (
+        "Cannot convert 'char*' to unicode implicitly, decoding required"),
+}
+
+def find_coercion_error(type_tuple, default, env):
+    """Look up the message for coercing type_tuple[0] to type_tuple[1]; may fall back to ``default``."""
+    message = coercion_error_dict.get(type_tuple)
+    if message is None:
+        return default
+    encoding = env.directives['c_string_encoding']
+    char_ptr_types = (PyrexTypes.c_char_ptr_type, PyrexTypes.c_uchar_ptr_type,
+                      PyrexTypes.c_const_char_ptr_type, PyrexTypes.c_const_uchar_ptr_type)
+    if not (encoding and any(t in type_tuple for t in char_ptr_types)):
+        return message
+    # A c_string_encoding is set and a char pointer type takes part in the coercion.
+    if type_tuple[1].is_pyobject or encoding in ('ascii', 'default'):
+        return default
+    # Only a non-Python-object target with some other encoding reaches this point.
+    return "'%s' objects do not support coercion to C types with non-ascii or non-default c_string_encoding" % type_tuple[0].name
+
+
+def default_str_type(env):
+    """Translate the 'c_string_type' directive into the matching builtin string type."""
+    types_by_name = dict(bytes=bytes_type, bytearray=bytearray_type,
+                         str=str_type, unicode=unicode_type)
+    directive_value = env.directives['c_string_type']
+    # .get() gives None for any value other than the four names above.
+    return types_by_name.get(directive_value)
+
+
+def check_negative_indices(*nodes):
+    """
+    Emit a level-1 warning for each node whose constant result is a negative number.
+    Helps to find (potential) bugs inside of "wraparound=False" sections.
+    """
+    for node in nodes:
+        if node is None:
+            continue
+        value = node.constant_result
+        is_number = isinstance(value, _py_int_types) or isinstance(value, float)
+        if is_number and value < 0:
+            warning(node.pos,
+                    "the result of using negative indices inside of "
+                    "code sections marked as 'wraparound=False' is "
+                    "undefined", level=1)
+
+
+def infer_sequence_item_type(env, seq_node, index_node=None, seq_type=None):
+    """Best-effort item type of a sequence node; None when nothing can be inferred."""
+    if not seq_node.is_sequence_constructor:
+        if seq_type is None:
+            seq_type = seq_node.infer_type(env)
+        # tuples are immutable => a single known assignment can safely be followed
+        if seq_type is tuple_type and seq_node.cf_state and len(seq_node.cf_state) == 1:
+            try:
+                seq_node = seq_node.cf_state[0].rhs
+            except AttributeError:
+                pass
+    if seq_node is None or not seq_node.is_sequence_constructor:
+        return None
+    # A constant index picks one specific item, provided it is a usable subscript.
+    if index_node is not None and index_node.has_constant_result():
+        try:
+            item = seq_node.args[index_node.constant_result]
+        except (ValueError, TypeError, IndexError):
+            pass
+        else:
+            return item.infer_type(env)
+    # Otherwise there is an answer only if all items infer to one and the same type.
+    distinct_types = set(item.infer_type(env) for item in seq_node.args)
+    return distinct_types.pop() if len(distinct_types) == 1 else None
+
+
+def make_dedup_key(outer_type, item_nodes):
+    """
+    Build a deduplication key for a sequence of constant nodes, recursing into
+    nested sequences and slices.  Cython node types are part of the key because
+    plain values alone cannot tell e.g. (1, 2.0) and (1.0, 2) apart.
+    @param outer_type: The type of the outer container.
+    @param item_nodes: A sequence of constant nodes that will be traversed recursively.
+    @return: (outer_type, tuple of item keys), or None if some item cannot be keyed.
+    """
+    def key_of(node):
+        if node is None:
+            return (py_object_type, None, type(None))
+        if node.is_sequence_constructor:
+            # For sequences and their "mult_factor", see TupleNode.
+            return make_dedup_key(node.type, [node.mult_factor if node.is_literal else None] + node.args)
+        if node.is_slice:
+            return make_dedup_key(node.type, (node.start, node.stop, node.step))
+        if node.has_constant_result():
+            # For constants, look at the Python value type if we don't know the concrete Cython type.
+            value_type = type(node.constant_result) if node.type is py_object_type else None
+            return (node.type, node.constant_result, value_type)
+        if isinstance(node, IdentifierStringNode):
+            # Usually has no "constant_result" set, but should be cacheable all the same.
+            return (node.type, node.value, node.unicode_value, "IdentifierStringNode")
+        return None  # something we cannot handle => the whole key becomes None below
+
+    item_keys = [key_of(node) for node in item_nodes]
+    return None if None in item_keys else (outer_type, tuple(item_keys))
+
+
+# Returns a block of code to translate the exception,
+# plus a boolean indicating whether to check for Python exceptions.
+def get_exception_handler(exception_value):
+    """Return (C code translating the C++ exception, flag: also check for a Python error)."""
+    default_translation = "__Pyx_CppExn2PyErr();"
+    # Nothing declared: default translation, no extra check.
+    if exception_value is None:
+        return default_translation, False
+    # A c_char_type value of '*': default translation plus the extra check.
+    if exception_value.type == PyrexTypes.c_char_type and exception_value.value == '*':
+        return default_translation, True
+    if exception_value.type.is_pyobject:
+        # std::exception passes what() as the message; anything else sets the type alone.
+        template = ('try { throw; } catch(const std::exception& exn) {'
+                    'PyErr_SetString(%s, exn.what());'
+                    '} catch(...) { PyErr_SetNone(%s); }')
+        cname = exception_value.entry.cname
+        return template % (cname, cname), False
+    # Otherwise emit a call of the entry's cname, with a RuntimeError as the fallback.
+    template = ('%s(); if (!PyErr_Occurred())'
+                'PyErr_SetString(PyExc_RuntimeError, '
+                '"Error converting c++ exception.");')
+    return template % exception_value.entry.cname, False
+
+
+def maybe_check_py_error(code, check_py_exception, pos, nogil):
+    """When requested, emit an error_goto_if() on the pending-Python-error test."""
+    if not check_py_exception:
+        return
+    if nogil:  # the nogil variant of the test needs its utility code registered
+        code.globalstate.use_utility_code(UtilityCode.load_cached("ErrOccurredWithGIL", "Exceptions.c"))
+    error_test = "__Pyx_ErrOccurredWithGIL()" if nogil else "PyErr_Occurred()"
+    code.putln(code.error_goto_if(error_test, pos))
+
+
+def translate_cpp_exception(code, pos, inside, py_result, exception_value, nogil):
+    raise_py_exception, check_py_exception = get_exception_handler(exception_value)
+    code.putln("try {")  # the statement in 'inside' is emitted inside a C++ try block
+    code.putln("%s" % inside)
+    if py_result:  # a result was named: also emit error_goto_if_null() for it
+        code.putln(code.error_goto_if_null(py_result, pos))
+    maybe_check_py_error(code, check_py_exception, pos, nogil)
+    code.putln("} catch(...) {")
+    if nogil:  # the handler code is bracketed by ensure/release GIL calls
+        code.put_ensure_gil(declare_gilstate=True)
+    code.putln(raise_py_exception)
+    if nogil:
+        code.put_release_ensured_gil()
+    code.putln(code.error_goto(pos))  # the catch block always ends with error_goto(pos)
+    code.putln("}")
+
+def needs_cpp_exception_conversion(node):
+    """Tell whether the exception value of ``node`` (declared with "+") asks for conversion."""
+    assert node.exception_check == "+"
+    exc_value = node.exception_value
+    if exc_value is None:
+        return True
+    if exc_value.is_name:
+        # A NameNode is used as a handler function, so no conversion is needed.
+        return False
+    # Besides None, only a CharNode with a value of "*" asks for conversion;
+    # most other const-nodes are disallowed after "+" by the parser.
+    is_star_char = isinstance(exc_value, CharNode) and exc_value.value == "*"
+    return bool(is_star_char)
+
+
+# Used to handle the case where an lvalue expression and an overloaded assignment
+# both have an exception declaration.
+def translate_double_cpp_exception(code, pos, lhs_type, lhs_code, rhs_code, lhs_exc_val, assign_exc_val, nogil):
+    """Emit nested C++ try blocks: the outer one covers the lvalue, the inner one the assignment."""
+    handle_lhs_exc, lhs_check_py_exc = get_exception_handler(lhs_exc_val)
+    handle_assignment_exc, assignment_check_py_exc = get_exception_handler(assign_exc_val)
+
+    def put_catch_all(handler_code):
+        # Close the open try block with a catch-all that emits handler_code
+        # (bracketed by ensure/release GIL calls when nogil), then error_goto(pos).
+        code.putln("} catch(...) {")
+        if nogil:
+            code.put_ensure_gil(declare_gilstate=True)
+        code.putln(handler_code)
+        if nogil:
+            code.put_release_ensured_gil()
+        code.putln(code.error_goto(pos))
+        code.putln("}")
+
+    code.putln("try {")
+    code.putln(lhs_type.declaration_code("__pyx_local_lvalue = %s;" % lhs_code))
+    maybe_check_py_error(code, lhs_check_py_exc, pos, nogil)
+    code.putln("try {")
+    code.putln("__pyx_local_lvalue = %s;" % rhs_code)
+    maybe_check_py_error(code, assignment_check_py_exc, pos, nogil)
+    # Catch any exception from the overloaded assignment.
+    put_catch_all(handle_assignment_exc)
+    # Catch any exception from evaluating lhs.
+    put_catch_all(handle_lhs_exc)
+
+
+class ExprNode(Node):
+    #  subexprs     [string]     Class var holding names of subexpr node attrs
+    #  type         PyrexType    Type of the result
+    #  result_code  string       Code fragment
+    #  result_ctype string       C type of result_code if different from type
+    #  is_temp      boolean      Result is in a temporary variable
+    #  is_sequence_constructor
+    #               boolean      Is a list or tuple constructor expression
+    #  is_starred   boolean      Is a starred expression (e.g. '*a')
+    #  use_managed_ref boolean   use ref-counted temps/assignments/etc.
+    #  result_is_used  boolean   if False, the result will be dropped and the
+    #                            result_code/temp_result can safely be set to None
+    #  is_numpy_attribute   boolean   Is a Numpy module attribute
+    #  annotation   ExprNode or None    PEP526 annotation for names or expressions
+    #  generator_arg_tag  None or Node   A tag to mark ExprNodes that potentially need to
+    #                              be changed to a generator argument
+
+    result_ctype = None
+    type = None
+    annotation = None
+    temp_code = None
+    old_temp = None  # error checker for multiple frees etc.
+    use_managed_ref = True  # can be set by optimisation transforms
+    result_is_used = True
+    is_numpy_attribute = False
+    generator_arg_tag = None
+
+    #  The Analyse Expressions phase for expressions is split
+    #  into two sub-phases:
+    #
+    #    Analyse Types
+    #      Determines the result type of the expression based
+    #      on the types of its sub-expressions, and inserts
+    #      coercion nodes into the expression tree where needed.
+    #      Marks nodes which will need to have temporary variables
+    #      allocated.
+    #
+    #    Allocate Temps
+    #      Allocates temporary variables where needed, and fills
+    #      in the result_code field of each node.
+    #
+    #  ExprNode provides some convenience routines which
+    #  perform both of the above phases. These should only
+    #  be called from statement nodes, and only when no
+    #  coercion nodes need to be added around the expression
+    #  being analysed. In that case, the above two phases
+    #  should be invoked separately.
+    #
+    #  Framework code in ExprNode provides much of the common
+    #  processing for the various phases. It makes use of the
+    #  'subexprs' class attribute of ExprNodes, which should
+    #  contain a list of the names of attributes which can
+    #  hold sub-nodes or sequences of sub-nodes.
+    #
+    #  The framework makes use of a number of abstract methods.
+    #  Their responsibilities are as follows.
+    #
+    #    Declaration Analysis phase
+    #
+    #      analyse_target_declaration
+    #        Called during the Analyse Declarations phase to analyse
+    #        the LHS of an assignment or argument of a del statement.
+    #        Nodes which cannot be the LHS of an assignment need not
+    #        implement it.
+    #
+    #    Expression Analysis phase
+    #
+    #      analyse_types
+    #        - Call analyse_types on all sub-expressions.
+    #        - Check operand types, and wrap coercion nodes around
+    #          sub-expressions where needed.
+    #        - Set the type of this node.
+    #        - If a temporary variable will be required for the
+    #          result, set the is_temp flag of this node.
+    #
+    #      analyse_target_types
+    #        Called during the Analyse Types phase to analyse
+    #        the LHS of an assignment or argument of a del
+    #        statement. Similar responsibilities to analyse_types.
+    #
+    #      target_code
+    #        Called by the default implementation of allocate_target_temps.
+    #        Should return a C lvalue for assigning to the node. The default
+    #        implementation calls calculate_result_code.
+    #
+    #      check_const
+    #        - Check that this node and its subnodes form a
+    #          legal constant expression. If so, do nothing,
+    #          otherwise call not_const.
+    #
+    #        The default implementation of check_const
+    #        assumes that the expression is not constant.
+    #
+    #      check_const_addr
+    #        - Same as check_const, except check that the
+    #          expression is a C lvalue whose address is
+    #          constant. Otherwise, call addr_not_const.
+    #
+    #        The default implementation of check_const_addr
+    #        assumes that the expression is not a constant
+    #        lvalue.
+    #
+    #   Code Generation phase
+    #
+    #      generate_evaluation_code
+    #        - Call generate_evaluation_code for sub-expressions.
+    #        - Perform the functions of generate_result_code
+    #          (see below).
+    #        - If result is temporary, call generate_disposal_code
+    #          on all sub-expressions.
+    #
+    #        A default implementation of generate_evaluation_code
+    #        is provided which uses the following abstract methods:
+    #
+    #          generate_result_code
+    #            - Generate any C statements necessary to calculate
+    #              the result of this node from the results of its
+    #              sub-expressions.
+    #
+    #          calculate_result_code
+    #            - Should return a C code fragment evaluating to the
+    #              result. This is only called when the result is not
+    #              a temporary.
+    #
+    #      generate_assignment_code
+    #        Called on the LHS of an assignment.
+    #        - Call generate_evaluation_code for sub-expressions.
+    #        - Generate code to perform the assignment.
+    #        - If the assignment absorbed a reference, call
+    #          generate_post_assignment_code on the RHS,
+    #          otherwise call generate_disposal_code on it.
+    #
+    #      generate_deletion_code
+    #        Called on an argument of a del statement.
+    #        - Call generate_evaluation_code for sub-expressions.
+    #        - Generate code to perform the deletion.
+    #        - Call generate_disposal_code on all sub-expressions.
+    #
+    #
+
+    is_sequence_constructor = False
+    is_dict_literal = False
+    is_set_literal = False
+    is_string_literal = False
+    is_attribute = False
+    is_subscript = False
+    is_slice = False
+
+    is_buffer_access = False
+    is_memview_index = False
+    is_memview_slice = False
+    is_memview_broadcast = False
+    is_memview_copy_assignment = False
+
+    is_temp = False
+    has_temp_moved = False  # if True then attempting to do anything but free the temp is invalid
+    is_target = False
+    is_starred = False
+
+    constant_result = constant_value_not_set
+    # child_attrs mirrors subexprs: operator.attrgetter if sys.implementation is "cpython" or absent, else a method.
+    if getattr(getattr(sys, "implementation", None), "name", "cpython") == "cpython":
+        child_attrs = property(fget=operator.attrgetter('subexprs'))
+    else:
+        @property
+        def child_attrs(self):
+            return self.subexprs
+
+    def analyse_annotations(self, env):
+        """Do nothing in this base implementation."""
+
+    def not_implemented(self, method_name):
+        print_call_chain(method_name, "not implemented")  # print first, then raise
+        message = "%s.%s not implemented" % (self.__class__.__name__, method_name)
+        raise InternalError(message)
+
+    def is_lvalue(self):
+        return 0  # default answer: not an lvalue
+
+    def is_addressable(self):  # an lvalue whose type is not a memoryview slice
+        return self.is_lvalue() and not self.type.is_memoryviewslice
+
+    def is_ephemeral(self):
+        """
+        An ephemeral node is one whose result is in a Python temporary and we
+        suspect there are no other references to it.  Certain operations are
+        disallowed on such values, since they are likely to result in a dangling pointer.
+        """
+        return self.type.is_pyobject and self.is_temp
+
+    def subexpr_nodes(self):
+        """
+        Collect the sub-expression nodes named by ``self.subexprs`` into one flat list.
+        Attributes holding None are skipped; attributes holding a list add their items.
+        """
+        collected = []
+        for attr_name in self.subexprs:
+            child = getattr(self, attr_name)
+            if child is None:
+                continue
+            collected += child if type(child) is list else [child]
+        return collected
+
+    def result(self):
+        """Return temp_code when is_temp is set, otherwise calculate_result_code()."""
+        if not self.is_temp:
+            return self.calculate_result_code()
+        #if not self.temp_code:
+        #    pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
+        #    raise RuntimeError("temp result name not set in %s at %r" % (
+        #        self.__class__.__name__, pos))
+        return self.temp_code
+
+    def _make_move_result_rhs(self, result, optional=False):
+        if optional and (not self.is_temp or not self.type.is_cpp_class or self.type.is_reference):
+            return result  # optional move skipped: not a temp of non-reference C++ class type
+        self.has_temp_moved = True
+        return "{}({})".format("std::move" if not optional else "__PYX_STD_MOVE_IF_SUPPORTED", result)
+
+    def move_result_rhs(self):  # result() passed through an optional move
+        return self._make_move_result_rhs(self.result(), optional=True)
+
+    def move_result_rhs_as(self, type):
+        code_as_type = self.result_as(type)
+        if type.is_reference or type.needs_refcounting:
+            return code_as_type  # reference and ref-counted types are passed on without a move
+        must_move = type.is_rvalue_reference and self.is_temp
+        return self._make_move_result_rhs(code_as_type, optional=not must_move)
+
+    def pythran_result(self, type_=None):
+        if is_pythran_supported_node_or_none(self):
+            return to_pythran(self)
+        # Otherwise an explicit type_ is mandatory and is handed to to_pythran().
+        assert type_ is not None
+        return to_pythran(self, type_)
+
+    def is_c_result_required(self):
+        """
+        Return True by default.  Subtypes may return False here if result temp allocation can be skipped.
+        """
+        return True
+
+    def result_as(self, type = None):
+        """Return the result code cast to the specified C type."""
+        if self.is_temp and self.type.is_pyobject and type != py_object_type:
+            # Allocated temporaries are always PyObject *, which may not reflect the actual type
+            from_type = py_object_type
+        else:
+            from_type = self.ctype()
+        return typecast(type, from_type, self.result())
+
+    def py_result(self):
+        """Return the result code cast to PyObject *."""
+        return self.result_as(py_object_type)
+
+    def ctype(self):
+        """Return the native C type of the result (i.e. the C type of the
+        result_code expression): result_ctype if truthy, otherwise type."""
+        return self.result_ctype or self.type
+
+    def get_constant_c_result_code(self):
+        """
+        Return the constant value of this node as a result code string, or
+        None if the node is not constant.  This method can be called when the
+        constant result code is required before the code generation phase.
+        The returned string can represent a simple C value, a constant C name
+        or a constant C expression.  If the node type depends on Python code,
+        this must return None.  This base implementation always returns None.
+        """
+        return None
+
+    def calculate_constant_result(self):
+        """
+        Calculate the constant compile time result value of this expression
+        and store it in ``self.constant_result``.  Does nothing by default,
+        thus leaving ``self.constant_result`` unknown.  If valid, the result
+        can be an arbitrary Python value.
+
+        This must only be called when it is assured that all sub-expressions have
+        a valid constant_result value.  The ConstantFolding transform will do this.
+        """
+        pass
+
+    def has_constant_result(self):
+        value = self.constant_result  # constant only if it is neither of the two marker objects
+        return value is not constant_value_not_set and value is not not_a_constant
+
+    def compile_time_value(self, denv):
+        """Return value of compile-time expression, or report error; this default only reports the error."""
+        error(self.pos, "Invalid compile-time expression")
+
+    def compile_time_value_error(self, e):
+        details = (e.__class__.__name__, e)  # exception class name, then the exception itself
+        error(self.pos, "Error in compile-time expression: %s: %s" % details)
+
+    # ------------- Declaration Analysis ----------------
+
+    def analyse_target_declaration(self, env):  # default: report that this is not a valid target
+        error(self.pos, "Cannot assign to or delete this")
+
+    def analyse_assignment_expression_target_declaration(self, env):  # default: always an error
+        error(self.pos, "Cannot use anything except a name in an assignment expression")
+
+    # ------------- Expression Analysis ----------------
+
+    def analyse_const_expression(self, env):
+        """
+        Used while analysing the declarations of a constant expression: analyse
+        the expression's type, run check_const() on the analysed node, return it.
+        """
+        analysed = self.analyse_types(env)
+        analysed.check_const()
+        return analysed
+
+    def analyse_expressions(self, env):
+        """
+        Convenience routine for analysing a whole expression: delegates to analyse_types().
+        """
+        return self.analyse_types(env)
+
+    def analyse_target_expression(self, env, rhs):
+        """
+        Convenience routine for the LHS of an assignment: delegates to analyse_target_types(); ``rhs`` is unused here.
+        """
+        return self.analyse_target_types(env)
+
+    def analyse_boolean_expression(self, env):
+        """Analyse the expression and coerce the analysed node to a boolean."""
+        analysed = self.analyse_types(env)
+        # The node handed back is whatever coerce_to_boolean() produces.
+        return analysed.coerce_to_boolean(env)
+
+    def analyse_temp_boolean_expression(self, env):
+        """
+        Analyse boolean expression and coerce result into a temporary.
+        This is used when a branch is to be performed on the result and
+        we won't have an opportunity to ensure disposal code is executed
+        afterwards.  By forcing the result into a temporary, we ensure
+        that all disposal has been done by the time we get the result.
+        """
+        node = self.analyse_types(env)
+        return node.coerce_to_boolean(env).coerce_to_simple(env)
+
+    # --------------- Type Inference -----------------
+
+    def type_dependencies(self, env):
+        # Entries whose types must be determined before the type of self can be inferred.
+        if getattr(self, 'type', None) is not None:
+            return ()
+        child_deps = [child.type_dependencies(env) for child in self.subexpr_nodes()]
+        return sum(child_deps, ())
+
+    def infer_type(self, env):
+        """
+        Attempt to deduce the type of self without the full analyse_types() work;
+        may assume that everything in self.type_dependencies() has been resolved.
+        """
+        known_type = getattr(self, 'type', None)
+        if known_type is not None:
+            return known_type
+        entry = getattr(self, 'entry', None)
+        if entry is not None:
+            return entry.type
+        self.not_implemented("infer_type")
+
+    def nonlocally_immutable(self):
+        """Returns whether this variable is a safe reference, i.e. can't be modified as part
+        of globals or closures: true for literals, temps, and array or C function types."""
+        return self.is_literal or self.is_temp or self.type.is_array or self.type.is_cfunction
+
+    def inferable_item_node(self, index=0):
+        """
+        Build an IndexNode on self with a constant index: a stand-in for the (type)
+        result of an indexing operation, e.g. for tuple unpacking or iteration.
+        """
+        index_node = IntNode(self.pos, value=str(index), constant_result=index, type=PyrexTypes.c_py_ssize_t_type)
+        return IndexNode(self.pos, base=self, index=index_node)
+
+    # --------------- Type Analysis ------------------
+
+    def analyse_as_module(self, env):
+        """If this node can be interpreted as a reference to a cimported
+        module, return its scope, else None.  This default returns None."""
+        return None
+
+    def analyse_as_type(self, env):
+        """If this node can be interpreted as a reference to a type,
+        return that type, else None.  This default returns None."""
+        return None
+
+    def analyse_as_specialized_type(self, env):
+        as_type = self.analyse_as_type(env)
+        if not (as_type and as_type.is_fused):
+            return as_type  # nothing to specialise
+        if env.fused_to_specific:
+            # try/except, since a membership test is not reliable (mainly for nested fused types)
+            try:
+                return as_type.specialize(env.fused_to_specific)
+            except KeyError:
+                pass
+        error(self.pos, "Type is not specific")
+        return as_type
+
+    def analyse_as_extension_type(self, env):
+        """If this node can be interpreted as a reference to an extension type
+        or builtin type, return its type, else None.  This default returns None."""
+        return None
+
+    def analyse_types(self, env):  # no default behaviour: goes straight to not_implemented()
+        self.not_implemented("analyse_types")
+
+    def analyse_target_types(self, env):  # default: same as analyse_types()
+        return self.analyse_types(env)
+
+    def nogil_check(self, env):
+        """By default, any expression based on Python objects is prevented
+        in nogil environments.  Subtypes must override this if they can
+        work without the GIL."""
+        if self.type and self.type.is_pyobject:
+            self.gil_error()
+
+    def gil_assignment_check(self, env):
+        if env.nogil and self.type.is_pyobject:  # Python object assigned inside a nogil scope
+            error(self.pos, "Assignment of Python object not allowed without gil")
+
+    def check_const(self):
+        self.not_const()  # reports the error; the base class never counts as constant
+        return False
+
+    def not_const(self):
+        error(self.pos, "Not allowed in a constant expression")  # reported at this node's position
+
+    def check_const_addr(self):
+        self.addr_not_const()  # reports the error; the base class has no constant address
+        return False
+
+    def addr_not_const(self):
+        error(self.pos, "Address is not constant")  # reported at this node's position
+
+    # ----------------- Result Allocation -----------------
+
+    def result_in_temp(self):
+        #  Return true if result is in a temporary owned by
+        #  this node or one of its subexpressions. Overridden
+        #  by certain nodes which can share the result of
+        #  a subnode.
+        return self.is_temp  # default: only this node's own temp flag is consulted
+
+    def target_code(self):
+        #  Return code fragment for use as LHS of a C assignment.
+        return self.calculate_result_code()  # default: same fragment as the result code
+
+    def calculate_result_code(self):
+        self.not_implemented("calculate_result_code")  # no generic implementation in this base class
+
+#    def release_target_temp(self, env):
+#        #  Release temporaries used by LHS of an assignment.
+#        self.release_subexpr_temps(env)
+
+    def allocate_temp_result(self, code):
+        if self.temp_code:
+            raise RuntimeError("Temp allocated multiple times in %r: %r" % (self.__class__.__name__, self.pos))
+        temp_type = self.type
+        if temp_type.is_void:
+            self.temp_code = None
+            return
+        if temp_type.is_pyobject:
+            temp_type = PyrexTypes.py_object_type
+        elif not (self.result_is_used or temp_type.is_memoryviewslice or self.is_c_result_required()):
+            self.temp_code = None
+            return
+        self.temp_code = code.funcstate.allocate_temp(
+            temp_type, manage_ref=self.use_managed_ref)
+
+    def release_temp_result(self, code):
+        if not self.temp_code:  # nothing is currently allocated for this node
+            if not self.result_is_used:
+                # not used anyway, so ignore if not set up
+                return
+            pos = (os.path.basename(self.pos[0].get_description()),) + self.pos[1:] if self.pos else '(?)'
+            if self.old_temp:  # a temp was released before: this is a double release
+                raise RuntimeError("temp %s released multiple times in %s at %r" % (
+                    self.old_temp, self.__class__.__name__, pos))
+            else:
+                raise RuntimeError("no temp, but release requested in %s at %r" % (
+                    self.__class__.__name__, pos))
+        code.funcstate.release_temp(self.temp_code)
+        self.old_temp = self.temp_code  # remembered so that a second release can be reported
+        self.temp_code = None
+
+    # ---------------- Code Generation -----------------
+
+    def make_owned_reference(self, code):
+        """
+        Make sure we own a reference to result.
+        If the result is in a temp, it is already a new reference.
+        """
+        if not self.result_in_temp():
+            code.put_incref(self.result(), self.ctype())  # not in a temp: emit an incref
+
+    def make_owned_memoryviewslice(self, code):
+        """
+        Make sure we own the reference to this memoryview slice.
+        """
+        # TODO ideally this would be shared with "make_owned_reference"
+        if not self.result_in_temp():  # results already in a temp are left untouched
+            code.put_incref_memoryviewslice(self.result(), self.type,
+                                            have_gil=not self.in_nogil_context)
+
+    def generate_evaluation_code(self, code):
+        #  Generate code to evaluate this node and
+        #  its sub-expressions, and dispose of any
+        #  temporary results of its sub-expressions.
+        self.generate_subexpr_evaluation_code(code)  # sub-expressions are evaluated first
+
+        code.mark_pos(self.pos)
+        if self.is_temp:
+            self.allocate_temp_result(code)
+
+        self.generate_result_code(code)
+        if self.is_temp and not (self.type.is_string or self.type.is_pyunicode_ptr):
+            # If we are temp we do not need to wait until this node is disposed
+            # before disposing children.
+            self.generate_subexpr_disposal_code(code)
+            self.free_subexpr_temps(code)
+
+    def generate_subexpr_evaluation_code(self, code):
+        for node in self.subexpr_nodes():  # in the order given by subexpr_nodes()
+            node.generate_evaluation_code(code)
+
+    def generate_result_code(self, code):
+        self.not_implemented("generate_result_code")  # no generic implementation in this base class
+
+    def generate_disposal_code(self, code):
+        if self.has_temp_moved:  # make sure the "MoveIfSupported" utility code gets included
+            code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp"))
+        if self.is_temp:
+            if self.type.is_string or self.type.is_pyunicode_ptr:
+                # postponed from self.generate_evaluation_code()
+                self.generate_subexpr_disposal_code(code)
+                self.free_subexpr_temps(code)
+            if self.result():  # only emit the decref/clear when there is a result to clear
+                code.put_decref_clear(self.result(), self.ctype(),
+                                        have_gil=not self.in_nogil_context)
+        else:
+            # Already done if self.is_temp
+            self.generate_subexpr_disposal_code(code)
+
+    def generate_subexpr_disposal_code(self, code):
+        #  Generate code to dispose of temporary results
+        #  of all sub-expressions.
+        for node in self.subexpr_nodes():  # in the order given by subexpr_nodes()
+            node.generate_disposal_code(code)
+
+    def generate_post_assignment_code(self, code):
+        if self.is_temp:
+            if self.type.is_string or self.type.is_pyunicode_ptr:
+                # postponed from self.generate_evaluation_code()
+                self.generate_subexpr_disposal_code(code)
+                self.free_subexpr_temps(code)
+            elif self.type.is_pyobject:
+                code.putln("%s = 0;" % self.result())  # reset the temp without a decref
+            elif self.type.is_memoryviewslice:
+                code.putln("%s.memview = NULL;" % self.result())  # reset the slice without a decref
+                code.putln("%s.data = NULL;" % self.result())
+
+            if self.has_temp_moved:  # make sure the "MoveIfSupported" utility code gets included
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp"))
+        else:
+            self.generate_subexpr_disposal_code(code)  # non-temp: only the sub-expressions are disposed of
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        #  Stub method for nodes which are not legal as
+        #  the LHS of an assignment. An error will have
+        #  been reported earlier.
+        pass  # intentionally generates nothing
+
+    def generate_deletion_code(self, code, ignore_nonexisting=False):
+        #  Stub method for nodes that are not legal as
+        #  the argument of a del statement. An error
+        #  will have been reported earlier.
+        pass  # intentionally generates nothing
+
+    def free_temps(self, code):
+        # Temp nodes release their own (non-void) result; all others forward to their children.
+        if not self.is_temp:
+            self.free_subexpr_temps(code)
+        elif not self.type.is_void:
+            self.release_temp_result(code)
+
+    def free_subexpr_temps(self, code):
+        for sub in self.subexpr_nodes():  # each child decides itself what needs releasing
+            sub.free_temps(code)
+
+    def generate_function_definitions(self, env, code):
+        pass  # base class default: generates nothing
+
+    # ----Generation of small bits of reference counting --
+
+    def generate_decref_set(self, code, rhs):
+        code.put_decref_set(self.result(), self.ctype(), rhs)  # applied to this node's result()/ctype()
+
+    def generate_xdecref_set(self, code, rhs):
+        code.put_xdecref_set(self.result(), self.ctype(), rhs)  # "x" variant of generate_decref_set()
+
+    def generate_gotref(self, code, handle_null=False,
+                        maybe_null_extra_check=True):
+        if handle_null and self.cf_is_null:
+            return  # nothing is emitted when handle_null is set and cf_is_null holds
+        if handle_null and self.cf_maybe_null and maybe_null_extra_check:
+            self.generate_xgotref(code)
+        else:
+            code.put_gotref(self.result(), self.ctype())
+
+    def generate_xgotref(self, code):
+        code.put_xgotref(self.result(), self.ctype())  # "x" variant, applied to this node's result
+
+    def generate_giveref(self, code):
+        code.put_giveref(self.result(), self.ctype())  # applied to this node's result()/ctype()
+
+    def generate_xgiveref(self, code):
+        code.put_xgiveref(self.result(), self.ctype())  # "x" variant of generate_giveref()
+
+    # ---------------- Annotation ---------------------
+
+    def annotate(self, code):
+        for node in self.subexpr_nodes():  # default: only recurse into the sub-expressions
+            node.annotate(code)
+
+    # ----------------- Coercion ----------------------
+
+    def coerce_to(self, dst_type, env):
+        #   Coerce the result so that it can be assigned to
+        #   something of type dst_type. If processing is necessary,
+        #   wraps this node in a coercion node and returns that.
+        #   Otherwise, returns this node unchanged.
+        #
+        #   This method is called during the analyse_expressions
+        #   phase of the src_node's processing.
+        #
+        #   Note that subclasses that override this (especially
+        #   ConstNodes) must not (re-)set their own .type attribute
+        #   here.  Since expression nodes may turn up in different
+        #   places in the tree (e.g. inside of CloneNodes in cascaded
+        #   assignments), this method must return a new node instance
+        #   if it changes the type.
+        #
+        src = self
+        src_type = self.type
+
+        if self.check_for_coercion_error(dst_type, env):  # an error was reported for this type pair
+            return self
+
+        used_as_reference = dst_type.is_reference
+        if used_as_reference and not src_type.is_reference:
+            dst_type = dst_type.ref_base_type
+
+        if src_type.is_cv_qualified:
+            src_type = src_type.cv_base_type
+
+        if src_type.is_fused or dst_type.is_fused:
+            # See if we are coercing a fused function to a pointer to a
+            # specialized function
+            if (src_type.is_cfunction and not dst_type.is_fused and
+                    dst_type.is_ptr and dst_type.base_type.is_cfunction):
+
+                dst_type = dst_type.base_type
+
+                for signature in src_type.get_all_specialized_function_types():
+                    if signature.same_as(dst_type):
+                        src.type = signature
+                        src.entry = src.type.entry
+                        src.entry.used = True
+                        return self
+
+            if src_type.is_fused:
+                error(self.pos, "Type is not specialized")
+            elif src_type.is_null_ptr and dst_type.is_ptr:
+                # NULL can be implicitly cast to any pointer type
+                return self
+            else:
+                error(self.pos, "Cannot coerce to a type that is not specialized")
+
+            self.type = error_type
+            return self
+
+        if self.coercion_type is not None:
+            # This is purely for error checking purposes!
+            node = NameNode(self.pos, name='', type=self.coercion_type)
+            node.coerce_to(dst_type, env)
+
+        if dst_type.is_memoryviewslice:
+            from . import MemoryView
+            if not src.type.is_memoryviewslice:
+                if src.type.is_pyobject:
+                    src = CoerceToMemViewSliceNode(src, dst_type, env)
+                elif src.type.is_array:
+                    src = CythonArrayNode.from_carray(src, env).coerce_to(dst_type, env)
+                elif not src_type.is_error:
+                    error(self.pos,
+                          "Cannot convert '%s' to memoryviewslice" % (src_type,))
+            else:
+                if src.type.writable_needed:
+                    dst_type.writable_needed = True
+                if not src.type.conforms_to(dst_type, broadcast=self.is_memview_broadcast,
+                                            copying=self.is_memview_copy_assignment):
+                    if src.type.dtype.same_as(dst_type.dtype):
+                        msg = "Memoryview '%s' not conformable to memoryview '%s'."
+                        tup = src.type, dst_type
+                    else:
+                        msg = "Different base types for memoryviews (%s, %s)"
+                        tup = src.type.dtype, dst_type.dtype
+
+                    error(self.pos, msg % tup)
+
+        elif dst_type.is_pyobject:
+            # We never need a type check when assigning None to a Python object type.
+            if src.is_none:
+                pass
+            elif src.constant_result is None:
+                src = NoneNode(src.pos).coerce_to(dst_type, env)
+            else:
+                if not src.type.is_pyobject:
+                    if dst_type is bytes_type and src.type.is_int:
+                        src = CoerceIntToBytesNode(src, env)
+                    else:
+                        src = CoerceToPyTypeNode(src, env, type=dst_type)
+                # FIXME: I would expect that CoerceToPyTypeNode(type=dst_type) returns a value of type dst_type
+                #        but it doesn't for ctuples. Thus, we add a PyTypeTestNode which then triggers the
+                #        Python conversion and becomes useless. That seems backwards and inefficient.
+                #        We should not need a PyTypeTestNode after a previous conversion above.
+                if not src.type.subtype_of(dst_type):
+                    src = PyTypeTestNode(src, dst_type, env)
+        elif is_pythran_expr(dst_type) and is_pythran_supported_type(src.type):
+            # We let the compiler decide whether this is valid
+            return src
+        elif is_pythran_expr(src.type):
+            if is_pythran_supported_type(dst_type):
+                # Match the case where a pythran expr is assigned to a value, or vice versa.
+                # We let the C++ compiler decide whether this is valid or not!
+                return src
+            # Else, we need to convert the Pythran expression to a Python object
+            src = CoerceToPyTypeNode(src, env, type=dst_type)
+        elif src.type.is_pyobject:
+            if used_as_reference and dst_type.is_cpp_class:
+                warning(
+                    self.pos,
+                    "Cannot pass Python object as C++ data structure reference (%s &), will pass by copy." % dst_type)
+            src = CoerceFromPyTypeNode(dst_type, src, env)
+        elif (dst_type.is_complex
+              and src_type != dst_type
+              and dst_type.assignable_from(src_type)):
+            src = CoerceToComplexNode(src, dst_type, env)
+        elif (src_type is PyrexTypes.soft_complex_type
+              and src_type != dst_type
+              and not dst_type.assignable_from(src_type)):
+            src = coerce_from_soft_complex(src, dst_type, env)
+        else:
+            # neither src nor dst are py types
+            # Added the string comparison, since for c types that
+            # is enough, but Cython gets confused when the types are
+            # in different pxi files.
+            # TODO: Remove this hack and require shared declarations.
+            if not (src.type == dst_type or str(src.type) == str(dst_type) or dst_type.assignable_from(src_type)):
+                self.fail_assignment(dst_type)
+        return src
+
+    def fail_assignment(self, dst_type):
+        source_name = self.entry.name if hasattr(self, "entry") else None
+        src_alias = " (alias of '{0}')".format(self.type.resolve()) if self.type.is_typedef else ""
+        dst_alias = " (alias of '{0}')".format(dst_type.resolve()) if dst_type.is_typedef else ""
+        hint = dst_type.assignment_failure_extra_info(self.type, source_name)
+        if hint:
+            hint = ". " + hint
+        error(self.pos, "Cannot assign type '%s'%s to '%s'%s%s" % (
+                self.type, src_alias,
+                dst_type, dst_alias,
+                hint))
+
+    def check_for_coercion_error(self, dst_type, env, fail=False, default=None):
+        if fail and not default:  # when failing is requested, fall back to a generic message
+            default = "Cannot assign type '%(FROM)s' to '%(TO)s'"
+        message = find_coercion_error((self.type, dst_type), default, env)
+        if message is not None:
+            error(self.pos, message % {'FROM': self.type, 'TO': dst_type})
+            return True  # True means: an error was reported
+        if fail:
+            self.fail_assignment(dst_type)
+            return True
+        return False
+
+    def coerce_to_pyobject(self, env):
+        return self.coerce_to(PyrexTypes.py_object_type, env)  # shortcut for the generic object type
+
+    def coerce_to_boolean(self, env):
+        #  Coerce result to something acceptable as
+        #  a boolean value.
+
+        # if it's constant, calculate the result now
+        if self.has_constant_result():
+            bool_value = bool(self.constant_result)
+            return BoolNode(self.pos, value=bool_value,
+                            constant_result=bool_value)
+
+        type = self.type
+        if type.is_enum or type.is_error:  # returned unchanged
+            return self
+        elif type is PyrexTypes.c_bint_type:  # already a C boolean
+            return self
+        elif type.is_pyobject or type.is_int or type.is_ptr or type.is_float:
+            return CoerceToBooleanNode(self, env)
+        elif type.is_cpp_class and type.scope and type.scope.lookup("operator bool"):
+            return SimpleCallNode(  # call the C++ class's own 'operator bool'
+                self.pos,
+                function=AttributeNode(
+                    self.pos, obj=self, attribute=StringEncoding.EncodedString('operator bool')),
+                args=[]).analyse_types(env)
+        elif type.is_ctuple:
+            bool_value = len(type.components) != 0  # like Python tuples: true unless empty
+            return BoolNode(self.pos, value=bool_value,
+                            constant_result=bool_value)
+        else:
+            error(self.pos, "Type '%s' not acceptable as a boolean" % type)
+            return self
+
+    def coerce_to_integer(self, env):
+        # Nodes that already have some C integer type are returned unchanged;
+        # anything else is coerced to a C long.
+        if not self.type.is_int:
+            return self.coerce_to(PyrexTypes.c_long_type, env)
+        return self
+
+    def coerce_to_temp(self, env):
+        #  Make sure the result lives in a temporary: wrap this node in a
+        #  CoerceToTempNode unless result_in_temp() already holds.
+        if not self.result_in_temp():
+            return CoerceToTempNode(self, env)
+        return self
+
+    def coerce_to_simple(self, env):
+        #  Simple results (see is_simple) are returned unchanged;
+        #  everything else goes through coerce_to_temp().
+        if not self.is_simple():
+            return self.coerce_to_temp(env)
+        return self
+
+    def is_simple(self):
+        #  A node is simple if its result is something that can
+        #  be referred to without performing any operations, e.g.
+        #  a constant, local var, C global var, struct member
+        #  reference, or temporary.
+        return self.result_in_temp()  # base class default: only temp results count as simple
+
+    def may_be_none(self):
+        if self.type and not (self.type.is_pyobject or  # only Python objects and memoryview
+                              self.type.is_memoryviewslice):  # slices are treated as nullable here
+            return False
+        if self.has_constant_result():
+            return self.constant_result is not None  # NOTE(review): reads inverted ('is None'?) - confirm
+        return True  # nothing known: assume the value may be None
+
+    def as_cython_attribute(self):
+        return None  # base class default
+
+    def as_none_safe_node(self, message, error="PyExc_TypeError", format_args=()):
+        # Nodes that are known to be not-None (e.g. Python literals) are
+        # returned unchanged; all others get wrapped in a NoneCheckNode.
+        if not self.may_be_none():
+            return self
+        # Possibly None: guard the value with a check node.
+        return NoneCheckNode(self, error, message, format_args)
+
+    @classmethod
+    def from_node(cls, node, **kwargs):
+        """Build an instance of this node class at the position of 'node'.
+
+        The attributes named in the loop below are carried over from
+        'node' when it has them, unless the caller already passed an
+        explicit value for them as a keyword argument.
+        """
+        _missing = object()
+        for attr_name in ("cf_state", "cf_maybe_null", "cf_is_null", "constant_result"):
+            if attr_name in kwargs:
+                continue
+            inherited = getattr(node, attr_name, _missing)
+            if inherited is not _missing:
+                kwargs[attr_name] = inherited
+        return cls(node.pos, **kwargs)
+
+    def get_known_standard_library_import(self):
+        """
+        Gets the module.path that this node was imported from.
+
+        Many nodes do not have one, or it is ambiguous, in which case
+        this function returns a false value.
+        """
+        return None  # base class default: no known import
+
+
+class AtomicExprNode(ExprNode):
+    #  Abstract base class for expression nodes which have
+    #  no sub-expressions.
+
+    subexprs = []  # no child attributes
+
+    # Override to optimize -- we know we have no children
+    def generate_subexpr_evaluation_code(self, code):
+        pass  # nothing to evaluate
+    def generate_subexpr_disposal_code(self, code):
+        pass  # nothing to dispose of
+
+class PyConstNode(AtomicExprNode):
+    #  Abstract base class for constant Python values.
+
+    is_literal = 1
+    type = py_object_type
+    nogil_check = None  # replaces the inherited nogil_check method with None
+
+    def is_simple(self):
+        return 1  # always simple
+
+    def may_be_none(self):
+        return False  # default for Python constants; NoneNode overrides this
+
+    def analyse_types(self, env):
+        return self  # the type is fixed on the class, nothing to analyse
+
+    def calculate_result_code(self):
+        return self.value  # subclasses provide 'value' as the result code string
+
+    def generate_result_code(self, code):
+        pass  # nothing to generate
+
+
+class NoneNode(PyConstNode):
+    #  The constant value None
+
+    is_none = 1
+    value = "Py_None"  # result code, see PyConstNode.calculate_result_code()
+
+    constant_result = None
+
+    def compile_time_value(self, denv):
+        return None
+
+    def may_be_none(self):
+        return True  # overrides the PyConstNode default of False
+
+    def coerce_to(self, dst_type, env):
+        if not (dst_type.is_pyobject or dst_type.is_memoryviewslice or dst_type.is_error):
+            # Catch this error early and loudly.
+            error(self.pos, "Cannot assign None to %s" % dst_type)
+        return super(NoneNode, self).coerce_to(dst_type, env)  # generic coercion still runs after the error
+
+
+class EllipsisNode(PyConstNode):
+    #  '...' in a subscript list.
+
+    value = "Py_Ellipsis"  # result code, see PyConstNode.calculate_result_code()
+
+    constant_result = Ellipsis
+
+    def compile_time_value(self, denv):
+        return Ellipsis
+
+
+class ConstNode(AtomicExprNode):
+    # Abstract base type for literal constant nodes.
+    #
+    # value     string      C code fragment
+
+    is_literal = 1
+    nogil_check = None  # replaces the inherited nogil_check method with None
+
+    def is_simple(self):
+        return 1  # always simple
+
+    def nonlocally_immutable(self):
+        return 1
+
+    def may_be_none(self):
+        return False
+
+    def analyse_types(self, env):
+        return self  # Types are held in class variables
+
+    def check_const(self):
+        return True  # unlike the ExprNode default, no error is reported here
+
+    def get_constant_c_result_code(self):
+        return self.calculate_result_code()
+
+    def calculate_result_code(self):
+        return str(self.value)  # the 'value' fragment is used as the result code
+
+    def generate_result_code(self, code):
+        pass  # nothing to generate
+
+
+class BoolNode(ConstNode):
+    type = PyrexTypes.c_bint_type
+    #  The constant value True or False
+
+    def calculate_constant_result(self):
+        self.constant_result = self.value
+
+    def compile_time_value(self, denv):
+        return self.value
+
+    def calculate_result_code(self):
+        if self.type.is_pyobject:
+            return 'Py_True' if self.value else 'Py_False'
+        else:
+            return str(int(self.value))  # C result: "1" or "0"
+
+    def coerce_to(self, dst_type, env):
+        if dst_type == self.type:  # nothing to do
+            return self
+        if dst_type is py_object_type and self.type is Builtin.bool_type:
+            return self
+        if dst_type.is_pyobject and self.type.is_int:  # C -> Python: new node typed as builtin bool
+            return BoolNode(
+                self.pos, value=self.value,
+                constant_result=self.constant_result,
+                type=Builtin.bool_type)
+        if dst_type.is_int and self.type.is_pyobject:  # Python -> C: new node typed as bint
+            return BoolNode(
+                self.pos, value=self.value,
+                constant_result=self.constant_result,
+                type=PyrexTypes.c_bint_type)
+        return ConstNode.coerce_to(self, dst_type, env)  # everything else: generic coercion
+
+
+class NullNode(ConstNode):
+    type = PyrexTypes.c_null_ptr_type
+    value = "NULL"  # C code fragment for this constant
+    constant_result = 0
+
+    def get_constant_c_result_code(self):
+        return self.value
+
+
+class CharNode(ConstNode):
+    type = PyrexTypes.c_char_type
+
+    def calculate_constant_result(self):
+        self.constant_result = ord(self.value)  # the constant is the character's ordinal
+
+    def compile_time_value(self, denv):
+        return ord(self.value)
+
+    def calculate_result_code(self):
+        return "'%s'" % StringEncoding.escape_char(self.value)  # single-quoted, escaped C char literal
+
+
+class IntNode(ConstNode):
+
+    # unsigned     "" or "U"
+    # longness     "" or "L" or "LL"
+    # is_c_literal   True/False/None   creator considers this a C integer literal
+
+    unsigned = ""
+    longness = ""
+    is_c_literal = None  # unknown
+
+    # hex_value and base_10_value are designed only to simplify
+    # writing tests to get a consistent representation of value
+    @property
+    def hex_value(self):
+        return Utils.strip_py2_long_suffix(hex(Utils.str_to_number(self.value)))
+
+    @property
+    def base_10_value(self):
+        return str(Utils.str_to_number(self.value))
+
+    def __init__(self, pos, **kwds):
+        ExprNode.__init__(self, pos, **kwds)
+        if 'type' not in kwds:  # no explicit type given: derive one from the value
+            self.type = self.find_suitable_type_for_value()
+
+    def find_suitable_type_for_value(self):
+        if self.constant_result is constant_value_not_set:
+            try:
+                self.calculate_constant_result()
+            except ValueError:
+                pass  # value could not be parsed: the constant result stays unset
+        # we ignore 'is_c_literal = True' and instead map signed 32bit
+        # integers as C long values
+        if self.is_c_literal or \
+               not self.has_constant_result() or \
+               self.unsigned or self.longness == 'LL':
+            # clearly a C literal
+            rank = (self.longness == 'LL') and 2 or 1  # rank 2 for 'LL', rank 1 otherwise
+            suitable_type = PyrexTypes.modifiers_and_name_to_type[not self.unsigned, rank, "int"]
+            if self.type:
+                suitable_type = PyrexTypes.widest_numeric_type(suitable_type, self.type)
+        else:
+            # C literal or Python literal - split at 32bit boundary
+            if -2**31 <= self.constant_result < 2**31:
+                if self.type and self.type.is_int:
+                    suitable_type = self.type
+                else:
+                    suitable_type = PyrexTypes.c_long_type
+            else:
+                suitable_type = PyrexTypes.py_object_type
+        return suitable_type
+
+    def coerce_to(self, dst_type, env):
+        if self.type is dst_type:
+            return self
+        elif dst_type.is_float:
+            if self.has_constant_result():
+                return FloatNode(self.pos, value='%d.0' % int(self.constant_result), type=dst_type,
+                                 constant_result=float(self.constant_result))
+            else:
+                return FloatNode(self.pos, value=self.value, type=dst_type,
+                                 constant_result=not_a_constant)
+        if dst_type.is_numeric and not dst_type.is_complex:
+            node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+                           type=dst_type, is_c_literal=True,
+                           unsigned=self.unsigned, longness=self.longness)
+            return node  # returned directly, without the generic coercion below
+        elif dst_type.is_pyobject:
+            node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+                           type=PyrexTypes.py_object_type, is_c_literal=False,
+                           unsigned=self.unsigned, longness=self.longness)
+        else:
+            # FIXME: not setting the type here to keep it working with
+            # complex numbers. Should they be special cased?
+            node = IntNode(self.pos, value=self.value, constant_result=self.constant_result,
+                           unsigned=self.unsigned, longness=self.longness)
+        # We still need to perform normal coerce_to processing on the
+        # result, because we might be coercing to an extension type,
+        # in which case a type test node will be needed.
+        return ConstNode.coerce_to(node, dst_type, env)
+
+    def coerce_to_boolean(self, env):
+        return IntNode(  # same literal, retyped as bint
+            self.pos, value=self.value,
+            constant_result=self.constant_result,
+            type=PyrexTypes.c_bint_type,
+            unsigned=self.unsigned, longness=self.longness)
+
+    def generate_evaluation_code(self, code):
+        if self.type.is_pyobject:
+            # pre-allocate a Python version of the number
+            # (In hex if sufficiently large to cope with Python's string-to-int limitations.
+            #  We use quite a small value of "sufficiently large" - 10**13 is picked as
+            #  the approximate point where hex strings become shorter)
+            value = Utils.str_to_number(self.value)
+            formatter = hex if value > (10**13) else str
+            plain_integer_string = formatter(value)
+            plain_integer_string = Utils.strip_py2_long_suffix(plain_integer_string)
+            self.result_code = code.get_py_int(plain_integer_string, self.longness)
+        else:
+            self.result_code = self.get_constant_c_result_code()
+
+    def get_constant_c_result_code(self):
+        unsigned, longness = self.unsigned, self.longness
+        literal = self.value_as_c_integer_string()
+        if not (unsigned or longness) and self.type.is_int and literal[0] == '-' and literal[1] != '0':
+            # negative decimal literal => guess longness from type to prevent wrap-around
+            if self.type.rank >= PyrexTypes.c_longlong_type.rank:
+                longness = 'LL'
+            elif self.type.rank >= PyrexTypes.c_long_type.rank:
+                longness = 'L'
+        return literal + unsigned + longness  # literal text followed by its C suffixes
+
+    def value_as_c_integer_string(self):
+        value = self.value
+        if len(value) <= 2:
+            # too short to go wrong (and simplifies code below)
+            return value
+        neg_sign = ''
+        if value[0] == '-':  # split off the sign, it is re-attached at the end
+            neg_sign = '-'
+            value = value[1:]
+        if value[0] == '0':
+            literal_type = value[1]  # 0'o' - 0'b' - 0'x'
+            # 0x123 hex literals and 0123 octal literals work nicely in C
+            # but C-incompatible Py3 oct/bin notations need conversion
+            if neg_sign and literal_type in 'oOxX0123456789' and value[2:].isdigit():
+                # negative hex/octal literal => prevent C compiler from using
+                # unsigned integer types by converting to decimal (see C standard 6.4.4.1)
+                value = str(Utils.str_to_number(value))
+            elif literal_type in 'oO':
+                value = '0' + value[2:]  # '0o123' => '0123'
+            elif literal_type in 'bB':
+                value = str(int(value[2:], 2))  # '0b101' => '5'
+        elif value.isdigit() and not self.unsigned and not self.longness:
+            if not neg_sign:
+                # C compilers do not consider unsigned types for decimal literals,
+                # but they do for hex (see C standard 6.4.4.1)
+                value = '0x%X' % int(value)
+        return neg_sign + value
+
+    def calculate_result_code(self):
+        return self.result_code  # set by generate_evaluation_code()
+
+    def calculate_constant_result(self):
+        self.constant_result = Utils.str_to_number(self.value)
+
+    def compile_time_value(self, denv):
+        return Utils.str_to_number(self.value)
+
+class FloatNode(ConstNode):
+    type = PyrexTypes.c_double_type
+
+    def calculate_constant_result(self):
+        self.constant_result = float(self.value)
+
+    def compile_time_value(self, denv):
+        float_value = float(self.value)
+        str_float_value = ("%.330f" % float_value).strip('0')
+        str_value = Utils.normalise_float_repr(self.value)
+        if str_value not in (str_float_value, repr(float_value).lstrip('0')):  # literal text does not round-trip
+            warning(self.pos, "Using this floating point value with DEF may lose precision, using %r" % float_value)
+        return float_value
+
+    def coerce_to(self, dst_type, env):
+        if dst_type.is_pyobject and self.type.is_float:  # C -> Python: new node typed as builtin float
+            return FloatNode(
+                self.pos, value=self.value,
+                constant_result=self.constant_result,
+                type=Builtin.float_type)
+        if dst_type.is_float and self.type.is_pyobject:  # Python -> C: new node with the C float type
+            return FloatNode(
+                self.pos, value=self.value,
+                constant_result=self.constant_result,
+                type=dst_type)
+        return ConstNode.coerce_to(self, dst_type, env)  # everything else: generic coercion
+
+    def calculate_result_code(self):
+        return self.result_code  # set by generate_evaluation_code()
+
+    def get_constant_c_result_code(self):
+        strval = self.value
+        assert isinstance(strval, basestring)
+        cmpval = repr(float(strval))
+        if cmpval == 'nan':  # non-finite values are spelled via Py_HUGE_VAL
+            return "(Py_HUGE_VAL * 0)"
+        elif cmpval == 'inf':
+            return "Py_HUGE_VAL"
+        elif cmpval == '-inf':
+            return "(-Py_HUGE_VAL)"
+        else:
+            return strval  # finite values keep their original literal text
+
+    def generate_evaluation_code(self, code):
+        c_value = self.get_constant_c_result_code()
+        if self.type.is_pyobject:
+            self.result_code = code.get_py_float(self.value, c_value)
+        else:
+            self.result_code = c_value
+
+
+def _analyse_name_as_type(name, pos, env):
+    # Try to interpret the string 'name' as a type.
+    #
+    # name   string   text to interpret as a type
+    # pos    tuple    source position; used below as pos[0].filename and pos[0..2]
+    # env    scope    provides in_c_type_context and global_scope()
+    #
+    # Python object types are returned in any context.  Other types are returned
+    # directly only when env.in_c_type_context is set; otherwise the first such
+    # candidate is remembered in 'ctype' and returned at the very end.  The result
+    # is None when nothing matched.
+    ctype = PyrexTypes.parse_basic_type(name)
+    if ctype is not None and env.in_c_type_context:
+        return ctype
+
+    # Next, look for a type entry of that name in the module's global scope.
+    global_scope = env.global_scope()
+    global_entry = global_scope.lookup(name)
+    if global_entry and global_entry.is_type:
+        type = global_entry.type
+        if (not env.in_c_type_context
+                and type is Builtin.int_type
+                and global_scope.context.language_level == 2):
+            # While we still support Python2 this needs to be downgraded
+            # to a generic Python object to include both int and long.
+            # With language_level > 3, we keep the type but also accept 'long' in Py2.
+            type = py_object_type
+        if type and (type.is_pyobject or env.in_c_type_context):
+            return type
+        # Not usable right away: keep it as fallback unless one was found earlier.
+        ctype = ctype or type
+
+    # This is fairly heavy, so it's worth trying some easier things above.
+    from .TreeFragment import TreeFragment
+    # Errors raised while parsing/analysing the synthetic fragment are ignored.
+    with local_errors(ignore=True):
+        # The fragment prefixes the name with "sizeof(" (7 characters), so the
+        # start column is moved back by the same amount.
+        pos = (pos[0], pos[1], pos[2]-7)
+        try:
+            declaration = TreeFragment(u"sizeof(%s)" % name, name=pos[0].filename, initial_pos=pos)
+        except CompileError:
+            pass
+        else:
+            sizeof_node = declaration.root.stats[0].expr
+            if isinstance(sizeof_node, SizeofTypeNode):
+                # analyse_types() may hand back a different node, hence the second check.
+                sizeof_node = sizeof_node.analyse_types(env)
+                if isinstance(sizeof_node, SizeofTypeNode):
+                    type = sizeof_node.arg_type
+                    if type and (type.is_pyobject or env.in_c_type_context):
+                        return type
+                    ctype = ctype or type
+    return ctype
+
+
+class BytesNode(ConstNode):
+    # A char* or bytes literal
+    #
+    # value      BytesLiteral
+
+    is_string_literal = True
+    # start off as Python 'bytes' to support len() in O(1)
+    type = bytes_type
+
+    def calculate_constant_result(self):
+        # The literal itself serves as the constant result.
+        self.constant_result = self.value
+
+    def as_sliced_node(self, start, stop, step=None):
+        # Build a new BytesNode for value[start:stop:step], keeping the encoding
+        # of the original literal.
+        value = StringEncoding.bytes_literal(self.value[start:stop:step], self.value.encoding)
+        return BytesNode(self.pos, value=value, constant_result=value)
+
+    def compile_time_value(self, denv):
+        return self.value.byteencode()
+
+    def analyse_as_type(self, env):
+        # Decode the literal as ISO8859-1 text and try to read that text as a type.
+        return _analyse_name_as_type(self.value.decode('ISO8859-1'), self.pos, env)
+
+    def can_coerce_to_char_literal(self):
+        # Only literals of length 1 qualify.
+        return len(self.value) == 1
+
+    def coerce_to_boolean(self, env):
+        # This is special because testing a C char* for truth directly
+        # would yield the wrong result.
+        bool_value = bool(self.value)
+        return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
+
+    def coerce_to(self, dst_type, env):
+        # Coerce the literal to 'dst_type'.  The branches below are tried in order:
+        # same type, C integer (character literal), Python object, char pointers,
+        # unsigned char / void pointers, anything assignable from char*, and finally
+        # the generic ConstNode coercion.
+        if self.type == dst_type:
+            return self
+        if dst_type.is_int:
+            # On either error below the node is returned unchanged after reporting.
+            if not self.can_coerce_to_char_literal():
+                error(self.pos, "Only single-character string literals can be coerced into ints.")
+                return self
+            if dst_type.is_unicode_char:
+                error(self.pos, "Bytes literals cannot coerce to Py_UNICODE/Py_UCS4, use a unicode literal instead.")
+                return self
+            return CharNode(self.pos, value=self.value,
+                            constant_result=ord(self.value))
+
+        # All remaining cases work on a fresh copy so that this node keeps its type.
+        node = BytesNode(self.pos, value=self.value, constant_result=self.constant_result)
+        if dst_type.is_pyobject:
+            if dst_type in (py_object_type, Builtin.bytes_type):
+                node.type = Builtin.bytes_type
+            else:
+                self.check_for_coercion_error(dst_type, env, fail=True)
+            return node
+        elif dst_type in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
+            node.type = dst_type
+            return node
+        elif dst_type in (PyrexTypes.c_uchar_ptr_type, PyrexTypes.c_const_uchar_ptr_type, PyrexTypes.c_void_ptr_type):
+            # Type the literal as (const) char* first, then cast it to the target pointer type.
+            node.type = (PyrexTypes.c_const_char_ptr_type if dst_type == PyrexTypes.c_const_uchar_ptr_type
+                         else PyrexTypes.c_char_ptr_type)
+            return CastNode(node, dst_type)
+        elif dst_type.assignable_from(PyrexTypes.c_char_ptr_type):
+            # Exclude the case of passing a C string literal into a non-const C++ string.
+            if not dst_type.is_cpp_class or dst_type.is_const:
+                node.type = dst_type
+                return node
+
+        # We still need to perform normal coerce_to processing on the
+        # result, because we might be coercing to an extension type,
+        # in which case a type test node will be needed.
+        return ConstNode.coerce_to(node, dst_type, env)
+
+    def generate_evaluation_code(self, code):
+        # Pick the result code depending on whether the node is typed as a Python
+        # object, a const C type, or a non-const C type.
+        if self.type.is_pyobject:
+            result = code.get_py_string_const(self.value)
+        elif self.type.is_const:
+            result = code.get_string_const(self.value)
+        else:
+            # not const => use plain C string literal and cast to mutable type
+            literal = self.value.as_c_string_literal()
+            # C++ may require a cast
+            result = typecast(self.type, PyrexTypes.c_void_ptr_type, literal)
+        self.result_code = result
+
+    def get_constant_c_result_code(self):
+        return None  # FIXME
+
+    def calculate_result_code(self):
+        # The result code is whatever generate_evaluation_code() stored.
+        return self.result_code
+
+
+class UnicodeNode(ConstNode):
+    # A Py_UNICODE* or unicode literal
+    #
+    # value        EncodedString
+    # bytes_value  BytesLiteral    the literal parsed as bytes string
+    #                              ('-3' unicode literals only)
+
+    is_string_literal = True
+    bytes_value = None
+    type = unicode_type
+
+    def calculate_constant_result(self):
+        # The literal itself serves as the constant result.
+        self.constant_result = self.value
+
+    def analyse_as_type(self, env):
+        # Try to read the literal's text as a type.
+        return _analyse_name_as_type(self.value, self.pos, env)
+
+    def as_sliced_node(self, start, stop, step=None):
+        # Returns a new UnicodeNode for value[start:stop:step], or None if slicing
+        # is considered unsafe.  Note that the surrogate check looks at the whole
+        # prefix up to 'stop', not only at the requested slice.
+        if StringEncoding.string_contains_surrogates(self.value[:stop]):
+            # this is unsafe as it may give different results
+            # in different runtimes
+            return None
+        value = StringEncoding.EncodedString(self.value[start:stop:step])
+        value.encoding = self.value.encoding
+        # Keep the bytes form (if there is one) in sync with the sliced text.
+        if self.bytes_value is not None:
+            bytes_value = StringEncoding.bytes_literal(
+                self.bytes_value[start:stop:step], self.bytes_value.encoding)
+        else:
+            bytes_value = None
+        return UnicodeNode(
+            self.pos, value=value, bytes_value=bytes_value,
+            constant_result=value)
+
+    def coerce_to(self, dst_type, env):
+        # Coerce the literal to 'dst_type'.  Every path that does not return early
+        # falls through to 'return self' at the end, including the ones that only
+        # report an error.
+        if dst_type is self.type:
+            pass
+        elif dst_type.is_unicode_char:
+            if not self.can_coerce_to_char_literal():
+                error(self.pos,
+                      "Only single-character Unicode string literals or "
+                      "surrogate pairs can be coerced into Py_UCS4/Py_UNICODE.")
+                return self
+            # A single character becomes an integer literal of the target type.
+            int_value = ord(self.value)
+            return IntNode(self.pos, type=dst_type, value=str(int_value),
+                           constant_result=int_value)
+        elif not dst_type.is_pyobject:
+            if dst_type.is_string and self.bytes_value is not None:
+                # special case: '-3' enforced unicode literal used in a
+                # C char* context
+                return BytesNode(self.pos, value=self.bytes_value).coerce_to(dst_type, env)
+            if dst_type.is_pyunicode_ptr:
+                return UnicodeNode(self.pos, value=self.value, type=dst_type)
+            error(self.pos,
+                  "Unicode literals do not support coercion to C types other "
+                  "than Py_UNICODE/Py_UCS4 (for characters) or Py_UNICODE* "
+                  "(for strings).")
+        elif dst_type not in (py_object_type, Builtin.basestring_type):
+            self.check_for_coercion_error(dst_type, env, fail=True)
+        return self
+
+    def can_coerce_to_char_literal(self):
+        # Only literals of length 1 qualify; the surrogate pair case below is disabled.
+        return len(self.value) == 1
+            ## or (len(self.value) == 2
+            ##     and (0xD800 <= self.value[0] <= 0xDBFF)
+            ##     and (0xDC00 <= self.value[1] <= 0xDFFF))
+
+    def coerce_to_boolean(self, env):
+        # The truth value of the literal is known here, so return it as a BoolNode.
+        bool_value = bool(self.value)
+        return BoolNode(self.pos, value=bool_value, constant_result=bool_value)
+
+    def contains_surrogates(self):
+        return StringEncoding.string_contains_surrogates(self.value)
+
+    def generate_evaluation_code(self, code):
+        # Python object literals become string constants (with a special decoding
+        # path for lone surrogates); C-typed literals become a Py_UNICODE* constant.
+        if self.type.is_pyobject:
+            # FIXME: this should go away entirely!
+            # Since string_contains_lone_surrogates() returns False for surrogate pairs in Py2/UCS2,
+            # Py2 can generate different code from Py3 here.  Let's hope we get away with claiming that
+            # the processing of surrogate pairs in code was always ambiguous and led to different results
+            # on 16/32bit Unicode platforms.
+            if StringEncoding.string_contains_lone_surrogates(self.value):
+                # lone (unpaired) surrogates are not really portable and cannot be
+                # decoded by the UTF-8 codec in Py3.3
+                self.result_code = code.get_py_const(py_object_type, 'ustring')
+                # The text is stored 'unicode_escape' encoded as a C string constant
+                # and decoded again by the generated initialisation code below.
+                data_cname = code.get_string_const(
+                    StringEncoding.BytesLiteral(self.value.encode('unicode_escape')))
+                const_code = code.get_cached_constants_writer(self.result_code)
+                if const_code is None:
+                    return  # already initialised
+                const_code.mark_pos(self.pos)
+                # sizeof(...) - 1 is passed as the length argument of the decode call.
+                const_code.putln(
+                    "%s = PyUnicode_DecodeUnicodeEscape(%s, sizeof(%s) - 1, NULL); %s" % (
+                        self.result_code,
+                        data_cname,
+                        data_cname,
+                        const_code.error_goto_if_null(self.result_code, self.pos)))
+                const_code.put_error_if_neg(
+                    self.pos, "__Pyx_PyUnicode_READY(%s)" % self.result_code)
+            else:
+                self.result_code = code.get_py_string_const(self.value)
+        else:
+            self.result_code = code.get_pyunicode_ptr_const(self.value)
+
+    def calculate_result_code(self):
+        # The result code is whatever generate_evaluation_code() stored.
+        return self.result_code
+
+    def compile_time_value(self, env):
+        return self.value
+
+
+class StringNode(PyConstNode):
+    # A Python str object, i.e. a byte string in Python 2.x and a
+    # unicode string in Python 3.x
+    #
+    # value          BytesLiteral (or EncodedString with ASCII content)
+    # unicode_value  EncodedString or None
+    # is_identifier  boolean
+
+    type = str_type
+    is_string_literal = True
+    is_identifier = None
+    unicode_value = None
+
+    def calculate_constant_result(self):
+        if self.unicode_value is not None:
+            # only the Unicode value is portable across Py2/3
+            self.constant_result = self.unicode_value
+
+    def analyse_as_type(self, env):
+        return _analyse_name_as_type(self.unicode_value or self.value.decode('ISO8859-1'), self.pos, env)
+
+    def as_sliced_node(self, start, stop, step=None):
+        value = type(self.value)(self.value[start:stop:step])
+        value.encoding = self.value.encoding
+        if self.unicode_value is not None:
+            if StringEncoding.string_contains_surrogates(self.unicode_value[:stop]):
+                # this is unsafe as it may give different results in different runtimes
+                return None
+            unicode_value = StringEncoding.EncodedString(
+                self.unicode_value[start:stop:step])
+        else:
+            unicode_value = None
+        return StringNode(
+            self.pos, value=value, unicode_value=unicode_value,
+            constant_result=value, is_identifier=self.is_identifier)
+
+    def coerce_to(self, dst_type, env):
+        if dst_type is not py_object_type and not str_type.subtype_of(dst_type):
+#            if dst_type is Builtin.bytes_type:
+#                # special case: bytes = 'str literal'
+#                return BytesNode(self.pos, value=self.value)
+            if not dst_type.is_pyobject:
+                return BytesNode(self.pos, value=self.value).coerce_to(dst_type, env)
+            if dst_type is not Builtin.basestring_type:
+                self.check_for_coercion_error(dst_type, env, fail=True)
+        return self
+
+    def can_coerce_to_char_literal(self):
+        return not self.is_identifier and len(self.value) == 1
+
+    def generate_evaluation_code(self, code):
+        self.result_code = code.get_py_string_const(
+            self.value, identifier=self.is_identifier, is_str=True,
+            unicode_value=self.unicode_value)
+
+    def get_constant_c_result_code(self):
+        return None
+
+    def calculate_result_code(self):
+        return self.result_code
+
+    def compile_time_value(self, env):
+        if self.value.is_unicode:
+            return self.value
+        if not IS_PYTHON3:
+            # use plain str/bytes object in Py2
+            return self.value.byteencode()
+        # in Py3, always return a Unicode string
+        if self.unicode_value is not None:
+            return self.unicode_value
+        return self.value.decode('iso8859-1')
+
+
+class IdentifierStringNode(StringNode):
+    # A special str value that represents an identifier (bytes in Py2,
+    # unicode in Py3).
+    #
+    # Differs from StringNode only in that 'is_identifier' is switched on.
+    is_identifier = True
+
+
+class ImagNode(AtomicExprNode):
+    #  Imaginary number literal
+    #
+    #  value   string    imaginary part (float value)
+
+    type = PyrexTypes.c_double_complex_type
+
+    def calculate_constant_result(self):
+        self.constant_result = complex(0.0, float(self.value))
+
+    def compile_time_value(self, denv):
+        return complex(0.0, float(self.value))
+
+    def analyse_types(self, env):
+        self.type.create_declaration_utility_code(env)
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def coerce_to(self, dst_type, env):
+        if self.type is dst_type:
+            return self
+        node = ImagNode(self.pos, value=self.value)
+        if dst_type.is_pyobject:
+            node.is_temp = 1
+            node.type = Builtin.complex_type
+        # We still need to perform normal coerce_to processing on the
+        # result, because we might be coercing to an extension type,
+        # in which case a type test node will be needed.
+        return AtomicExprNode.coerce_to(node, dst_type, env)
+
+    gil_message = "Constructing complex number"
+
+    def calculate_result_code(self):
+        if self.type.is_pyobject:
+            return self.result()
+        else:
+            return "%s(0, %r)" % (self.type.from_parts, float(self.value))
+
+    def generate_result_code(self, code):
+        if self.type.is_pyobject:
+            code.putln(
+                "%s = PyComplex_FromDoubles(0.0, %r); %s" % (
+                    self.result(),
+                    float(self.value),
+                    code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+
+
+class NewExprNode(AtomicExprNode):
+
+    # C++ new statement
+    #
+    # cppclass              node                 c++ class to create
+
+    type = None
+
+    def infer_type(self, env):
+        type = self.cppclass.analyse_as_type(env)
+        if type is None or not type.is_cpp_class:
+            error(self.pos, "new operator can only be applied to a C++ class")
+            self.type = error_type
+            return
+        self.cpp_check(env)
+        constructor = type.get_constructor(self.pos)
+        self.class_type = type
+        self.entry = constructor
+        self.type = constructor.type
+        return self.type
+
+    def analyse_types(self, env):
+        if self.type is None:
+            self.infer_type(env)
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def generate_result_code(self, code):
+        pass
+
+    def calculate_result_code(self):
+        return "new " + self.class_type.empty_declaration_code()
+
+
+class NameNode(AtomicExprNode):
+    #  Reference to a local or global variable name.
+    #
+    #  name            string    Python name of the variable
+    #  entry           Entry     Symbol table entry
+    #  type_entry      Entry     For extension type names, the original type entry
+    #  cf_is_null      boolean   Is uninitialized before this node
+    #  cf_maybe_null   boolean   Maybe uninitialized before this node
+    #  allow_null      boolean   Don't raise UnboundLocalError
+    #  nogil           boolean   Whether it is used in a nogil context
+
+    is_name = True
+    is_cython_module = False
+    cython_attribute = None
+    lhs_of_first_assignment = False  # TODO: remove me
+    is_used_as_rvalue = 0
+    entry = None
+    type_entry = None
+    cf_maybe_null = True
+    cf_is_null = False
+    allow_null = False
+    nogil = False
+    inferred_type = None
+
+    def as_cython_attribute(self):
+        return self.cython_attribute
+
+    def type_dependencies(self, env):
+        if self.entry is None:
+            self.entry = env.lookup(self.name)
+        if self.entry is not None and self.entry.type.is_unspecified:
+            return (self,)
+        else:
+            return ()
+
+    def infer_type(self, env):
+        if self.entry is None:
+            self.entry = env.lookup(self.name)
+        if self.entry is None or self.entry.type is unspecified_type:
+            if self.inferred_type is not None:
+                return self.inferred_type
+            return py_object_type
+        elif (self.entry.type.is_extension_type or self.entry.type.is_builtin_type) and \
+                self.name == self.entry.type.name:
+            # Unfortunately the type attribute of type objects
+            # is used for the pointer to the type they represent.
+            return type_type
+        elif self.entry.type.is_cfunction:
+            if self.entry.scope.is_builtin_scope:
+                # special case: optimised builtin functions must be treated as Python objects
+                return py_object_type
+            else:
+                # special case: referring to a C function must return its pointer
+                return PyrexTypes.CPtrType(self.entry.type)
+        else:
+            # If entry is inferred as pyobject it's safe to use local
+            # NameNode's inferred_type.
+            if self.entry.type.is_pyobject and self.inferred_type:
+                # Overflow may happen if integer
+                if not (self.inferred_type.is_int and self.entry.might_overflow):
+                    return self.inferred_type
+            return self.entry.type
+
+    def compile_time_value(self, denv):
+        try:
+            return denv.lookup(self.name)
+        except KeyError:
+            error(self.pos, "Compile-time name '%s' not defined" % self.name)
+
+    def get_constant_c_result_code(self):
+        if not self.entry or self.entry.type.is_pyobject:
+            return None
+        return self.entry.cname
+
+    def coerce_to(self, dst_type, env):
+        #  If coercing to a generic pyobject and this is a builtin
+        #  C function with a Python equivalent, manufacture a NameNode
+        #  referring to the Python builtin.
+        #print "NameNode.coerce_to:", self.name, dst_type ###
+        if dst_type is py_object_type:
+            entry = self.entry
+            if entry and entry.is_cfunction:
+                var_entry = entry.as_variable
+                if var_entry:
+                    if var_entry.is_builtin and var_entry.is_const:
+                        var_entry = env.declare_builtin(var_entry.name, self.pos)
+                    node = NameNode(self.pos, name = self.name)
+                    node.entry = var_entry
+                    node.analyse_rvalue_entry(env)
+                    return node
+
+        return super(NameNode, self).coerce_to(dst_type, env)
+
+    def declare_from_annotation(self, env, as_target=False):
+        """Implements PEP 526 annotation typing in a fairly relaxed way.
+
+        Annotations are ignored for global variables.
+        All other annotations are stored on the entry in the symbol table.
+        String literals are allowed and not evaluated.
+        The ambiguous Python types 'int' and 'long' are not evaluated - the 'cython.int' form must be used instead.
+        """
+        name = self.name
+        annotation = self.annotation
+        # A new variable is only declared when neither this node nor the current
+        # scope already has an entry for the name.
+        entry = self.entry or env.lookup_here(name)
+        if not entry:
+            # annotations never create global cdef names
+            if env.is_module_scope:
+                return
+
+            modifiers = ()
+            if (
+                # name: "description" => not a type, but still a declared variable or attribute
+                annotation.expr.is_string_literal
+                # don't do type analysis from annotations if not asked to, but still collect the annotation
+                or not env.directives['annotation_typing']
+            ):
+                atype = None
+            elif env.is_py_class_scope:
+                # For Python class scopes every attribute is a Python object
+                atype = py_object_type
+            else:
+                modifiers, atype = annotation.analyse_type_annotation(env)
+
+            if atype is None:
+                # No type from the annotation: assignment targets are left to type
+                # inference (unless 'infer_types' is False), everything else becomes
+                # a generic Python object.
+                atype = unspecified_type if as_target and env.directives['infer_types'] != False else py_object_type
+            elif atype.is_fused and env.fused_to_specific:
+                try:
+                    atype = atype.specialize(env.fused_to_specific)
+                except CannotSpecialize:
+                    error(self.pos,
+                          "'%s' cannot be specialized since its type is not a fused argument to this function" %
+                          self.name)
+                    atype = error_type
+
+            visibility = 'private'
+            if env.is_c_dataclass_scope:
+                # handle "frozen" directive - full inspection of the dataclass directives happens
+                # in Dataclass.py
+                is_frozen = env.is_c_dataclass_scope == "frozen"
+                if atype.is_pyobject or atype.can_coerce_to_pyobject(env):
+                    visibility = 'readonly' if is_frozen else 'public'
+                    # If the object can't be coerced that's fine - we just don't create a property
+
+            if as_target and env.is_c_class_scope and not (atype.is_pyobject or atype.is_error):
+                # TODO: this will need revising slightly if annotated cdef attributes are implemented
+                atype = py_object_type
+                warning(annotation.pos, "Annotation ignored since class-level attributes must be Python objects. "
+                        "Were you trying to set up an instance attribute?", 2)
+
+            # Declare the variable and remember its entry on this node.
+            entry = self.entry = env.declare_var(
+                name, atype, self.pos, is_cdef=not as_target, visibility=visibility,
+                pytyping_modifiers=modifiers)
+
+        # Even if the entry already exists, make sure we're supplying an annotation if we can.
+        if annotation and not entry.annotation:
+            entry.annotation = annotation
+
+    def analyse_as_module(self, env):
+        # Try to interpret this as a reference to a cimported module.
+        # Returns the module scope, or None.
+        entry = self.entry
+        if not entry:
+            entry = env.lookup(self.name)
+        if entry and entry.as_module:
+            return entry.as_module
+        if entry and entry.known_standard_library_import:
+            scope = Builtin.get_known_standard_library_module_scope(entry.known_standard_library_import)
+            if scope and scope.is_module_scope:
+                return scope
+        return None
+
+    def analyse_as_type(self, env):
+        # Try to interpret this name as a type.  Returns the type, or None if the
+        # name does not refer to one.
+        type = None
+        # Basic C types: looked up by the 'cython.*' attribute name if there is
+        # one, otherwise by the plain name but only in a C type context.
+        if self.cython_attribute:
+            type = PyrexTypes.parse_basic_type(self.cython_attribute)
+        elif env.in_c_type_context:
+            type = PyrexTypes.parse_basic_type(self.name)
+        if type:
+            return type
+
+        entry = self.entry
+        if not entry:
+            entry = env.lookup(self.name)
+        # A non-type entry that is known to come from a standard library import
+        # is replaced by the entry registered for that import.
+        if entry and not entry.is_type and entry.known_standard_library_import:
+            entry = Builtin.get_known_standard_library_entry(entry.known_standard_library_import)
+        if entry and entry.is_type:
+            # Infer equivalent C types instead of Python types when possible.
+            type = entry.type
+            if not env.in_c_type_context and type is Builtin.long_type:
+                # Try to give a helpful warning when users write plain C type names.
+                warning(self.pos, "Found Python 2.x type 'long' in a Python annotation. Did you mean to use 'cython.long'?")
+                type = py_object_type
+            elif type.is_pyobject and type.equivalent_type:
+                type = type.equivalent_type
+            elif type is Builtin.int_type and env.global_scope().context.language_level == 2:
+                # While we still support Python 2 this must be a plain object
+                # so that it can be either int or long.  With language_level=3(str),
+                # we pick up the type but accept both int and long in Py2.
+                type = py_object_type
+            return type
+        if self.name == 'object':
+            # This is normally parsed as "simple C type", but not if we don't parse C types.
+            return py_object_type
+
+        # Try to give a helpful warning when users write plain C type names.
+        if not env.in_c_type_context and PyrexTypes.parse_basic_type(self.name):
+            warning(self.pos, "Found C type '%s' in a Python annotation. Did you mean to use 'cython.%s'?" % (self.name, self.name))
+
+        return None
+
+    def analyse_as_extension_type(self, env):
+        # Try to interpret this as a reference to an extension type.
+        # Returns the extension type, or None.
+        entry = self.entry
+        if not entry:
+            entry = env.lookup(self.name)
+        if entry and entry.is_type:
+            if entry.type.is_extension_type or entry.type.is_builtin_type:
+                return entry.type
+        return None
+
+    def analyse_target_declaration(self, env):
+        return self._analyse_target_declaration(env, is_assignment_expression=False)
+
+    def analyse_assignment_expression_target_declaration(self, env):
+        return self._analyse_target_declaration(env, is_assignment_expression=True)
+
+    def _analyse_target_declaration(self, env, is_assignment_expression):
+        # Make sure this assignment target has a symbol table entry, declaring the
+        # name if necessary.
+        #
+        # is_assignment_expression   boolean   selects the lookup/declaration methods
+        #                                      for assignment expression targets
+        self.is_target = True
+        if not self.entry:
+            if is_assignment_expression:
+                self.entry = env.lookup_assignment_expression_target(self.name)
+            else:
+                self.entry = env.lookup_here(self.name)
+        if self.entry:
+            self.entry.known_standard_library_import = ""  # already exists somewhere and so is now ambiguous
+        if not self.entry and self.annotation is not None:
+            # name : type = ...
+            is_dataclass = env.is_c_dataclass_scope
+            # In a dataclass, an assignment should not prevent a name from becoming an instance attribute.
+            # Hence, "as_target = not is_dataclass".
+            self.declare_from_annotation(env, as_target=not is_dataclass)
+        elif (self.entry and self.entry.is_inherited and
+                self.annotation and env.is_c_dataclass_scope):
+            error(self.pos, "Cannot redeclare inherited fields in Cython dataclasses")
+        # Still no entry (declare_from_annotation() may return without creating one):
+        # declare the name implicitly.
+        if not self.entry:
+            if env.directives['warn.undeclared']:
+                warning(self.pos, "implicit declaration of '%s'" % self.name, 1)
+            # Leave the type open for inference unless 'infer_types' is False.
+            if env.directives['infer_types'] != False:
+                type = unspecified_type
+            else:
+                type = py_object_type
+            if is_assignment_expression:
+                self.entry = env.declare_assignment_expression_target(self.name, type, self.pos)
+            else:
+                self.entry = env.declare_var(self.name, type, self.pos)
+        if self.entry.is_declared_generic:
+            self.result_ctype = py_object_type
+        if self.entry.as_module:
+            # cimported modules namespace can shadow actual variables
+            self.entry.is_variable = 1
+
+    def analyse_types(self, env):
+        self.initialized_check = env.directives['initializedcheck']
+        entry = self.entry
+        if entry is None:
+            entry = env.lookup(self.name)
+            if not entry:
+                entry = env.declare_builtin(self.name, self.pos)
+                if entry and entry.is_builtin and entry.is_const:
+                    self.is_literal = True
+            if not entry:
+                self.type = PyrexTypes.error_type
+                return self
+            self.entry = entry
+        entry.used = 1
+        if entry.type.is_buffer:
+            from . import Buffer
+            Buffer.used_buffer_aux_vars(entry)
+        self.analyse_rvalue_entry(env)
+        return self
+
+    def analyse_target_types(self, env):
+        # Analyse this name as the target of an assignment.  Reports an error for
+        # const targets and for non-lvalues, and marks the entry as used.
+        self.analyse_entry(env, is_target=True)
+
+        entry = self.entry
+        if entry.is_cfunction and entry.as_variable:
+            # FIXME: unify "is_overridable" flags below
+            # Note: 'and' binds tighter than 'or', i.e. the condition reads
+            # "overridable, or (not an lvalue and a fused C function)".
+            if (entry.is_overridable or entry.type.is_overridable) or not self.is_lvalue() and entry.fused_cfunction:
+                # We need this for assigning to cpdef names and for the fused 'def' TreeFragment
+                entry = self.entry = entry.as_variable
+                self.type = entry.type
+
+        if self.type.is_const:
+            error(self.pos, "Assignment to const '%s'" % self.name)
+        if not self.is_lvalue():
+            error(self.pos, "Assignment to non-lvalue '%s'" % self.name)
+            self.type = PyrexTypes.error_type
+        entry.used = 1
+        if entry.type.is_buffer:
+            from . import Buffer
+            Buffer.used_buffer_aux_vars(entry)
+        return self
+
+    def analyse_rvalue_entry(self, env):
+        """Analyse the entry for use as an rvalue and return the node.
+
+        Python globals and non-cached builtins are evaluated into a temp;
+        cached (const) builtins and memoryview slices are not.
+        """
+        self.analyse_entry(env)
+        entry = self.entry
+
+        if entry.is_declared_generic:
+            self.result_ctype = py_object_type
+
+        if entry.is_pyglobal or entry.is_builtin:
+            cached_builtin = entry.is_builtin and entry.is_const
+            self.is_temp = 0 if cached_builtin else 1
+            self.is_used_as_rvalue = 1
+        elif entry.type.is_memoryviewslice:
+            self.is_temp = False
+            self.is_used_as_rvalue = True
+            self.use_managed_ref = True
+        return self
+
+    def nogil_check(self, env):
+        """Flag the node as nogil and report rvalue reads that need the GIL.
+
+        Reading a Python global or a non-cached builtin raises a GIL error;
+        cached (const) builtins are fine.
+        """
+        self.nogil = True
+        if not self.is_used_as_rvalue:
+            return
+        entry = self.entry
+        if entry.is_builtin:
+            # cached builtins are ok
+            needs_gil = not entry.is_const
+        else:
+            needs_gil = entry.is_pyglobal
+        if needs_gil:
+            self.gil_error()
+
+    gil_message = "Accessing Python global or builtin"
+
+    def analyse_entry(self, env, is_target=False):
+        """Set ``self.type`` from the entry, preferring an inferred builtin type.
+
+        For non-target uses of a Python object entry, an inferred builtin
+        type takes precedence over the entry's static type.
+        """
+        self.check_identifier_kind()
+        resolved_type = self.entry.type
+        if not is_target and resolved_type.is_pyobject:
+            inferred = self.inferred_type
+            if inferred and inferred.is_builtin_type:
+                # assume that type inference is smarter than the static entry
+                resolved_type = inferred
+        self.type = resolved_type
+
+    def check_identifier_kind(self):
+        """Check that this name is usable in an expression.
+
+        Also records the type entry of an extension type, swaps enum type
+        entries for a Python-global entry, and redirects entries of other
+        kinds to their ``as_variable`` entry when one exists.
+        """
+        entry = self.entry
+        if entry.is_type:
+            if entry.type.is_extension_type:
+                self.type_entry = entry
+            if entry.type.is_enum or entry.type.is_cpp_enum:
+                global_entry = Symtab.Entry(self.name, None, py_object_type)
+                global_entry.is_pyglobal = True
+                global_entry.scope = entry.scope
+                self.entry = global_entry
+                return
+        usable = (entry.is_const or entry.is_variable or
+                  entry.is_builtin or entry.is_cfunction or
+                  entry.is_cpp_class)
+        if usable:
+            return
+        if entry.as_variable:
+            self.entry = entry.as_variable
+        elif not self.is_cython_module:
+            error(self.pos, "'%s' is not a constant, variable or function identifier" % self.name)
+
+    def is_cimported_module_without_shadow(self, env):
+        """Tell whether this name refers to a cimported module that is not
+        shadowed by a variable of the same name.
+
+        Returns False for the 'cython' module itself, for cython attributes,
+        and for names that cannot be resolved in ``env``.
+        """
+        if self.is_cython_module or self.cython_attribute:
+            return False
+        entry = self.entry or env.lookup(self.name)
+        if entry is None:
+            # Unresolvable name: nothing to inspect, so it cannot be a
+            # cimported module (avoids an AttributeError on None).
+            return False
+        return entry.as_module and not entry.is_variable
+
+    def is_simple(self):
+        """Names are always simple: anything that is not a C variable
+        ends up in a temp."""
+        return 1
+
+    def may_be_none(self):
+        """Tell whether this name may hold None at this point.
+
+        For Python object and memoryview slice names with control flow
+        state, the answer is True iff any recorded assignment's right-hand
+        side may be None.  Otherwise the decision is left to the base class.
+        """
+        if self.cf_state and self.type and (self.type.is_pyobject or
+                                            self.type.is_memoryviewslice):
+            # guard against infinite recursion on self-dependencies
+            if getattr(self, '_none_checking', False):
+                # self-dependency - either this node receives a None
+                # value from *another* node, or it can not reference
+                # None at this point => safe to assume "not None"
+                return False
+            self._none_checking = True
+            try:
+                # evaluate control flow state to see if there were any
+                # potential None values assigned to the node so far
+                return any(assignment.rhs.may_be_none()
+                           for assignment in self.cf_state)
+            finally:
+                # Always drop the recursion guard, even if a nested check
+                # raised; a stale flag would make later calls answer False.
+                del self._none_checking
+        return super(NameNode, self).may_be_none()
+
+    def nonlocally_immutable(self):
+        """Tell whether the value of this name cannot change non-locally.
+
+        Beyond the base class answer, this holds for local, argument,
+        builtin and readonly entries that are not closure variables.
+        """
+        if ExprNode.nonlocally_immutable(self):
+            return True
+        entry = self.entry
+        if entry and not entry.in_closure:
+            return entry.is_local or entry.is_arg or entry.is_builtin or entry.is_readonly
+        return False
+
+    def calculate_target_results(self, env):
+        """Nothing to calculate for a plain name target."""
+
+    def check_const(self):
+        """Return True if this name is acceptable as a constant expression.
+
+        A missing entry is accepted.  Otherwise the entry must be a const,
+        a C function, a builtin, or have a const type; if not,
+        ``not_const()`` is called and False is returned.
+        """
+        entry = self.entry
+        if entry is None:
+            return True
+        if (entry.is_const or entry.is_cfunction or
+                entry.is_builtin or entry.type.is_const):
+            return True
+        self.not_const()
+        return False
+
+    def check_const_addr(self):
+        """Return True if the address of this name is a constant.
+
+        That is the case for C globals, C functions and builtins; otherwise
+        ``addr_not_const()`` is called and False is returned.
+        """
+        entry = self.entry
+        if entry.is_cglobal or entry.is_cfunction or entry.is_builtin:
+            return True
+        self.addr_not_const()
+        return False
+
+    def is_lvalue(self):
+        """A name can be assigned to if it is a non-readonly variable or an
+        overridable C function."""
+        entry = self.entry
+        writable_variable = entry.is_variable and not entry.is_readonly
+        return writable_variable or (entry.is_cfunction and entry.is_overridable)
+
+    def is_addressable(self):
+        """Variables are addressable unless they are memoryview slices."""
+        entry = self.entry
+        return entry.is_variable and not self.type.is_memoryviewslice
+
+    def is_ephemeral(self):
+        """Name nodes are never ephemeral, even if the result is in a
+        temporary."""
+        return 0
+
+    def calculate_result_code(self):
+        """Return the C expression for this name.
+
+        C++ optional entries are dereferenced unless the node is an
+        assignment target.  Without an entry, a placeholder is returned.
+        """
+        entry = self.entry
+        if not entry:
+            # There was an error earlier
+            return "<error>"
+        needs_deref = entry.is_cpp_optional and not self.is_target
+        if needs_deref:
+            return "(*%s)" % entry.cname
+        return entry.cname
+
+    def generate_result_code(self, code):
+        """Emit the C code that makes the value of this name available.
+
+        Depending on the kind of entry, this emits no lookup code at all
+        (missing entry, cached builtins), a runtime name lookup (class
+        attributes, builtins, module globals, names in class bodies), or
+        only an unbound-variable check (locals, closure variables,
+        memoryview slices, C++ optional C globals).
+        """
+        entry = self.entry
+        if entry is None:
+            return  # There was an error earlier
+        if entry.utility_code:
+            code.globalstate.use_utility_code(entry.utility_code)
+        if entry.is_builtin and entry.is_const:
+            return  # Lookup already cached
+        elif entry.is_pyclass_attr:
+            assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+            interned_cname = code.intern_identifier(self.entry.name)
+            if entry.is_builtin:
+                namespace = Naming.builtins_cname
+            else:  # entry.is_pyglobal
+                namespace = entry.scope.namespace_cname
+            # Unless the name is known to be unassigned here (cf_is_null),
+            # try the namespace first and only fall back to the module
+            # global lookup inside the emitted "if (!result)" block.
+            if not self.cf_is_null:
+                code.putln(
+                    '%s = PyObject_GetItem(%s, %s);' % (
+                        self.result(),
+                        namespace,
+                        interned_cname))
+                code.putln('if (unlikely(!%s)) {' % self.result())
+                code.putln('PyErr_Clear();')
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
+            code.putln(
+                '__Pyx_GetModuleGlobalName(%s, %s);' % (
+                    self.result(),
+                    interned_cname))
+            if not self.cf_is_null:
+                # closes the fallback block opened above
+                code.putln("}")
+            code.putln(code.error_goto_if_null(self.result(), self.pos))
+            self.generate_gotref(code)
+
+        elif entry.is_builtin and not entry.scope.is_module_scope:
+            # known builtin
+            assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+            interned_cname = code.intern_identifier(self.entry.name)
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("GetBuiltinName", "ObjectHandling.c"))
+            code.putln(
+                '%s = __Pyx_GetBuiltinName(%s); %s' % (
+                self.result(),
+                interned_cname,
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+
+        elif entry.is_pyglobal or (entry.is_builtin and entry.scope.is_module_scope):
+            # name in class body, global name or unknown builtin
+            assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+            interned_cname = code.intern_identifier(self.entry.name)
+            if entry.scope.is_module_scope:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("GetModuleGlobalName", "ObjectHandling.c"))
+                code.putln(
+                    '__Pyx_GetModuleGlobalName(%s, %s); %s' % (
+                        self.result(),
+                        interned_cname,
+                        code.error_goto_if_null(self.result(), self.pos)))
+            else:
+                # FIXME: is_pyglobal is also used for class namespace
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("GetNameInClass", "ObjectHandling.c"))
+                code.putln(
+                    '__Pyx_GetNameInClass(%s, %s, %s); %s' % (
+                        self.result(),
+                        entry.scope.namespace_cname,
+                        interned_cname,
+                        code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+
+        elif entry.is_local or entry.in_closure or entry.from_closure or entry.type.is_memoryviewslice:
+            # Raise UnboundLocalError for objects and memoryviewslices
+            raise_unbound = (
+                (self.cf_maybe_null or self.cf_is_null) and not self.allow_null)
+
+            # Both checks below are gated by the 'initializedcheck'
+            # directive value stored in analyse_types().
+            memslice_check = entry.type.is_memoryviewslice and self.initialized_check
+            optional_cpp_check = entry.is_cpp_optional and self.initialized_check
+
+            if optional_cpp_check:
+                unbound_check_code = entry.type.cpp_optional_check_for_null_code(entry.cname)
+            else:
+                unbound_check_code = entry.type.check_for_null_code(entry.cname)
+
+            if unbound_check_code and raise_unbound and (entry.type.is_pyobject or memslice_check or optional_cpp_check):
+                code.put_error_if_unbound(self.pos, entry, self.in_nogil_context, unbound_check_code=unbound_check_code)
+
+        elif entry.is_cglobal and entry.is_cpp_optional and self.initialized_check:
+            # C++ optional C globals get the same unbound check as above.
+            unbound_check_code = entry.type.cpp_optional_check_for_null_code(entry.cname)
+            code.put_error_if_unbound(self.pos, entry, unbound_check_code=unbound_check_code)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        """Emit the C code that assigns the value of ``rhs`` to this name.
+
+        Python globals (module globals, class attributes, type members) are
+        stored through a setter call on their namespace.  Everything else
+        is assigned to the C variable directly, with reference counting,
+        buffer or memoryview acquisition handled as the type requires.
+        ``overloaded_assignment``, ``exception_check`` and
+        ``exception_value`` only affect the plain C assignment path.
+        """
+        #print "NameNode.generate_assignment_code:", self.name ###
+        entry = self.entry
+        if entry is None:
+            return  # There was an error earlier
+
+        if (self.entry.type.is_ptr and isinstance(rhs, ListNode)
+                and not self.lhs_of_first_assignment and not rhs.in_module_scope):
+            error(self.pos, "Literal list must be assigned to pointer at time of declaration")
+
+        # is_pyglobal seems to be True for module level-globals only.
+        # We use this to access class->tp_dict if necessary.
+        if entry.is_pyglobal:
+            assert entry.type.is_pyobject, "Python global or builtin not a Python object"
+            interned_cname = code.intern_identifier(self.entry.name)
+            namespace = self.entry.scope.namespace_cname
+            if entry.is_member:
+                # if the entry is a member we have to cheat: SetAttr does not work
+                # on types, so we create a descriptor which is then added to tp_dict.
+                setter = '__Pyx_SetItemOnTypeDict'
+            elif entry.scope.is_module_scope:
+                setter = 'PyDict_SetItem'
+                namespace = Naming.moddict_cname
+            elif entry.is_pyclass_attr:
+                # Special-case setting __new__
+                n = "SetNewInClass" if self.name == "__new__" else "SetNameInClass"
+                code.globalstate.use_utility_code(UtilityCode.load_cached(n, "ObjectHandling.c"))
+                setter = '__Pyx_' + n
+            else:
+                assert False, repr(entry)
+            code.put_error_if_neg(
+                self.pos,
+                '%s(%s, %s, %s)' % (
+                    setter,
+                    namespace,
+                    interned_cname,
+                    rhs.py_result()))
+            if debug_disposal_code:
+                print("NameNode.generate_assignment_code:")
+                print("...generating disposal code for %s" % rhs)
+            rhs.generate_disposal_code(code)
+            rhs.free_temps(code)
+            if entry.is_member:
+                # in Py2.6+, we need to invalidate the method cache
+                code.putln("PyType_Modified(%s);" %
+                           entry.scope.parent_type.typeptr_cname)
+        else:
+            if self.type.is_memoryviewslice:
+                self.generate_acquire_memoryviewslice(rhs, code)
+
+            elif self.type.is_buffer:
+                # Generate code for doing the buffer release/acquisition.
+                # This might raise an exception in which case the assignment (done
+                # below) will not happen.
+                #
+                # The reason this is not in a typetest-like node is because the
+                # variables that the acquired buffer info is stored to is allocated
+                # per entry and coupled with it.
+                self.generate_acquire_buffer(rhs, code)
+            # 'assigned' records whether the managed-reference path below
+            # already emitted the store, so that it is not emitted twice.
+            assigned = False
+            if self.type.is_pyobject:
+                #print "NameNode.generate_assignment_code: to", self.name ###
+                #print "...from", rhs ###
+                #print "...LHS type", self.type, "ctype", self.ctype() ###
+                #print "...RHS type", rhs.type, "ctype", rhs.ctype() ###
+                if self.use_managed_ref:
+                    rhs.make_owned_reference(code)
+                    is_external_ref = entry.is_cglobal or self.entry.in_closure or self.entry.from_closure
+                    if is_external_ref:
+                        self.generate_gotref(code, handle_null=True)
+                    assigned = True
+                    if entry.is_cglobal:
+                        self.generate_decref_set(code, rhs.result_as(self.ctype()))
+                    else:
+                        if not self.cf_is_null:
+                            if self.cf_maybe_null:
+                                self.generate_xdecref_set(code, rhs.result_as(self.ctype()))
+                            else:
+                                self.generate_decref_set(code, rhs.result_as(self.ctype()))
+                        else:
+                            # Known-unassigned variable: nothing to decref,
+                            # fall through to the plain assignment below.
+                            assigned = False
+                    if is_external_ref:
+                        rhs.generate_giveref(code)
+            if not self.type.is_memoryviewslice:
+                if not assigned:
+                    if overloaded_assignment:
+                        result = rhs.move_result_rhs()
+                        if exception_check == '+':
+                            translate_cpp_exception(
+                                code, self.pos,
+                                '%s = %s;' % (self.result(), result),
+                                self.result() if self.type.is_pyobject else None,
+                                exception_value, self.in_nogil_context)
+                        else:
+                            code.putln('%s = %s;' % (self.result(), result))
+                    else:
+                        result = rhs.move_result_rhs_as(self.ctype())
+
+                        if is_pythran_expr(self.type):
+                            code.putln('new (&%s) decltype(%s){%s};' % (self.result(), self.result(), result))
+                        elif result != self.result():
+                            # skip the store when both sides are the same C expression
+                            code.putln('%s = %s;' % (self.result(), result))
+                if debug_disposal_code:
+                    print("NameNode.generate_assignment_code:")
+                    print("...generating post-assignment code for %s" % rhs)
+                rhs.generate_post_assignment_code(code)
+            elif rhs.result_in_temp():
+                rhs.generate_post_assignment_code(code)
+
+            rhs.free_temps(code)
+
+    def generate_acquire_memoryviewslice(self, rhs, code):
+        """
+        Emit the acquisition of a memoryview slice assigned to this name.
+
+        Slices, coercions from objects, return values etc are new references.
+        We have a borrowed reference in case of dst = src
+        """
+        from . import MemoryView
+
+        acquire_kwargs = dict(
+            lhs_cname=self.result(),
+            lhs_type=self.type,
+            lhs_pos=self.pos,
+            rhs=rhs,
+            code=code,
+            have_gil=not self.in_nogil_context,
+            first_assignment=self.cf_is_null,
+        )
+        MemoryView.put_acquire_memoryviewslice(**acquire_kwargs)
+
+    def generate_acquire_buffer(self, rhs, code):
+        """Emit the buffer release/acquisition for assigning ``rhs`` to this name.
+
+        A temporary is only used in case the rhs is a complicated expression
+        leading to the object, to avoid repeating the same C expression for
+        every reference to the rhs.  It does NOT hold a reference.
+        """
+        needs_temp = not (isinstance(rhs, NameNode) or rhs.is_temp)
+        if needs_temp:
+            rhs_cname = code.funcstate.allocate_temp(self.entry.type, manage_ref=False)
+            code.putln('%s = %s;' % (rhs_cname, rhs.result_as(self.ctype())))
+        else:
+            rhs_cname = rhs.result_as(self.ctype())
+
+        from . import Buffer
+        Buffer.put_assign_to_buffer(
+            self.result(), rhs_cname, self.entry,
+            is_initialized=not self.lhs_of_first_assignment,
+            pos=self.pos, code=code)
+
+        if needs_temp:
+            code.putln("%s = 0;" % rhs_cname)
+            code.funcstate.release_temp(rhs_cname)
+
+    def generate_deletion_code(self, code, ignore_nonexisting=False):
+        """Emit the C code for deleting this name.
+
+        Class attributes are removed with PyObject_DelItem on the class
+        namespace, Python globals with an attribute deletion on the module,
+        and Python object / memoryview slice variables are decref-cleared.
+        Any other C name is reported as an error.  With
+        ``ignore_nonexisting``, a missing name is not an error.
+        """
+        if self.entry is None:
+            return  # There was an error earlier
+        elif self.entry.is_pyclass_attr:
+            namespace = self.entry.scope.namespace_cname
+            interned_cname = code.intern_identifier(self.entry.name)
+            # key_error_code is spliced in after the KeyError test below:
+            # either it swallows the KeyError ("...; else" skips the error
+            # goto) or it replaces it with a NameError before the goto.
+            if ignore_nonexisting:
+                key_error_code = 'PyErr_Clear(); else'
+            else:
+                # minor hack: fake a NameError on KeyError
+                key_error_code = (
+                    '{ PyErr_Clear(); PyErr_Format(PyExc_NameError, "name \'%%s\' is not defined", "%s"); }' %
+                    self.entry.name)
+            code.putln(
+                'if (unlikely(PyObject_DelItem(%s, %s) < 0)) {'
+                ' if (likely(PyErr_ExceptionMatches(PyExc_KeyError))) %s'
+                ' %s '
+                '}' % (namespace, interned_cname,
+                       key_error_code,
+                       code.error_goto(self.pos)))
+        elif self.entry.is_pyglobal:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+            interned_cname = code.intern_identifier(self.entry.name)
+            del_code = '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
+                Naming.module_cname, interned_cname)
+            if ignore_nonexisting:
+                # only an AttributeError is swallowed, other errors propagate
+                code.putln(
+                    'if (unlikely(%s < 0)) {'
+                    ' if (likely(PyErr_ExceptionMatches(PyExc_AttributeError))) PyErr_Clear(); else %s '
+                    '}' % (del_code, code.error_goto(self.pos)))
+            else:
+                code.put_error_if_neg(self.pos, del_code)
+        elif self.entry.type.is_pyobject or self.entry.type.is_memoryviewslice:
+            # Nothing is emitted when the variable is known to be unassigned.
+            if not self.cf_is_null:
+                if self.cf_maybe_null and not ignore_nonexisting:
+                    code.put_error_if_unbound(self.pos, self.entry)
+
+                if self.entry.in_closure:
+                    # generator
+                    self.generate_gotref(code, handle_null=True, maybe_null_extra_check=ignore_nonexisting)
+                if ignore_nonexisting and self.cf_maybe_null:
+                    code.put_xdecref_clear(self.result(), self.ctype(),
+                                        have_gil=not self.nogil)
+                else:
+                    code.put_decref_clear(self.result(), self.ctype(),
+                                          have_gil=not self.nogil)
+        else:
+            error(self.pos, "Deletion of C names not supported")
+
+    def annotate(self, code):
+        """Add a call annotation for this name if it is marked as called.
+
+        The annotation distinguishes Python calls from C calls based on the
+        node type and starts one column before the name.
+        """
+        if not getattr(self, 'is_called', False):
+            return
+        name_length = len(self.name)
+        pos = (self.pos[0], self.pos[1], self.pos[2] - name_length - 1)
+        if self.type.is_pyobject:
+            style = 'py_call'
+            text = 'python function (%s)'
+        else:
+            style = 'c_call'
+            text = 'c function (%s)'
+        code.annotate(pos, AnnotationItem(style, text % self.type, size=name_length))
+
+    def get_known_standard_library_import(self):
+        """Return the known standard library import recorded on the entry,
+        or None when there is no entry."""
+        entry = self.entry
+        if not entry:
+            return None
+        return entry.known_standard_library_import
+
+class BackquoteNode(ExprNode):
+    #  Backquote expression: `expr`
+    #
+    #  arg    ExprNode    the expression whose repr() is taken
+
+    type = py_object_type
+
+    subexprs = ['arg']
+
+    gil_message = "Backquote expression"
+
+    def analyse_types(self, env):
+        # The operand is always converted to a Python object and the
+        # result lives in a temp.
+        analysed_arg = self.arg.analyse_types(env)
+        self.arg = analysed_arg.coerce_to_pyobject(env)
+        self.is_temp = 1
+        return self
+
+    def calculate_constant_result(self):
+        self.constant_result = repr(self.arg.constant_result)
+
+    def generate_result_code(self, code):
+        target = self.result()
+        repr_call = "%s = PyObject_Repr(%s); %s" % (
+            target,
+            self.arg.py_result(),
+            code.error_goto_if_null(self.result(), self.pos))
+        code.putln(repr_call)
+        self.generate_gotref(code)
+
+
+class ImportNode(ExprNode):
+    #  Used as part of import statement implementation.
+    #  Implements result =
+    #    __import__(module_name, globals(), None, name_list, level)
+    #
+    #  module_name   StringNode            dotted name of module. Empty module
+    #                       name means importing the parent package according
+    #                       to level
+    #  name_list     ListNode or None      list of names to be imported
+    #  level         int                   relative import level:
+    #                       -1: attempt both relative import and absolute import;
+    #                        0: absolute import;
+    #                       >0: the number of parent directories to search
+    #                           relative to the current module.
+    #                     None: decide the level according to language level and
+    #                           directives
+    #  get_top_level_module   int          true: return top-level module, false: return imported module
+    #  module_names           TupleNode    the separate names of the module and submodules, or None
+
+    type = py_object_type
+    module_names = None
+    get_top_level_module = False
+    is_temp = True
+
+    subexprs = ['module_name', 'name_list', 'module_names']
+
+    def analyse_types(self, env):
+        # Resolves a 'None' level, coerces the module name and name list to
+        # Python objects and, for dotted names imported without a name
+        # list, builds the tuple of name parts.  Returns self.
+        if self.level is None:
+            # For modules in packages, and without 'absolute_import' enabled, try relative (Py2) import first.
+            if env.global_scope().parent_module and (
+                    env.directives['py2_import'] or
+                    Future.absolute_import not in env.global_scope().context.future_directives):
+                self.level = -1
+            else:
+                self.level = 0
+        module_name = self.module_name.analyse_types(env)
+        self.module_name = module_name.coerce_to_pyobject(env)
+        assert self.module_name.is_string_literal
+        if self.name_list:
+            name_list = self.name_list.analyse_types(env)
+            self.name_list = name_list.coerce_to_pyobject(env)
+        elif '.' in self.module_name.value:
+            # one identifier string node per dotted component
+            self.module_names = TupleNode(self.module_name.pos, args=[
+                IdentifierStringNode(self.module_name.pos, value=part, constant_result=part)
+                for part in map(StringEncoding.EncodedString, self.module_name.value.split('.'))
+            ]).analyse_types(env)
+        return self
+
+    gil_message = "Python import"
+
+    def generate_result_code(self, code):
+        # Emits the import call and stores the module in the result temp.
+        assert self.module_name.is_string_literal
+        module_name = self.module_name.value
+
+        # Plain "import a.b.c" (no name list, imported module wanted, not an
+        # explicit relative import) goes through the dotted-module helpers;
+        # everything else uses the generic __Pyx_Import().
+        if self.level <= 0 and not self.name_list and not self.get_top_level_module:
+            if self.module_names:
+                assert self.module_names.is_literal  # make sure we create the tuple only once
+            if self.level == 0:
+                utility_code = UtilityCode.load_cached("ImportDottedModule", "ImportExport.c")
+                helper_func = "__Pyx_ImportDottedModule"
+            else:
+                utility_code = UtilityCode.load_cached("ImportDottedModuleRelFirst", "ImportExport.c")
+                helper_func = "__Pyx_ImportDottedModuleRelFirst"
+            code.globalstate.use_utility_code(utility_code)
+            import_code = "%s(%s, %s)" % (
+                helper_func,
+                self.module_name.py_result(),
+                self.module_names.py_result() if self.module_names else 'NULL',
+            )
+        else:
+            code.globalstate.use_utility_code(UtilityCode.load_cached("Import", "ImportExport.c"))
+            import_code = "__Pyx_Import(%s, %s, %d)" % (
+                self.module_name.py_result(),
+                self.name_list.py_result() if self.name_list else '0',
+                self.level)
+
+        # Modules listed in utility_code_for_imports get their import
+        # expression wrapped in an additional helper call.
+        if self.level <= 0 and module_name in utility_code_for_imports:
+            helper_func, code_name, code_file = utility_code_for_imports[module_name]
+            code.globalstate.use_utility_code(UtilityCode.load_cached(code_name, code_file))
+            import_code = '%s(%s)' % (helper_func, import_code)
+
+        code.putln("%s = %s; %s" % (
+            self.result(),
+            import_code,
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+    def get_known_standard_library_import(self):
+        # Returns the module name string as written in the import.
+        return self.module_name.value
+
+
+class ScopedExprNode(ExprNode):
+    # Abstract base class for ExprNodes that have their own local
+    # scope, such as generator expressions.
+    #
+    # expr_scope    Scope  the inner scope of the expression
+
+    subexprs = []
+    expr_scope = None
+
+    # does this node really have a local scope, e.g. does it leak loop
+    # variables or not?  non-leaking Py3 behaviour is default, except
+    # for list comprehensions where the behaviour differs in Py2 and
+    # Py3 (set in Parsing.py based on parser context)
+    has_local_scope = True
+
+    def init_scope(self, outer_scope, expr_scope=None):
+        # An explicitly passed scope wins; otherwise a new
+        # ComprehensionScope is created for nodes with a local scope.
+        if expr_scope is not None:
+            self.expr_scope = expr_scope
+        elif self.has_local_scope:
+            self.expr_scope = Symtab.ComprehensionScope(outer_scope)
+        elif not self.expr_scope:  # don't unset if it's already been set
+            self.expr_scope = None
+
+    def analyse_declarations(self, env):
+        self.init_scope(env)
+
+    def analyse_scoped_declarations(self, env):
+        # this is called with the expr_scope as env
+        pass
+
+    def analyse_types(self, env):
+        # no recursion here, the children will be analysed separately below
+        return self
+
+    def analyse_scoped_expressions(self, env):
+        # this is called with the expr_scope as env
+        return self
+
+    def generate_evaluation_code(self, code):
+        # Wraps the inherited evaluation code in a C block and cleans up
+        # the used, non-closure Python object variables of the inner scope
+        # at every exit point (normal, error, loop labels).
+        # set up local variables and free their references on exit
+        generate_inner_evaluation_code = super(ScopedExprNode, self).generate_evaluation_code
+        if not self.has_local_scope or not self.expr_scope.var_entries:
+            # no local variables => delegate, done
+            generate_inner_evaluation_code(code)
+            return
+
+        code.putln('{ /* enter inner scope */')
+        py_entries = []
+        # entries with a false name are skipped; sorting iterates the
+        # remaining (name, entry) items in name order
+        for _, entry in sorted(item for item in self.expr_scope.entries.items() if item[0]):
+            if not entry.in_closure:
+                if entry.type.is_pyobject and entry.used:
+                    py_entries.append(entry)
+        if not py_entries:
+            # no local Python references => no cleanup required
+            generate_inner_evaluation_code(code)
+            code.putln('} /* exit inner scope */')
+            return
+
+        # must free all local Python references at each exit point
+        old_loop_labels = code.new_loop_labels()
+        old_error_label = code.new_error_label()
+
+        generate_inner_evaluation_code(code)
+
+        # normal (non-error) exit
+        self._generate_vars_cleanup(code, py_entries)
+
+        # error/loop body exit points
+        exit_scope = code.new_label('exit_scope')
+        code.put_goto(exit_scope)
+        # each used inner label gets a cleanup block that then jumps on to
+        # the corresponding outer label
+        for label, old_label in ([(code.error_label, old_error_label)] +
+                                 list(zip(code.get_loop_labels(), old_loop_labels))):
+            if code.label_used(label):
+                code.put_label(label)
+                self._generate_vars_cleanup(code, py_entries)
+                code.put_goto(old_label)
+        code.put_label(exit_scope)
+        code.putln('} /* exit inner scope */')
+
+        # restore the labels of the enclosing code
+        code.set_loop_labels(old_loop_labels)
+        code.error_label = old_error_label
+
+    def _generate_vars_cleanup(self, code, py_entries):
+        # C globals are reset to Py_None, all other variables are
+        # xdecref-cleared.
+        for entry in py_entries:
+            if entry.is_cglobal:
+                code.put_var_gotref(entry)
+                code.put_var_decref_set(entry, "Py_None")
+            else:
+                code.put_var_xdecref_clear(entry)
+
+
+class IteratorNode(ScopedExprNode):
+    #  Used as part of for statement implementation.
+    #
+    #  Implements result = iter(sequence)
+    #
+    #  sequence   ExprNode
+
+    type = py_object_type
+    iter_func_ptr = None
+    counter_cname = None
+    reversed = False      # currently only used for list/tuple types (see Optimize.py)
+    is_async = False
+    has_local_scope = False
+
+    subexprs = ['sequence']
+
+    def analyse_types(self, env):
+        """Analyse the iterated sequence and pick the iteration strategy.
+
+        C arrays and non-string pointers keep their C type, C++ classes
+        (also when wrapped in the builtin reversed()) are handed over to a
+        CppIteratorNode, and anything else is iterated as a Python object.
+        """
+        # actually evaluate sequence in the expression scope if there is one
+        scope = self.expr_scope or env
+        self.sequence = self.sequence.analyse_types(scope)
+        seq_type = self.sequence.type
+        if (seq_type.is_array or seq_type.is_ptr) and not seq_type.is_string:
+            # C array iteration will be transformed later on
+            self.type = seq_type
+        elif seq_type.is_cpp_class:
+            return CppIteratorNode(self.pos, sequence=self.sequence).analyse_types(scope)
+        elif self.is_reversed_cpp_iteration():
+            cpp_sequence = self.sequence.arg_tuple.args[0].arg
+            return CppIteratorNode(self.pos, sequence=cpp_sequence, reversed=True).analyse_types(scope)
+        else:
+            py_sequence = self.sequence.coerce_to_pyobject(scope)
+            if py_sequence.type in (list_type, tuple_type):
+                py_sequence = py_sequence.as_none_safe_node("'NoneType' object is not iterable")
+            self.sequence = py_sequence
+        self.is_temp = 1
+        return self
+
+    gil_message = "Iterating over Python object"
+
+    # C type of a pointer to a function that takes a single Python object
+    # argument named "it" and returns a Python object.
+    _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
+            ]))
+
+    def is_reversed_cpp_iteration(self):
+        """
+        Returns True if the 'reversed' function is applied to a C++ iterable.
+
+        This supports C++ classes with reverse_iterator implemented.
+        """
+        call = self.sequence
+        if not isinstance(call, SimpleCallNode):
+            return False
+        arg_tuple = call.arg_tuple
+        if not arg_tuple or len(arg_tuple.args) != 1:
+            return False
+        func = call.function
+        if not (func.is_name and func.name == "reversed"):
+            return False
+        if not func.entry.is_builtin:
+            # a user-defined 'reversed' shadows the builtin
+            return False
+        wrapped = arg_tuple.args[0]
+        if not (isinstance(wrapped, CoercionNode) and wrapped.arg.is_name):
+            return False
+        return wrapped.arg.entry.type.is_cpp_class
+
+    def type_dependencies(self, env):
+        return self.sequence.type_dependencies(self.expr_scope or env)
+
+    def infer_type(self, env):
+        sequence_type = self.sequence.infer_type(env)
+        if sequence_type.is_array or sequence_type.is_ptr:
+            return sequence_type
+        elif sequence_type.is_cpp_class:
+            begin = sequence_type.scope.lookup("begin")
+            if begin is not None:
+                return begin.type.return_type
+        elif sequence_type.is_pyobject:
+            return sequence_type
+        return py_object_type
+
+    def generate_result_code(self, code):
+        """Emit the C code that sets up iteration over a Python object.
+
+        Three shapes of C code can be produced, depending on the sequence type:
+
+        * exactly list/tuple typed: the sequence itself is stored (with a new
+          reference) as the result and an index counter temp is initialised;
+        * some other builtin type: PyObject_GetIter() is called and the
+          iterator's "next" function pointer is stored in a temp;
+        * any other Python object: both of the above, selected at C runtime by
+          an exact list/tuple type check.
+
+        C++ classes and C arrays/pointers must have been replaced by other
+        nodes before code generation and are rejected here.
+        """
+        sequence_type = self.sequence.type
+        if sequence_type.is_cpp_class:
+            assert False, "Should have been changed to CppIteratorNode"
+        if sequence_type.is_array or sequence_type.is_ptr:
+            raise InternalError("for in carray slice not transformed")
+
+        is_builtin_sequence = sequence_type in (list_type, tuple_type)
+        if not is_builtin_sequence:
+            # reversed() not currently optimised (see Optimize.py)
+            assert not self.reversed, "internal error: reversed() only implemented for list/tuple objects"
+        # Only when the static type is not a builtin type can the object still
+        # turn out to be a list or tuple at runtime.
+        self.may_be_a_sequence = not sequence_type.is_builtin_type
+        if self.may_be_a_sequence:
+            # opens the C "if" block of the list/tuple fast path; it is closed
+            # (or continued with "else") further down
+            code.putln(
+                "if (likely(PyList_CheckExact(%s)) || PyTuple_CheckExact(%s)) {" % (
+                    self.sequence.py_result(),
+                    self.sequence.py_result()))
+
+        if is_builtin_sequence or self.may_be_a_sequence:
+            # fast path: "iterate" by indexing into the sequence object itself
+            code.putln("%s = %s; __Pyx_INCREF(%s);" % (
+                self.result(),
+                self.sequence.py_result(),
+                self.result(),
+            ))
+            # index counter; released again in free_temps()
+            self.counter_cname = code.funcstate.allocate_temp(
+                PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+            if self.reversed:
+                if sequence_type is list_type:
+                    len_func = '__Pyx_PyList_GET_SIZE'
+                else:
+                    len_func = '__Pyx_PyTuple_GET_SIZE'
+                code.putln("%s = %s(%s);" % (self.counter_cname, len_func, self.result()))
+                # the negative-size error check is only emitted into the
+                # !CYTHON_ASSUME_SAFE_MACROS build variant
+                code.putln("#if !CYTHON_ASSUME_SAFE_MACROS")
+                code.putln(code.error_goto_if_neg(self.counter_cname, self.pos))
+                code.putln("#endif")
+                code.putln("--%s;" % self.counter_cname)  # len -> last item
+            else:
+                code.putln("%s = 0;" % self.counter_cname)
+
+        if not is_builtin_sequence:
+            # generic path: go through the iterator protocol
+            self.iter_func_ptr = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
+            if self.may_be_a_sequence:
+                # A NULL function pointer marks the list/tuple fast path; in the
+                # generic branch the counter is set to -1 instead.
+                code.putln("%s = NULL;" % self.iter_func_ptr)
+                code.putln("} else {")
+                code.put("%s = -1; " % self.counter_cname)
+
+            code.putln("%s = PyObject_GetIter(%s); %s" % (
+                self.result(),
+                self.sequence.py_result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+
+            # PyObject_GetIter() fails if "tp_iternext" is not set, but the check below
+            # makes it visible to the C compiler that the pointer really isn't NULL, so that
+            # it can distinguish between the special cases and the generic case
+            code.putln("%s = __Pyx_PyObject_GetIterNextFunc(%s); %s" % (
+                self.iter_func_ptr, self.py_result(),
+                code.error_goto_if_null(self.iter_func_ptr, self.pos)))
+        if self.may_be_a_sequence:
+            # closes the runtime type check opened above
+            code.putln("}")
+
+    def generate_next_sequence_item(self, test_name, result_name, code):
+        """Emit the C code that fetches the next item of a list or tuple by index.
+
+        test_name    'List' or 'Tuple'; selects the C-API macro family to use
+        result_name  C name of the variable that receives the (owned) item
+        code         code writer that receives the generated C code
+
+        The generated code leaves the enclosing C loop with "break" once the
+        index counter has reached the size of the sequence, and afterwards
+        moves the counter forwards or, for reversed iteration, backwards.
+        """
+        assert self.counter_cname, "internal error: counter_cname temp not prepared"
+        assert test_name in ('List', 'Tuple')
+
+        # By default the size is re-read from the object on every iteration.
+        final_size = '__Pyx_Py%s_GET_SIZE(%s)' % (test_name, self.py_result())
+        size_is_safe = False
+        if self.sequence.is_sequence_constructor:
+            # A sequence literal has a length that is known at compile time, as long
+            # as it is not multiplied or multiplied by a constant integer only.
+            item_count = len(self.sequence.args)
+            if self.sequence.mult_factor is None:
+                final_size = item_count
+                size_is_safe = True
+            elif isinstance(self.sequence.mult_factor.constant_result, _py_int_types):
+                final_size = item_count * self.sequence.mult_factor.constant_result
+                size_is_safe = True
+
+        if size_is_safe:
+            code.putln("if (%s >= %s) break;" % (self.counter_cname, final_size))
+        else:
+            # Read the size into a block-local C variable so that it can be
+            # error checked (non-"safe macros" builds only) before it is compared.
+            code.putln("{")
+            code.putln("Py_ssize_t %s = %s;" % (Naming.quick_temp_cname, final_size))
+            code.putln("#if !CYTHON_ASSUME_SAFE_MACROS")
+            code.putln(code.error_goto_if_neg(Naming.quick_temp_cname, self.pos))
+            code.putln("#endif")
+            code.putln("if (%s >= %s) break;" % (self.counter_cname, Naming.quick_temp_cname))
+            code.putln("}")
+
+        if self.reversed:
+            inc_dec = '--'
+        else:
+            inc_dec = '++'
+        # Variant 1: borrow the item through the GET_ITEM macro and take a reference.
+        code.putln("#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS")
+        code.putln(
+            "%s = Py%s_GET_ITEM(%s, %s); __Pyx_INCREF(%s); %s%s; %s" % (
+                result_name,
+                test_name,
+                self.py_result(),
+                self.counter_cname,
+                result_name,
+                self.counter_cname,
+                inc_dec,
+                # use the error label to avoid C compiler warnings if we only use it below
+                code.error_goto_if_neg('0', self.pos)
+                ))
+        # Variant 2: __Pyx_PySequence_ITEM(), whose result is NULL checked.
+        code.putln("#else")
+        code.putln(
+            "%s = __Pyx_PySequence_ITEM(%s, %s); %s%s; %s" % (
+                result_name,
+                self.py_result(),
+                self.counter_cname,
+                self.counter_cname,
+                inc_dec,
+                code.error_goto_if_null(result_name, self.pos)))
+        code.put_gotref(result_name, py_object_type)
+        code.putln("#endif")
+
+    def generate_iter_next_result_code(self, result_name, code):
+        """Emit the C code that stores the next item of the iteration in 'result_name'.
+
+        The generated code is meant to live inside a C loop: it uses "break" to
+        leave the loop when the iteration is exhausted.  Statically typed lists
+        and tuples are indexed directly; other objects use the "next" function
+        pointer obtained in generate_result_code(), optionally preceded by the
+        list/tuple fast path that is selected at C runtime.
+        """
+        sequence_type = self.sequence.type
+        if self.reversed:
+            # reversed iteration counts down and stops below index 0
+            code.putln("if (%s < 0) break;" % self.counter_cname)
+        if sequence_type is list_type:
+            self.generate_next_sequence_item('List', result_name, code)
+            return
+        elif sequence_type is tuple_type:
+            self.generate_next_sequence_item('Tuple', result_name, code)
+            return
+
+        if self.may_be_a_sequence:
+            # the function pointer was set to NULL by generate_result_code() when
+            # the object turned out to be an exact list or tuple at runtime
+            code.putln("if (likely(!%s)) {" % self.iter_func_ptr)
+            code.putln("if (likely(PyList_CheckExact(%s))) {" % self.py_result())
+            self.generate_next_sequence_item('List', result_name, code)
+            code.putln("} else {")
+            self.generate_next_sequence_item('Tuple', result_name, code)
+            code.putln("}")
+            code.put("} else ")
+
+        # generic iterator protocol: call the cached "next" function
+        code.putln("{")
+        code.putln(
+            "%s = %s(%s);" % (
+                result_name,
+                self.iter_func_ptr,
+                self.py_result()))
+        code.putln("if (unlikely(!%s)) {" % result_name)
+        # A NULL result either signals the end of the iteration (no exception set
+        # or StopIteration, which gets cleared) or a real error that is propagated.
+        code.putln("PyObject* exc_type = PyErr_Occurred();")
+        code.putln("if (exc_type) {")
+        code.putln("if (likely(__Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration))) PyErr_Clear();")
+        code.putln("else %s" % code.error_goto(self.pos))
+        code.putln("}")
+        code.putln("break;")
+        code.putln("}")
+        code.put_gotref(result_name, py_object_type)
+        code.putln("}")
+
+    def free_temps(self, code):
+        if self.counter_cname:
+            code.funcstate.release_temp(self.counter_cname)
+        if self.iter_func_ptr:
+            code.funcstate.release_temp(self.iter_func_ptr)
+            self.iter_func_ptr = None
+        ExprNode.free_temps(self, code)
+
+
+class CppIteratorNode(ExprNode):
+    # Iteration over a C++ container.
+    # Created at the analyse_types stage by IteratorNode
+    #
+    # sequence            ExprNode     the C++ container expression that is iterated
+    # reversed            bool         use rbegin()/rend() instead of begin()/end()
+    # cpp_sequence_cname  str or None  C name of the temp holding the container (or a
+    #                                  pointer to it) when it cannot be accessed directly
+    # cpp_attribute_op    str          "." or "->", used to call methods on the container
+    # extra_dereference   str          "" or "*", prepended to the iterator result
+    #                                  wherever the iterator is used
+    cpp_sequence_cname = None
+    cpp_attribute_op = "."
+    extra_dereference = ""
+    is_temp = True
+    reversed = False
+
+    subexprs = ['sequence']
+
+    def get_iterator_func_names(self):
+        """Return the names of the container methods that delimit the iteration:
+        ("begin", "end"), or ("rbegin", "rend") for reversed iteration."""
+        return ("begin", "end") if not self.reversed else ("rbegin", "rend")
+
+    def analyse_types(self, env):
+        """Validate that the container supports iteration and set the node type.
+
+        The container must provide argument-less begin/end methods (or their
+        reverse counterparts).  Their result must either be a C++ class that
+        supports "!=", "++" and unary "*", or a pointer.  Any violation is
+        reported as a compile error and, except for mismatching pointer types,
+        leaves the node with the error type.
+        """
+        sequence_type = self.sequence.type
+        if sequence_type.is_ptr:
+            # look up the methods on the pointed-to class
+            sequence_type = sequence_type.base_type
+        begin_name, end_name = self.get_iterator_func_names()
+        begin = sequence_type.scope.lookup(begin_name)
+        end = sequence_type.scope.lookup(end_name)
+        if (begin is None
+                or not begin.type.is_cfunction
+                or begin.type.args):
+            error(self.pos, "missing %s() on %s" % (begin_name, self.sequence.type))
+            self.type = error_type
+            return self
+        if (end is None
+                or not end.type.is_cfunction
+                or end.type.args):
+            error(self.pos, "missing %s() on %s" % (end_name, self.sequence.type))
+            self.type = error_type
+            return self
+        iter_type = begin.type.return_type
+        if iter_type.is_cpp_class:
+            if env.directives['cpp_locals']:
+                # NOTE(review): presumably needed because 'cpp_locals' changes how C++
+                # class temps are stored - the reason is not visible here, confirm.
+                self.extra_dereference = "*"
+            if env.lookup_operator_for_types(
+                    self.pos,
+                    "!=",
+                    [iter_type, end.type.return_type]) is None:
+                error(self.pos, "missing operator!= on result of %s() on %s" % (begin_name, self.sequence.type))
+                self.type = error_type
+                return self
+            if env.lookup_operator_for_types(self.pos, '++', [iter_type]) is None:
+                error(self.pos, "missing operator++ on result of %s() on %s" % (begin_name, self.sequence.type))
+                self.type = error_type
+                return self
+            if env.lookup_operator_for_types(self.pos, '*', [iter_type]) is None:
+                error(self.pos, "missing operator* on result of %s() on %s" % (begin_name, self.sequence.type))
+                self.type = error_type
+                return self
+            self.type = iter_type
+        elif iter_type.is_ptr:
+            # plain pointers act as iterators; both ends must have the same type
+            if not (iter_type == end.type.return_type):
+                error(self.pos, "incompatible types for %s() and %s()" % (begin_name, end_name))
+            self.type = iter_type
+        else:
+            error(self.pos, "result type of %s() on %s must be a C++ class or pointer" % (begin_name, self.sequence.type))
+            self.type = error_type
+        return self
+
+    def generate_result_code(self, code):
+        """Emit the C++ code that initialises the iterator from begin()/rbegin().
+
+        A simple sequence expression is used in place.  Otherwise the sequence
+        (or a pointer to it, for references) is stored in a temp first and
+        'cpp_sequence_cname' is set to the name of that temp.
+        """
+        sequence_type = self.sequence.type
+        begin_name, _ = self.get_iterator_func_names()
+        # essentially 3 options:
+        if self.sequence.is_simple():
+            # 1) Sequence can be accessed directly, like a name;
+            #    assigning to it may break the container, but that's the responsibility
+            #    of the user
+            code.putln("%s = %s%s%s();" % (
+                self.result(),
+                self.sequence.result(),
+                self.cpp_attribute_op,
+                begin_name))
+        else:
+                # (while it'd be nice to limit the scope of the loop temp, it's essentially
+                # impossible to do while supporting generators)
+                temp_type = sequence_type
+                if temp_type.is_reference:
+                    # 2) Sequence is a reference (often obtained by dereferencing a pointer);
+                    #    make the temp a pointer so we are not sensitive to users reassigning
+                    #    the pointer that it came from
+                    temp_type = PyrexTypes.CPtrType(sequence_type.ref_base_type)
+                if temp_type.is_ptr or code.globalstate.directives['cpp_locals']:
+                    self.cpp_attribute_op = "->"
+                # 3) (otherwise) sequence comes from a function call or similar, so we must
+                #    create a temp to store it in
+                self.cpp_sequence_cname = code.funcstate.allocate_temp(temp_type, manage_ref=False)
+                # a pointer temp receives the address of the sequence, any other
+                # temp receives the (moved) sequence value itself
+                code.putln("%s = %s%s;" % (self.cpp_sequence_cname,
+                                           "&" if temp_type.is_ptr else "",
+                                           self.sequence.move_result_rhs()))
+                code.putln("%s = %s%s%s();" % (
+                    self.result(),
+                    self.cpp_sequence_cname,
+                    self.cpp_attribute_op,
+                    begin_name))
+
+    def generate_iter_next_result_code(self, result_name, code):
+        """Emit the C++ code of one iteration step: leave the loop with "break"
+        when the iterator has reached end()/rend(), otherwise store the
+        dereferenced iterator in 'result_name' and increment the iterator."""
+        # end call isn't cached to support containers that allow adding while iterating
+        # (much as this is usually a bad idea)
+        _, end_name = self.get_iterator_func_names()
+        code.putln("if (!(%s%s != %s%s%s())) break;" % (
+                        self.extra_dereference,
+                        self.result(),
+                        self.cpp_sequence_cname or self.sequence.result(),
+                        self.cpp_attribute_op,
+                        end_name))
+        code.putln("%s = *%s%s;" % (
+                        result_name,
+                        self.extra_dereference,
+                        self.result()))
+        code.putln("++%s%s;" % (self.extra_dereference, self.result()))
+
+    def generate_subexpr_disposal_code(self, code):
+        """Dispose of the sequence subexpression right away only if it was copied
+        into a temp of its own; otherwise disposal waits for generate_disposal_code()."""
+        if not self.cpp_sequence_cname:
+            # the sequence is accessed directly so any temporary result in its
+            # subexpressions must remain available until the iterator is not needed
+            return
+        ExprNode.generate_subexpr_disposal_code(self, code)
+
+    def free_subexpr_temps(self, code):
+        """Release the temps of the sequence subexpression right away only if the
+        sequence was copied into a temp of its own; otherwise this is postponed
+        to generate_disposal_code()."""
+        if not self.cpp_sequence_cname:
+            # the sequence is accessed directly so any temporary result in its
+            # subexpressions must remain available until the iterator is not needed
+            return
+        ExprNode.free_subexpr_temps(self, code)
+
+    def generate_disposal_code(self, code):
+        """Dispose of this node, first catching up on any subexpression disposal
+        that was postponed while the sequence was accessed directly."""
+        if not self.cpp_sequence_cname:
+            # postponed from CppIteratorNode.generate_subexpr_disposal_code
+            # and CppIteratorNode.free_subexpr_temps
+            ExprNode.generate_subexpr_disposal_code(self, code)
+            ExprNode.free_subexpr_temps(self, code)
+        ExprNode.generate_disposal_code(self, code)
+
+    def free_temps(self, code):
+        """Release the container temp (if one was allocated) and the node's own temps."""
+        if self.cpp_sequence_cname:
+            code.funcstate.release_temp(self.cpp_sequence_cname)
+        # skip over IteratorNode since we don't use any of the temps it does
+        ExprNode.free_temps(self, code)
+
+
+class NextNode(AtomicExprNode):
+    #  Used as part of for statement implementation.
+    #  Implements result = next(iterator)
+    #  Created during analyse_types phase.
+    #  The iterator is not owned by this node.
+    #
+    #  iterator   IteratorNode
+
+    def __init__(self, iterator):
+        AtomicExprNode.__init__(self, iterator.pos)
+        self.iterator = iterator
+
+    def nogil_check(self, env):
+        # ignore - errors (if any) are already handled by IteratorNode
+        pass
+
+    def type_dependencies(self, env):
+        return self.iterator.type_dependencies(env)
+
+    def infer_type(self, env, iterator_type=None):
+        if iterator_type is None:
+            iterator_type = self.iterator.infer_type(env)
+        if iterator_type.is_ptr or iterator_type.is_array:
+            return iterator_type.base_type
+        elif iterator_type.is_cpp_class:
+            item_type = env.lookup_operator_for_types(self.pos, "*", [iterator_type]).type.return_type
+            item_type = PyrexTypes.remove_cv_ref(item_type, remove_fakeref=True)
+            return item_type
+        else:
+            # Avoid duplication of complicated logic.
+            fake_index_node = IndexNode(
+                self.pos,
+                base=self.iterator.sequence,
+                index=IntNode(self.pos, value='PY_SSIZE_T_MAX',
+                              type=PyrexTypes.c_py_ssize_t_type))
+            return fake_index_node.infer_type(env)
+
+    def analyse_types(self, env):
+        self.type = self.infer_type(env, self.iterator.type)
+        self.is_temp = 1
+        return self
+
+    def generate_result_code(self, code):
+        self.iterator.generate_iter_next_result_code(self.result(), code)
+
+
+class AsyncIteratorNode(ScopedExprNode):
+    #  Used as part of 'async for' statement implementation.
+    #
+    #  Implements result = sequence.__aiter__()
+    #
+    #  sequence   ExprNode
+
+    subexprs = ['sequence']
+
+    is_async = True
+    type = py_object_type
+    is_temp = 1
+    has_local_scope = False
+
+    def infer_type(self, env):
+        return py_object_type
+
+    def analyse_types(self, env):
+        if self.expr_scope:
+            env = self.expr_scope
+        self.sequence = self.sequence.analyse_types(env)
+        if not self.sequence.type.is_pyobject:
+            error(self.pos, "async for loops not allowed on C/C++ types")
+            self.sequence = self.sequence.coerce_to_pyobject(env)
+        return self
+
+    def generate_result_code(self, code):
+        code.globalstate.use_utility_code(UtilityCode.load_cached("AsyncIter", "Coroutine.c"))
+        code.putln("%s = __Pyx_Coroutine_GetAsyncIter(%s); %s" % (
+            self.result(),
+            self.sequence.py_result(),
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class AsyncNextNode(AtomicExprNode):
+    #  Used as part of 'async for' statement implementation.
+    #  Implements result = iterator.__anext__()
+    #  Created during analyse_types phase.
+    #  The iterator is not owned by this node.
+    #
+    #  iterator   IteratorNode
+
+    type = py_object_type
+    is_temp = 1
+
+    def __init__(self, iterator):
+        AtomicExprNode.__init__(self, iterator.pos)
+        self.iterator = iterator
+
+    def infer_type(self, env):
+        return py_object_type
+
+    def analyse_types(self, env):
+        return self
+
+    def generate_result_code(self, code):
+        code.globalstate.use_utility_code(UtilityCode.load_cached("AsyncIter", "Coroutine.c"))
+        code.putln("%s = __Pyx_Coroutine_AsyncIterNext(%s); %s" % (
+            self.result(),
+            self.iterator.py_result(),
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class WithExitCallNode(ExprNode):
+    # The __exit__() call of a 'with' statement.  Used in both the
+    # except and finally clauses.
+
+    # with_stat   WithStatNode                the surrounding 'with' statement
+    # args        TupleNode or ResultStatNode the exception info tuple
+    # await_expr  AwaitExprNode               the await expression of an 'async with' statement
+    # test_if_run bool                        guard the call with a C check of the exit
+    #                                         variable so that it runs at most once
+
+    subexprs = ['args', 'await_expr']
+    test_if_run = True
+    await_expr = None
+
+    def analyse_types(self, env):
+        """Analyse the argument tuple and the optional await expression.
+        The node itself evaluates to a C boolean held in a temp."""
+        self.args = self.args.analyse_types(env)
+        if self.await_expr:
+            self.await_expr = self.await_expr.analyse_types(env)
+        self.type = PyrexTypes.c_bint_type
+        self.is_temp = True
+        return self
+
+    def generate_evaluation_code(self, code):
+        """Emit the C code that calls the stored __exit__ callable.
+
+        The exit variable of the 'with' statement is decref-cleared right after
+        the call.  For 'async with', the call result is awaited.  If the result
+        of this node is used, it is set to the truth value of the call result.
+        """
+        if self.test_if_run:
+            # call only if it was not already called (and decref-cleared)
+            code.putln("if (%s) {" % self.with_stat.exit_var)
+
+        self.args.generate_evaluation_code(code)
+        # holds the Python object returned by the call; reference counting is
+        # done by hand below (manage_ref=False)
+        result_var = code.funcstate.allocate_temp(py_object_type, manage_ref=False)
+
+        code.mark_pos(self.pos)
+        code.globalstate.use_utility_code(UtilityCode.load_cached(
+            "PyObjectCall", "ObjectHandling.c"))
+        code.putln("%s = __Pyx_PyObject_Call(%s, %s, NULL);" % (
+            result_var,
+            self.with_stat.exit_var,
+            self.args.result()))
+        # clean up the callable and the arguments before the call result is
+        # checked, so that the cleanup also happens in the error case
+        code.put_decref_clear(self.with_stat.exit_var, type=py_object_type)
+        self.args.generate_disposal_code(code)
+        self.args.free_temps(code)
+
+        code.putln(code.error_goto_if_null(result_var, self.pos))
+        code.put_gotref(result_var, py_object_type)
+
+        if self.await_expr:
+            # FIXME: result_var temp currently leaks into the closure
+            # await the call result and store the awaited value back in result_var
+            self.await_expr.generate_evaluation_code(code, source_cname=result_var, decref_source=True)
+            code.putln("%s = %s;" % (result_var, self.await_expr.py_result()))
+            self.await_expr.generate_post_assignment_code(code)
+            self.await_expr.free_temps(code)
+
+        if self.result_is_used:
+            self.allocate_temp_result(code)
+            code.putln("%s = __Pyx_PyObject_IsTrue(%s);" % (self.result(), result_var))
+        # the Python result is released before the truth value is error checked
+        code.put_decref_clear(result_var, type=py_object_type)
+        if self.result_is_used:
+            code.put_error_if_neg(self.pos, self.result())
+        code.funcstate.release_temp(result_var)
+        if self.test_if_run:
+            code.putln("}")
+
+
+class ExcValueNode(AtomicExprNode):
+    #  Node created during analyse_types phase
+    #  of an ExceptClauseNode to fetch the current
+    #  exception value.
+
+    type = py_object_type
+
+    def __init__(self, pos):
+        ExprNode.__init__(self, pos)
+
+    def set_var(self, var):
+        self.var = var
+
+    def calculate_result_code(self):
+        return self.var
+
+    def generate_result_code(self, code):
+        pass
+
+    def analyse_types(self, env):
+        return self
+
+
+class TempNode(ExprNode):
+    # Node created during analyse_types phase
+    # of some nodes to hold a temporary value.
+    #
+    # Note: One must call "allocate" and "release" on
+    # the node during code generation to get/release the temp.
+    # This is because the temp result is often used outside of
+    # the regular cycle.
+
+    subexprs = []
+
+    def __init__(self, pos, type, env=None):
+        ExprNode.__init__(self, pos)
+        self.type = type
+        if type.is_pyobject:
+            self.result_ctype = py_object_type
+        self.is_temp = 1
+
+    def analyse_types(self, env):
+        return self
+
+    def analyse_target_declaration(self, env):
+        self.is_target = True
+
+    def generate_result_code(self, code):
+        pass
+
+    def allocate(self, code):
+        self.temp_cname = code.funcstate.allocate_temp(self.type, manage_ref=True)
+
+    def release(self, code):
+        code.funcstate.release_temp(self.temp_cname)
+        self.temp_cname = None
+
+    def result(self):
+        try:
+            return self.temp_cname
+        except:
+            assert False, "Remember to call allocate/release on TempNode"
+            raise
+
+    # Do not participate in normal temp alloc/dealloc:
+    def allocate_temp_result(self, code):
+        pass
+
+    def release_temp_result(self, code):
+        pass
+
+class PyTempNode(TempNode):
+    #  TempNode holding a Python value.
+
+    def __init__(self, pos, env):
+        TempNode.__init__(self, pos, PyrexTypes.py_object_type, env)
+
+class RawCNameExprNode(ExprNode):
+    subexprs = []
+
+    def __init__(self, pos, type=None, cname=None):
+        ExprNode.__init__(self, pos, type=type)
+        if cname is not None:
+            self.cname = cname
+
+    def analyse_types(self, env):
+        return self
+
+    def set_cname(self, cname):
+        self.cname = cname
+
+    def result(self):
+        return self.cname
+
+    def generate_result_code(self, code):
+        pass
+
+
+#-------------------------------------------------------------------
+#
+#  F-strings
+#
+#-------------------------------------------------------------------
+
+
+class JoinedStrNode(ExprNode):
+    # F-strings
+    #
+    # values   [UnicodeNode|FormattedValueNode]   Substrings of the f-string
+    #
+    type = unicode_type
+    is_temp = True
+    gil_message = "String concatenation"
+
+    subexprs = ['values']
+
+    def analyse_types(self, env):
+        """Analyse all substrings and coerce each of them to a Python object."""
+        self.values = [v.analyse_types(env).coerce_to_pyobject(env) for v in self.values]
+        return self
+
+    def may_be_none(self):
+        # PyUnicode_Join() always returns a Unicode string or raises an exception
+        return False
+
+    def generate_evaluation_code(self, code):
+        """Emit the C code that evaluates all substrings and joins them.
+
+        The substrings are collected in a tuple while their total length and
+        the largest character value are accumulated in C variables, which are
+        then passed to __Pyx_PyUnicode_Join() together with the tuple.  For
+        string literals, as much as possible is determined at compile time.
+        """
+        code.mark_pos(self.pos)
+        num_items = len(self.values)
+        # NOTE: despite its name, 'list_var' holds a tuple (see PyTuple_New below)
+        list_var = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+        ulength_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+        max_char_var = code.funcstate.allocate_temp(PyrexTypes.c_py_ucs4_type, manage_ref=False)
+
+        code.putln('%s = PyTuple_New(%s); %s' % (
+            list_var,
+            num_items,
+            code.error_goto_if_null(list_var, self.pos)))
+        code.put_gotref(list_var, py_object_type)
+        code.putln("%s = 0;" % ulength_var)
+        code.putln("%s = 127;" % max_char_var)  # at least ASCII character range
+
+        for i, node in enumerate(self.values):
+            node.generate_evaluation_code(code)
+            node.make_owned_reference(code)
+
+            # defaults: determine length and maximum character at C runtime
+            ulength = "__Pyx_PyUnicode_GET_LENGTH(%s)" % node.py_result()
+            max_char_value = "__Pyx_PyUnicode_MAX_CHAR_VALUE(%s)" % node.py_result()
+            is_ascii = False
+            if isinstance(node, UnicodeNode):
+                try:
+                    # most strings will be ASCII or at least Latin-1
+                    node.value.encode('iso8859-1')
+                    max_char_value = '255'
+                    node.value.encode('us-ascii')
+                    is_ascii = True
+                except UnicodeEncodeError:
+                    # Raised by either of the two encode() calls above.  If the
+                    # Latin-1 step succeeded, max_char_value is already '255' and
+                    # nothing more is done (the length stays a runtime lookup).
+                    if max_char_value != '255':
+                        # not ISO8859-1 => check BMP limit
+                        max_char = max(map(ord, node.value))
+                        if max_char < 0xD800:
+                            # BMP-only, no surrogate pairs used
+                            max_char_value = '65535'
+                            ulength = str(len(node.value))
+                        elif max_char >= 65536:
+                            # clearly outside of BMP, and not on a 16-bit Unicode system
+                            max_char_value = '1114111'
+                            ulength = str(len(node.value))
+                        else:
+                            # not really worth implementing a check for surrogate pairs here
+                            # drawback: C code can differ when generating on Py2 with 2-byte Unicode
+                            pass
+                else:
+                    # pure ASCII literal: the length is known at compile time
+                    ulength = str(len(node.value))
+            elif isinstance(node, FormattedValueNode) and node.value.type.is_numeric:
+                is_ascii = True  # formatted C numbers are always ASCII
+
+            if not is_ascii:
+                # ASCII substrings cannot raise the maximum above its initial 127
+                code.putln("%s = (%s > %s) ? %s : %s;" % (
+                    max_char_var, max_char_value, max_char_var, max_char_value, max_char_var))
+            code.putln("%s += %s;" % (ulength_var, ulength))
+
+            # hand the owned reference over to the tuple
+            node.generate_giveref(code)
+            code.putln('PyTuple_SET_ITEM(%s, %s, %s);' % (list_var, i, node.py_result()))
+            node.generate_post_assignment_code(code)
+            node.free_temps(code)
+
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+        code.globalstate.use_utility_code(UtilityCode.load_cached("JoinPyUnicode", "StringTools.c"))
+        code.putln('%s = __Pyx_PyUnicode_Join(%s, %d, %s, %s); %s' % (
+            self.result(),
+            list_var,
+            num_items,
+            ulength_var,
+            max_char_var,
+            code.error_goto_if_null(self.py_result(), self.pos)))
+        self.generate_gotref(code)
+
+        # the tuple and the helper variables are not needed after the join
+        code.put_decref_clear(list_var, py_object_type)
+        code.funcstate.release_temp(list_var)
+        code.funcstate.release_temp(ulength_var)
+        code.funcstate.release_temp(max_char_var)
+
+
+class FormattedValueNode(ExprNode):
+    # {}-delimited portions of an f-string
+    #
+    # value           ExprNode                The expression itself
+    # conversion_char str or None             Type conversion (!s, !r, !a, none, or 'd' for integer conversion)
+    # format_spec     JoinedStrNode or None   Format string passed to __format__
+    # c_format_spec   str or None             If not None, formatting can be done at the C level
+
+    subexprs = ['value', 'format_spec']
+
+    type = unicode_type
+    is_temp = True
+    c_format_spec = None
+    gil_message = "String formatting"
+
+    # Maps a conversion character to the name of the C function that applies it;
+    # yields None for unknown characters (this is the bound dict.get method).
+    find_conversion_func = {
+        's': 'PyObject_Unicode',
+        'r': 'PyObject_Repr',
+        'a': 'PyObject_ASCII',  # NOTE: mapped to PyObject_Repr() in Py2
+        'd': '__Pyx_PyNumber_IntOrLong',  # NOTE: internal mapping for '%d' formatting
+    }.get
+
+    def may_be_none(self):
+        # PyObject_Format() always returns a Unicode string or raises an exception
+        return False
+
+    def analyse_types(self, env):
+        """Analyse the value and the format spec and pick a formatting strategy.
+
+        If there is no format spec, or it is a string literal, and the type of
+        the value can convert itself to a Python string with it, the spec is
+        stored in 'c_format_spec' and formatting happens at the C level.
+        Otherwise the value is coerced to a Python object.  In that case, a
+        value that is known to be a non-None unicode string and gets no special
+        formatting is returned directly, in place of this node.
+        """
+        self.value = self.value.analyse_types(env)
+        if not self.format_spec or self.format_spec.is_string_literal:
+            c_format_spec = self.format_spec.value if self.format_spec else self.value.type.default_format_spec
+            if self.value.type.can_coerce_to_pystring(env, format_spec=c_format_spec):
+                self.c_format_spec = c_format_spec
+
+        if self.format_spec:
+            self.format_spec = self.format_spec.analyse_types(env).coerce_to_pyobject(env)
+        if self.c_format_spec is None:
+            self.value = self.value.coerce_to_pyobject(env)
+            if not self.format_spec and (not self.conversion_char or self.conversion_char == 's'):
+                if self.value.type is unicode_type and not self.value.may_be_none():
+                    # value is definitely a unicode string and we don't format it any special
+                    return self.value
+        return self
+
+    def generate_result_code(self, code):
+        """Emit the C code that formats the value into a unicode string.
+
+        Non-Python values with a C level format spec are converted by their
+        type.  Everything else goes through one of the __Pyx_PyObject_Format*
+        helper functions, after applying the conversion function (if any).
+        """
+        if self.c_format_spec is not None and not self.value.type.is_pyobject:
+            # C level formatting, implemented by the type of the value
+            convert_func_call = self.value.type.convert_to_pystring(
+                self.value.result(), code, self.c_format_spec)
+            code.putln("%s = %s; %s" % (
+                self.result(),
+                convert_func_call,
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+            return
+
+        value_result = self.value.py_result()
+        value_is_unicode = self.value.type is unicode_type and not self.value.may_be_none()
+        if self.format_spec:
+            format_func = '__Pyx_PyObject_Format'
+            format_spec = self.format_spec.py_result()
+        else:
+            # common case: expect simple Unicode pass-through if no format spec
+            format_func = '__Pyx_PyObject_FormatSimple'
+            # passing a Unicode format string in Py2 forces PyObject_Format() to also return a Unicode string
+            format_spec = Naming.empty_unicode
+
+        conversion_char = self.conversion_char
+        if conversion_char == 's' and value_is_unicode:
+            # no need to pipe unicode strings through str()
+            conversion_char = None
+
+        if conversion_char:
+            fn = self.find_conversion_func(conversion_char)
+            assert fn is not None, "invalid conversion character found: '%s'" % conversion_char
+            # The conversion call is passed as the value argument; the
+            # "...AndDecref" variant of the format function is used for it.
+            value_result = '%s(%s)' % (fn, value_result)
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectFormatAndDecref", "StringTools.c"))
+            format_func += 'AndDecref'
+        elif self.format_spec:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectFormat", "StringTools.c"))
+        else:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectFormatSimple", "StringTools.c"))
+
+        code.putln("%s = %s(%s, %s); %s" % (
+            self.result(),
+            format_func,
+            value_result,
+            format_spec,
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+#-------------------------------------------------------------------
+#
+#  Parallel nodes (cython.parallel.thread(savailable|id))
+#
+#-------------------------------------------------------------------
+
+class ParallelThreadsAvailableNode(AtomicExprNode):
+    """
+    Note: this is disabled and not a valid directive at this moment
+
+    Implements cython.parallel.threadsavailable(). If we are called from the
+    sequential part of the application, we need to call omp_get_max_threads(),
+    and in the parallel part we can just call omp_get_num_threads()
+    """
+
+    type = PyrexTypes.c_int_type
+
+    def analyse_types(self, env):
+        self.is_temp = True
+        # env.add_include_file("omp.h")
+        return self
+
+    def generate_result_code(self, code):
+        code.putln("#ifdef _OPENMP")
+        code.putln("if (omp_in_parallel()) %s = omp_get_max_threads();" %
+                                                            self.temp_code)
+        code.putln("else %s = omp_get_num_threads();" % self.temp_code)
+        code.putln("#else")
+        code.putln("%s = 1;" % self.temp_code)
+        code.putln("#endif")
+
+    def result(self):
+        return self.temp_code
+
+
+class ParallelThreadIdNode(AtomicExprNode):  #, Nodes.ParallelNode):
+    """
+    Implements cython.parallel.threadid()
+    """
+
+    type = PyrexTypes.c_int_type
+
+    def analyse_types(self, env):
+        self.is_temp = True
+        # env.add_include_file("omp.h")
+        return self
+
+    def generate_result_code(self, code):
+        code.putln("#ifdef _OPENMP")
+        code.putln("%s = omp_get_thread_num();" % self.temp_code)
+        code.putln("#else")
+        code.putln("%s = 0;" % self.temp_code)
+        code.putln("#endif")
+
+    def result(self):
+        return self.temp_code
+
+
+#-------------------------------------------------------------------
+#
+#  Trailer nodes
+#
+#-------------------------------------------------------------------
+
+
+class _IndexingBaseNode(ExprNode):
+    # Base class for indexing nodes.
+    #
+    # base   ExprNode   the value being indexed
+
+    def is_ephemeral(self):
+        # in most cases, indexing will return a safe reference to an object in a container,
+        # so we consider the result safe if the base object is
+        return self.base.is_ephemeral() or self.base.type in (
+            basestring_type, str_type, bytes_type, bytearray_type, unicode_type)
+
+    def check_const_addr(self):
+        return self.base.check_const_addr() and self.index.check_const()
+
+    def is_lvalue(self):
+        # NOTE: references currently have both is_reference and is_ptr
+        # set.  Since pointers and references have different lvalue
+        # rules, we must be careful to separate the two.
+        if self.type.is_reference:
+            if self.type.ref_base_type.is_array:
+                # fixed-sized arrays aren't l-values
+                return False
+        elif self.type.is_ptr:
+            # non-const pointers can always be reassigned
+            return True
+        # Just about everything else returned by the index operator
+        # can be an lvalue.
+        return True
+
+
+class IndexNode(_IndexingBaseNode):
+    #  Sequence indexing.
+    #
+    #  base     ExprNode
+    #  index    ExprNode
+    #  type_indices  [PyrexType]
+    #
+    #  is_fused_index boolean   Whether the index is used to specialize a
+    #                           c(p)def function
+
+    subexprs = ['base', 'index']
+    type_indices = None
+
+    is_subscript = True
+    is_fused_index = False
+
+    def calculate_constant_result(self):
+        self.constant_result = self.base.constant_result[self.index.constant_result]
+
+    def compile_time_value(self, denv):
+        base = self.base.compile_time_value(denv)
+        index = self.index.compile_time_value(denv)
+        try:
+            return base[index]
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def is_simple(self):
+        base = self.base
+        return (base.is_simple() and self.index.is_simple()
+                and base.type and (base.type.is_ptr or base.type.is_array))
+
+    def may_be_none(self):
+        base_type = self.base.type
+        if base_type:
+            if base_type.is_string:
+                return False
+            if isinstance(self.index, SliceNode):
+                # slicing!
+                if base_type in (bytes_type, bytearray_type, str_type, unicode_type,
+                                 basestring_type, list_type, tuple_type):
+                    return False
+        return ExprNode.may_be_none(self)
+
+    def analyse_target_declaration(self, env):
+        pass
+
+    def analyse_as_type(self, env):
+        base_type = self.base.analyse_as_type(env)
+        if base_type:
+            if base_type.is_cpp_class or base_type.python_type_constructor_name:
+                if self.index.is_sequence_constructor:
+                    template_values = self.index.args
+                else:
+                    template_values = [self.index]
+                type_node = Nodes.TemplatedTypeNode(
+                    pos=self.pos,
+                    positional_args=template_values,
+                    keyword_args=None)
+                return type_node.analyse(env, base_type=base_type)
+            elif self.index.is_slice or self.index.is_sequence_constructor:
+                # memory view
+                from . import MemoryView
+                env.use_utility_code(MemoryView.view_utility_code)
+                axes = [self.index] if self.index.is_slice else list(self.index.args)
+                return PyrexTypes.MemoryViewSliceType(base_type, MemoryView.get_axes_specs(env, axes))
+            elif not base_type.is_pyobject:
+                # C array
+                index = self.index.compile_time_value(env)
+                if index is not None:
+                    try:
+                        index = int(index)
+                    except (ValueError, TypeError):
+                        pass
+                    else:
+                        return PyrexTypes.CArrayType(base_type, index)
+                error(self.pos, "Array size must be a compile time constant")
+        return None
+
+    def analyse_pytyping_modifiers(self, env):
+        # Check for declaration modifiers, e.g. "typing.Optional[...]" or "dataclasses.InitVar[...]"
+        # TODO: somehow bring this together with TemplatedTypeNode.analyse_pytyping_modifiers()
+        modifiers = []
+        modifier_node = self
+        while modifier_node.is_subscript:
+            modifier_type = modifier_node.base.analyse_as_type(env)
+            if (modifier_type and modifier_type.python_type_constructor_name
+                    and modifier_type.modifier_name):
+                modifiers.append(modifier_type.modifier_name)
+            modifier_node = modifier_node.index
+        return modifiers
+
+    def type_dependencies(self, env):
+        return self.base.type_dependencies(env) + self.index.type_dependencies(env)
+
+    def infer_type(self, env):
+        base_type = self.base.infer_type(env)
+        if self.index.is_slice:
+            # slicing!
+            if base_type.is_string:
+                # sliced C strings must coerce to Python
+                return bytes_type
+            elif base_type.is_pyunicode_ptr:
+                # sliced Py_UNICODE* strings must coerce to Python
+                return unicode_type
+            elif base_type in (unicode_type, bytes_type, str_type,
+                               bytearray_type, list_type, tuple_type):
+                # slicing these returns the same type
+                return base_type
+            elif base_type.is_memoryviewslice:
+                return base_type
+            else:
+                # TODO: Handle buffers (hopefully without too much redundancy).
+                return py_object_type
+
+        index_type = self.index.infer_type(env)
+        if index_type and index_type.is_int or isinstance(self.index, IntNode):
+            # indexing!
+            if base_type is unicode_type:
+                # Py_UCS4 will automatically coerce to a unicode string
+                # if required, so this is safe.  We only infer Py_UCS4
+                # when the index is a C integer type.  Otherwise, we may
+                # need to use normal Python item access, in which case
+                # it's faster to return the one-char unicode string than
+                # to receive it, throw it away, and potentially rebuild it
+                # on a subsequent PyObject coercion.
+                return PyrexTypes.c_py_ucs4_type
+            elif base_type is str_type:
+                # always returns str - Py2: bytes, Py3: unicode
+                return base_type
+            elif base_type is bytearray_type:
+                return PyrexTypes.c_uchar_type
+            elif isinstance(self.base, BytesNode):
+                #if env.global_scope().context.language_level >= 3:
+                #    # inferring 'char' can be made to work in Python 3 mode
+                #    return PyrexTypes.c_char_type
+                # Py2/3 return different types on indexing bytes objects
+                return py_object_type
+            elif base_type in (tuple_type, list_type):
+                # if base is a literal, take a look at its values
+                item_type = infer_sequence_item_type(
+                    env, self.base, self.index, seq_type=base_type)
+                if item_type is not None:
+                    return item_type
+            elif base_type.is_ptr or base_type.is_array:
+                return base_type.base_type
+            elif base_type.is_ctuple and isinstance(self.index, IntNode):
+                if self.index.has_constant_result():
+                    index = self.index.constant_result
+                    if index < 0:
+                        index += base_type.size
+                    if 0 <= index < base_type.size:
+                        return base_type.components[index]
+            elif base_type.is_memoryviewslice:
+                if base_type.ndim == 0:
+                    pass  # probably an error, but definitely don't know what to do - return pyobject for now
+                if base_type.ndim == 1:
+                    return base_type.dtype
+                else:
+                    return PyrexTypes.MemoryViewSliceType(base_type.dtype, base_type.axes[1:])
+
+        if self.index.is_sequence_constructor and base_type.is_memoryviewslice:
+            inferred_type = base_type
+            for a in self.index.args:
+                if not inferred_type.is_memoryviewslice:
+                    break  # something's gone wrong
+                inferred_type = IndexNode(self.pos, base=ExprNode(self.base.pos, type=inferred_type),
+                                          index=a).infer_type(env)
+            else:
+                return inferred_type
+
+        if base_type.is_cpp_class:
+            class FakeOperand:
+                def __init__(self, **kwds):
+                    self.__dict__.update(kwds)
+            operands = [
+                FakeOperand(pos=self.pos, type=base_type),
+                FakeOperand(pos=self.pos, type=index_type),
+            ]
+            index_func = env.lookup_operator('[]', operands)
+            if index_func is not None:
+                return index_func.type.return_type
+
+        if is_pythran_expr(base_type) and is_pythran_expr(index_type):
+            index_with_type = (self.index, index_type)
+            return PythranExpr(pythran_indexing_type(base_type, [index_with_type]))
+
+        # may be slicing or indexing, we don't know
+        if base_type in (unicode_type, str_type):
+            # these types always returns their own type on Python indexing/slicing
+            return base_type
+        else:
+            # TODO: Handle buffers (hopefully without too much redundancy).
+            return py_object_type
+
+    def analyse_types(self, env):
+        return self.analyse_base_and_index_types(env, getting=True)
+
+    def analyse_target_types(self, env):
+        node = self.analyse_base_and_index_types(env, setting=True)
+        if node.type.is_const:
+            error(self.pos, "Assignment to const dereference")
+        if node is self and not node.is_lvalue():
+            error(self.pos, "Assignment to non-lvalue of type '%s'" % node.type)
+        return node
+
+    def analyse_base_and_index_types(self, env, getting=False, setting=False,
+                                     analyse_base=True):
+        """
+        Analyse the base and index expressions and dispatch to the analysis
+        method that matches the type of the base.
+
+        'getting' / 'setting' tell whether the node is being read or assigned to.
+        'analyse_base' is False when self.base has been prepared by the caller
+        already and must not be analysed again.
+
+        Returns the node to use in place of this one: self, self.base (for
+        reading index 0 or -1 of a unicode character), or a replacement node
+        created by analyse_as_buffer_operation().
+        """
+        # Note: This might be cleaned up by having IndexNode
+        # parsed in a saner way and only construct the tuple if
+        # needed.
+        if analyse_base:
+            self.base = self.base.analyse_types(env)
+
+        if self.base.type.is_error:
+            # Do not visit child tree if base is undeclared to avoid confusing
+            # error messages
+            self.type = PyrexTypes.error_type
+            return self
+
+        is_slice = self.index.is_slice
+        if not env.directives['wraparound']:
+            # With wraparound disabled, pass the index (or the slice bounds)
+            # to check_negative_indices().
+            if is_slice:
+                check_negative_indices(self.index.start, self.index.stop)
+            else:
+                check_negative_indices(self.index)
+
+        # Potentially overflowing index value.
+        if not is_slice and isinstance(self.index, IntNode) and Utils.long_literal(self.index.value):
+            self.index = self.index.coerce_to_pyobject(env)
+
+        is_memslice = self.base.type.is_memoryviewslice
+        # Handle the case where base is a literal char* (and we expect a string, not an int)
+        if not is_memslice and (isinstance(self.base, BytesNode) or is_slice):
+            if self.base.type.is_string or not (self.base.type.is_ptr or self.base.type.is_array):
+                self.base = self.base.coerce_to_pyobject(env)
+
+        # Buffer and memory view accesses are handled by dedicated node classes.
+        replacement_node = self.analyse_as_buffer_operation(env, getting)
+        if replacement_node is not None:
+            return replacement_node
+
+        self.nogil = env.nogil
+        base_type = self.base.type
+
+        if not base_type.is_cfunction:
+            # For C functions, the index is not analysed as an expression here;
+            # analyse_as_c_function() deals with it.
+            self.index = self.index.analyse_types(env)
+            # Remember the index type as written (with references stripped),
+            # since self.index may get coerced later on.
+            self.original_index_type = self.index.type
+            if self.original_index_type.is_reference:
+                self.original_index_type = self.original_index_type.ref_base_type
+
+            if base_type.is_unicode_char:
+                # we infer Py_UNICODE/Py_UCS4 for unicode strings in some
+                # cases, but indexing must still work for them
+                if setting:
+                    warning(self.pos, "cannot assign to Unicode string index", level=1)
+                elif self.index.constant_result in (0, -1):
+                    # uchar[0] => uchar
+                    return self.base
+                self.base = self.base.coerce_to_pyobject(env)
+                base_type = self.base.type
+
+        # Dispatch on the (possibly coerced) base type.
+        if base_type.is_pyobject:
+            return self.analyse_as_pyobject(env, is_slice, getting, setting)
+        elif base_type.is_ptr or base_type.is_array:
+            return self.analyse_as_c_array(env, is_slice)
+        elif base_type.is_cpp_class:
+            return self.analyse_as_cpp(env, setting)
+        elif base_type.is_cfunction:
+            return self.analyse_as_c_function(env)
+        elif base_type.is_ctuple:
+            return self.analyse_as_c_tuple(env, getting, setting)
+        else:
+            error(self.pos,
+                  "Attempting to index non-array type '%s'" %
+                  base_type)
+            self.type = PyrexTypes.error_type
+            return self
+
+    def analyse_as_pyobject(self, env, is_slice, getting, setting):
+        """
+        Analyse indexing/slicing of a Python object base.
+
+        Coerces the index (to Py_ssize_t for C integer indices into non-dict
+        bases, to a Python object otherwise), decides whether the result needs
+        a temporary (self.is_temp) and sets the result type (self.type).
+        Returns self.
+        """
+        base_type = self.base.type
+        if self.index.type.is_unicode_char and base_type is not dict_type:
+            # TODO: eventually fold into case below and remove warning, once people have adapted their code
+            warning(self.pos,
+                    "Item lookup of unicode character codes now always converts to a Unicode string. "
+                    "Use an explicit C integer cast to get back the previous integer lookup behaviour.", level=1)
+            self.index = self.index.coerce_to_pyobject(env)
+            self.is_temp = 1
+        elif self.index.type.is_int and base_type is not dict_type:
+            # No temp is needed only when reading from a list/tuple/bytearray
+            # with bounds checking disabled and an index that is unsigned,
+            # a non-negative integer constant, or used with wraparound disabled.
+            if (getting
+                    and not env.directives['boundscheck']
+                    and (base_type in (list_type, tuple_type, bytearray_type))
+                    and (not self.index.type.signed
+                         or not env.directives['wraparound']
+                         or (isinstance(self.index, IntNode) and
+                             self.index.has_constant_result() and self.index.constant_result >= 0))
+                    ):
+                self.is_temp = 0
+            else:
+                self.is_temp = 1
+            self.index = self.index.coerce_to(PyrexTypes.c_py_ssize_t_type, env).coerce_to_simple(env)
+            self.original_index_type.create_to_py_utility_code(env)
+        else:
+            self.index = self.index.coerce_to_pyobject(env)
+            self.is_temp = 1
+
+        # Determine the result type, using the index type after coercion.
+        if self.index.type.is_int and base_type is unicode_type:
+            # Py_UNICODE/Py_UCS4 will automatically coerce to a unicode string
+            # if required, so this is fast and safe
+            self.type = PyrexTypes.c_py_ucs4_type
+        elif self.index.type.is_int and base_type is bytearray_type:
+            if setting:
+                self.type = PyrexTypes.c_uchar_type
+            else:
+                # not using 'uchar' to enable fast and safe error reporting as '-1'
+                self.type = PyrexTypes.c_int_type
+        elif is_slice and base_type in (bytes_type, bytearray_type, str_type, unicode_type, list_type, tuple_type):
+            # Slicing these builtin types gives back the same type.
+            self.type = base_type
+        else:
+            item_type = None
+            if base_type in (list_type, tuple_type) and self.index.type.is_int:
+                item_type = infer_sequence_item_type(
+                    env, self.base, self.index, seq_type=base_type)
+            if base_type in (list_type, tuple_type, dict_type):
+                # do the None check explicitly (not in a helper) to allow optimising it away
+                self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
+            if item_type is None or not item_type.is_pyobject:
+                # Even if we inferred a C type as result, we will read a Python object, so trigger coercion if needed.
+                # We could potentially use "item_type.equivalent_type" here, but that may trigger assumptions
+                # about the actual runtime item types, rather than just their ability to coerce to the C "item_type".
+                self.type = py_object_type
+            else:
+                self.type = item_type
+
+        self.wrap_in_nonecheck_node(env, getting)
+        return self
+
+    def analyse_as_c_array(self, env, is_slice):
+        base_type = self.base.type
+        self.type = base_type.base_type
+        if self.type.is_cpp_class:
+            self.type = PyrexTypes.CReferenceType(self.type)
+        if is_slice:
+            self.type = base_type
+        elif self.index.type.is_pyobject:
+            self.index = self.index.coerce_to(PyrexTypes.c_py_ssize_t_type, env)
+        elif not self.index.type.is_int:
+            error(self.pos, "Invalid index type '%s'" % self.index.type)
+        return self
+
+    def analyse_as_cpp(self, env, setting):
+        base_type = self.base.type
+        function = env.lookup_operator("[]", [self.base, self.index])
+        if function is None:
+            error(self.pos, "Indexing '%s' not supported for index type '%s'" % (base_type, self.index.type))
+            self.type = PyrexTypes.error_type
+            self.result_code = "<error>"
+            return self
+        func_type = function.type
+        if func_type.is_ptr:
+            func_type = func_type.base_type
+        self.exception_check = func_type.exception_check
+        self.exception_value = func_type.exception_value
+        if self.exception_check:
+            if not setting:
+                self.is_temp = True
+            if needs_cpp_exception_conversion(self):
+                env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+        self.index = self.index.coerce_to(func_type.args[0].type, env)
+        self.type = func_type.return_type
+        if setting and not func_type.return_type.is_reference:
+            error(self.pos, "Can't set non-reference result '%s'" % self.type)
+        return self
+
+    def analyse_as_c_function(self, env):
+        base_type = self.base.type
+        if base_type.is_fused:
+            self.parse_indexed_fused_cdef(env)
+        else:
+            self.type_indices = self.parse_index_as_types(env)
+            self.index = None  # FIXME: use a dedicated Node class instead of generic IndexNode
+            if base_type.templates is None:
+                error(self.pos, "Can only parameterize template functions.")
+                self.type = error_type
+            elif self.type_indices is None:
+                # Error recorded earlier.
+                self.type = error_type
+            elif len(base_type.templates) != len(self.type_indices):
+                error(self.pos, "Wrong number of template arguments: expected %s, got %s" % (
+                        (len(base_type.templates), len(self.type_indices))))
+                self.type = error_type
+            else:
+                self.type = base_type.specialize(dict(zip(base_type.templates, self.type_indices)))
+        # FIXME: use a dedicated Node class instead of generic IndexNode
+        return self
+
+    def analyse_as_c_tuple(self, env, getting, setting):
+        base_type = self.base.type
+        if isinstance(self.index, IntNode) and self.index.has_constant_result():
+            index = self.index.constant_result
+            if -base_type.size <= index < base_type.size:
+                if index < 0:
+                    index += base_type.size
+                self.type = base_type.components[index]
+            else:
+                error(self.pos,
+                      "Index %s out of bounds for '%s'" %
+                      (index, base_type))
+                self.type = PyrexTypes.error_type
+            return self
+        else:
+            self.base = self.base.coerce_to_pyobject(env)
+            return self.analyse_base_and_index_types(env, getting=getting, setting=setting, analyse_base=False)
+
+    def analyse_as_buffer_operation(self, env, getting):
+        """
+        Analyse buffer indexing and memoryview indexing/slicing
+
+        Returns an analysed replacement node (MemoryViewSliceNode,
+        MemoryViewIndexNode or BufferIndexNode) if the base is a memory view
+        slice, a buffer or a Pythran expression that can be handled that way,
+        and None otherwise.
+        """
+        # A tuple index provides one index per dimension.
+        if isinstance(self.index, TupleNode):
+            indices = self.index.args
+        else:
+            indices = [self.index]
+
+        base = self.base
+        base_type = base.type
+        replacement_node = None
+        if base_type.is_memoryviewslice:
+            # memoryviewslice indexing or slicing
+            from . import MemoryView
+            if base.is_memview_slice:
+                # For memory views, "view[i][j]" is the same as "view[i, j]" => use the latter for speed.
+                merged_indices = base.merged_indices(indices)
+                if merged_indices is not None:
+                    base = base.base
+                    base_type = base.type
+                    indices = merged_indices
+            have_slices, indices, newaxes = MemoryView.unellipsify(indices, base_type.ndim)
+            if have_slices:
+                replacement_node = MemoryViewSliceNode(self.pos, indices=indices, base=base)
+            else:
+                replacement_node = MemoryViewIndexNode(self.pos, indices=indices, base=base)
+        elif base_type.is_buffer or base_type.is_pythran_expr:
+            # Buffers are only handled here when indexed in all dimensions.
+            if base_type.is_pythran_expr or len(indices) == base_type.ndim:
+                # Buffer indexing
+                is_buffer_access = True
+                indices = [index.analyse_types(env) for index in indices]
+                if base_type.is_pythran_expr:
+                    # Pythran accepts integers, slices and Pythran expressions as indices.
+                    do_replacement = all(
+                        index.type.is_int or index.is_slice or index.type.is_pythran_expr
+                        for index in indices)
+                    if do_replacement:
+                        # Replace each slice index by an analysed SliceIntNode.
+                        for i,index in enumerate(indices):
+                            if index.is_slice:
+                                index = SliceIntNode(index.pos, start=index.start, stop=index.stop, step=index.step)
+                                index = index.analyse_types(env)
+                                indices[i] = index
+                else:
+                    # Plain buffers need integer indices in all dimensions.
+                    do_replacement = all(index.type.is_int for index in indices)
+                if do_replacement:
+                    replacement_node = BufferIndexNode(self.pos, indices=indices, base=base)
+                    # On cloning, indices is cloned. Otherwise, unpack index into indices.
+                    assert not isinstance(self.index, CloneNode)
+
+        if replacement_node is not None:
+            replacement_node = replacement_node.analyse_types(env, getting)
+        return replacement_node
+
+    def wrap_in_nonecheck_node(self, env, getting):
+        if not env.directives['nonecheck'] or not self.base.may_be_none():
+            return
+        self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
+
+    def parse_index_as_types(self, env, required=True):
+        if isinstance(self.index, TupleNode):
+            indices = self.index.args
+        else:
+            indices = [self.index]
+        type_indices = []
+        for index in indices:
+            type_indices.append(index.analyse_as_type(env))
+            if type_indices[-1] is None:
+                if required:
+                    error(index.pos, "not parsable as a type")
+                return None
+        return type_indices
+
+    def parse_indexed_fused_cdef(self, env):
+        """
+        Interpret fused_cdef_func[specific_type1, ...]
+
+        Note that if this method is called, we are an indexed cdef function
+        with fused argument types, and this IndexNode will be replaced by the
+        NameNode with specific entry just after analysis of expressions by
+        AnalyseExpressionsTransform.
+
+        On success, self.type is set to the matching specialised signature.
+        On failure, an error is reported and self.type stays the error type.
+        """
+        # Assume failure until a matching specialisation is found.
+        self.type = PyrexTypes.error_type
+
+        self.is_fused_index = True
+
+        base_type = self.base.type
+        # Source positions of the individual index items, for error reporting.
+        positions = []
+
+        if self.index.is_name or self.index.is_attribute:
+            positions.append(self.index.pos)
+        elif isinstance(self.index, TupleNode):
+            for arg in self.index.args:
+                positions.append(arg.pos)
+        specific_types = self.parse_index_as_types(env, required=False)
+
+        if specific_types is None:
+            # The index is not a list of types, so treat it as an expression.
+            self.index = self.index.analyse_types(env)
+
+            if not self.base.entry.as_variable:
+                error(self.pos, "Can only index fused functions with types")
+            else:
+                # A cpdef function indexed with Python objects
+                self.base.entry = self.entry = self.base.entry.as_variable
+                self.base.type = self.type = self.entry.type
+
+                self.base.is_temp = True
+                self.is_temp = True
+
+                self.entry.used = True
+
+            self.is_fused_index = False
+            return
+
+        for i, type in enumerate(specific_types):
+            specific_types[i] = type.specialize_fused(env)
+
+        fused_types = base_type.get_fused_types()
+        if len(specific_types) > len(fused_types):
+            return error(self.pos, "Too many types specified")
+        elif len(specific_types) < len(fused_types):
+            # Name the first fused type that was left unspecified.
+            t = fused_types[len(specific_types)]
+            return error(self.pos, "Not enough types specified to specialize "
+                                   "the function, %s is still fused" % t)
+
+        # See if our index types form valid specializations
+        for pos, specific_type, fused_type in zip(positions,
+                                                  specific_types,
+                                                  fused_types):
+            if not any([specific_type.same_as(t) for t in fused_type.types]):
+                return error(pos, "Type not in fused type")
+
+            # NOTE(review): this check runs after specific_type.same_as() has
+            # already been called on the value above - confirm the intended order.
+            if specific_type is None or specific_type.is_error:
+                return
+
+        fused_to_specific = dict(zip(fused_types, specific_types))
+        type = base_type.specialize(fused_to_specific)
+
+        if type.is_fused:
+            # Only partially specific, this is invalid
+            error(self.pos,
+                  "Index operation makes function only partially specific")
+        else:
+            # Fully specific, find the signature with the specialized entry
+            for signature in self.base.type.get_all_specialized_function_types():
+                if type.same_as(signature):
+                    self.type = signature
+
+                    if self.base.is_attribute:
+                        # Pretend to be a normal attribute, for cdef extension
+                        # methods
+                        self.entry = signature.entry
+                        self.is_attribute = True
+                        self.obj = self.base.obj
+
+                    # Point the base at the specialised function as well.
+                    self.type.entry.used = True
+                    self.base.type = signature
+                    self.base.entry = signature.entry
+
+                    break
+            else:
+                # This is a bug
+                raise InternalError("Couldn't find the right signature")
+
+    gil_message = "Indexing Python object"
+
+    def calculate_result_code(self):
+        if self.base.type in (list_type, tuple_type, bytearray_type):
+            if self.base.type is list_type:
+                index_code = "PyList_GET_ITEM(%s, %s)"
+            elif self.base.type is tuple_type:
+                index_code = "PyTuple_GET_ITEM(%s, %s)"
+            elif self.base.type is bytearray_type:
+                index_code = "((unsigned char)(PyByteArray_AS_STRING(%s)[%s]))"
+            else:
+                assert False, "unexpected base type in indexing: %s" % self.base.type
+        elif self.base.type.is_cfunction:
+            return "%s<%s>" % (
+                self.base.result(),
+                ",".join([param.empty_declaration_code() for param in self.type_indices]))
+        elif self.base.type.is_ctuple:
+            index = self.index.constant_result
+            if index < 0:
+                index += self.base.type.size
+            return "%s.f%s" % (self.base.result(), index)
+        else:
+            if (self.type.is_ptr or self.type.is_array) and self.type == self.base.type:
+                error(self.pos, "Invalid use of pointer slice")
+                return
+            index_code = "(%s[%s])"
+        return index_code % (self.base.result(), self.index.result())
+
+    def extra_index_params(self, code):
+        """
+        Return the trailing argument text for the integer item helpers.
+
+        For an integer index the result is ", <C type>, <signed>, <to-py
+        function>, <is_list>, <wraparound>, <boundscheck>"; for any other
+        index type it is the empty string.
+        """
+        if not self.index.type.is_int:
+            return ""
+
+        directives = code.globalstate.directives
+        index_type = self.original_index_type
+        is_list = self.base.type is list_type
+
+        # Wraparound handling is only needed for signed indices that are not
+        # known at compile time to be non-negative integers.
+        wraparound = bool(directives['wraparound']) and index_type.signed
+        if wraparound:
+            const_index = self.index.constant_result
+            wraparound = not (isinstance(const_index, _py_int_types) and const_index >= 0)
+        boundscheck = bool(directives['boundscheck'])
+
+        signed_flag = 1 if index_type.signed else 0
+        return ", %s, %d, %s, %d, %d, %d" % (
+            index_type.empty_declaration_code(),
+            signed_flag,
+            index_type.to_py_function,
+            is_list, wraparound, boundscheck)
+
+    def generate_result_code(self, code):
+        """
+        Emit the C code that evaluates this index expression into its temp.
+
+        Nothing is emitted when the node is not a temp, because the whole
+        expression then comes from calculate_result_code().  Otherwise a
+        runtime helper is chosen from the result type, the base type and the
+        index type, and a call to it is written together with an error check.
+        A C++ class base with an exception-checked operator[] is instead
+        emitted as a plain "result = base[index];" statement wrapped by
+        translate_cpp_exception().
+        """
+        if not self.is_temp:
+            # all handled in self.calculate_result_code()
+            return
+
+        # utility_code: helper implementation to register with the module (if any)
+        # error_value:  C value that the chosen helper returns on failure
+        utility_code = None
+        error_value = None
+        if self.type.is_pyobject:
+            error_value = 'NULL'
+            if self.index.type.is_int:
+                # C integer index: use the GetItemInt family of helpers
+                if self.base.type is list_type:
+                    function = "__Pyx_GetItemInt_List"
+                elif self.base.type is tuple_type:
+                    function = "__Pyx_GetItemInt_Tuple"
+                else:
+                    function = "__Pyx_GetItemInt"
+                utility_code = TempitaUtilityCode.load_cached("GetItemInt", "ObjectHandling.c")
+            else:
+                # Python object index
+                if self.base.type is dict_type:
+                    function = "__Pyx_PyDict_GetItem"
+                    utility_code = UtilityCode.load_cached("DictGetItem", "ObjectHandling.c")
+                elif self.base.type is py_object_type and self.index.type in (str_type, unicode_type):
+                    # obj[str] is probably doing a dict lookup
+                    function = "__Pyx_PyObject_Dict_GetItem"
+                    utility_code = UtilityCode.load_cached("DictGetItem", "ObjectHandling.c")
+                else:
+                    function = "__Pyx_PyObject_GetItem"
+                    # "GetItemInt" is registered here in addition to "ObjectGetItem"
+                    code.globalstate.use_utility_code(
+                        TempitaUtilityCode.load_cached("GetItemInt", "ObjectHandling.c"))
+                    utility_code = UtilityCode.load_cached("ObjectGetItem", "ObjectHandling.c")
+        elif self.type.is_unicode_char and self.base.type is unicode_type:
+            # unicode[int] producing a single C character value
+            assert self.index.type.is_int
+            function = "__Pyx_GetItemInt_Unicode"
+            error_value = '(Py_UCS4)-1'
+            utility_code = UtilityCode.load_cached("GetItemIntUnicode", "StringTools.c")
+        elif self.base.type is bytearray_type:
+            # bytearray[int] producing a C integer
+            assert self.index.type.is_int
+            assert self.type.is_int
+            function = "__Pyx_GetItemInt_ByteArray"
+            error_value = '-1'
+            utility_code = UtilityCode.load_cached("GetItemIntByteArray", "StringTools.c")
+        elif not (self.base.type.is_cpp_class and self.exception_check):
+            # the only remaining supported temp case is the C++ operator[] below
+            assert False, "unexpected type %s and base type %s for indexing (%s)" % (
+                self.type, self.base.type, self.pos)
+
+        if utility_code is not None:
+            code.globalstate.use_utility_code(utility_code)
+
+        # integer indices are passed as C values, everything else as a Python object
+        if self.index.type.is_int:
+            index_code = self.index.result()
+        else:
+            index_code = self.index.py_result()
+
+        if self.base.type.is_cpp_class and self.exception_check:
+            translate_cpp_exception(code, self.pos,
+                "%s = %s[%s];" % (self.result(), self.base.result(),
+                                  self.index.result()),
+                self.result() if self.type.is_pyobject else None,
+                self.exception_value, self.in_nogil_context)
+        else:
+            # NULL results are tested with "!result", other error values with "result == value"
+            error_check = '!%s' if error_value == 'NULL' else '%%s == %s' % error_value
+            code.putln(
+                "%s = %s(%s, %s%s); %s" % (
+                    self.result(),
+                    function,
+                    self.base.py_result(),
+                    index_code,
+                    self.extra_index_params(code),
+                    code.error_goto_if(error_check % self.result(), self.pos)))
+        if self.type.is_pyobject:
+            self.generate_gotref(code)
+
+    def generate_setitem_code(self, value_code, code):
+        """
+        Emit a C call that stores 'value_code' at this node's index.
+
+        Integer indices go through the __Pyx_SetItemInt helpers (with a
+        dedicated variant for bytearrays); object indices use PyDict_SetItem
+        for dicts and PyObject_SetItem otherwise.  A negative return value of
+        the call jumps to the error label.
+        """
+        base_type = self.base.type
+
+        if self.index.type.is_int:
+            if base_type is bytearray_type:
+                utility = UtilityCode.load_cached("SetItemIntByteArray", "StringTools.c")
+                function = "__Pyx_SetItemInt_ByteArray"
+            else:
+                utility = UtilityCode.load_cached("SetItemInt", "ObjectHandling.c")
+                function = "__Pyx_SetItemInt"
+            code.globalstate.use_utility_code(utility)
+            index_code = self.index.result()
+        else:
+            index_code = self.index.py_result()
+            # Lists and tuples are deliberately not special-cased here:
+            # PyList_SetItem() and PyTuple_SetItem() take a Py_ssize_t index
+            # rather than an object, so a failed conversion would raise the
+            # wrong exception.  Tuples are also meant to be immutable and must
+            # raise a TypeError on item assignment (PyTuple_SetItem() only
+            # exists for filling freshly created tuples).
+            function = "PyDict_SetItem" if base_type is dict_type else "PyObject_SetItem"
+
+        setitem_call = "%s(%s, %s, %s%s)" % (
+            function,
+            self.base.py_result(),
+            index_code,
+            value_code,
+            self.extra_index_params(code))
+        code.putln(code.error_goto_if_neg(setitem_call, self.pos))
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        """
+        Emit the C code for "self = rhs" where self is an indexed target.
+
+        Python object items and bytearray items are stored through
+        generate_setitem_code(); C++ operator[] targets declared with
+        exception check '+' are wrapped in exception translation; any other
+        target becomes a plain C assignment.  Afterwards the subexpressions
+        and 'rhs' are disposed of and their temps are released.
+        """
+        self.generate_subexpr_evaluation_code(code)
+
+        if self.type.is_pyobject:
+            self.generate_setitem_code(rhs.py_result(), code)
+        elif self.base.type is bytearray_type:
+            self.generate_setitem_code(self._check_byte_value(code, rhs), code)
+        elif self.base.type.is_cpp_class and self.exception_check == '+':
+            distinct_handlers = (
+                overloaded_assignment and exception_check and
+                self.exception_value != exception_value)
+            if distinct_handlers:
+                # Both the index operator and the assignment operator have a
+                # C++ exception handler, and the two handlers differ.
+                translate_double_cpp_exception(code, self.pos, self.type,
+                    self.result(), rhs.result(), self.exception_value,
+                    exception_value, self.in_nogil_context)
+            else:
+                # Only the index operator has a C++ exception handler, or
+                # both handlers are the same.  The result is never a Python
+                # object in this branch, so there is no object to hand over.
+                plain_assignment = "%s = %s;" % (self.result(), rhs.result())
+                translate_cpp_exception(code, self.pos,
+                    plain_assignment,
+                    None,
+                    self.exception_value, self.in_nogil_context)
+        else:
+            code.putln("%s = %s;" % (self.result(), rhs.result()))
+
+        self.generate_subexpr_disposal_code(code)
+        self.free_subexpr_temps(code)
+        rhs.generate_disposal_code(code)
+        rhs.free_temps(code)
+
+    def _check_byte_value(self, code, rhs):
+        """
+        Return C code for 'rhs' that is suitable for storing into a byte.
+
+        A constant inside range(0, 256) is returned unchanged; a constant
+        outside of it triggers a compile time warning.  Unless self.nogil is
+        set, a runtime range check raising ValueError is emitted where the
+        value is not already known to fit.  The returned code is cast to
+        unsigned char when the rhs is not of that type or is an out-of-range
+        constant.
+        """
+        # TODO: should we do this generally on downcasts, or just here?
+        assert rhs.type.is_int, repr(rhs.type)
+        value_code = rhs.result()
+
+        if not rhs.has_constant_result():
+            needs_cast = rhs.type != PyrexTypes.c_uchar_type
+        else:
+            constant = rhs.constant_result
+            if 0 <= constant < 256:
+                return value_code
+            needs_cast = True  # make at least the C compiler happy
+            warning(rhs.pos,
+                    "value outside of range(0, 256) when assigning to byte: %s" % constant,
+                    level=1)
+
+        if not self.nogil:
+            char_types = (
+                PyrexTypes.c_uchar_type, PyrexTypes.c_char_type,
+                PyrexTypes.c_schar_type)
+            range_checks = []
+            if rhs.is_literal or rhs.type.signed:
+                range_checks.append('%s < 0' % value_code)
+            if rhs.is_literal or not rhs.is_temp or rhs.type not in char_types:
+                range_checks.append('%s > 255' % value_code)
+            if range_checks:
+                code.putln("if (unlikely(%s)) {" % ' || '.join(range_checks))
+                code.putln(
+                    'PyErr_SetString(PyExc_ValueError, "byte must be in range(0, 256)"); %s' %
+                    code.error_goto(self.pos))
+                code.putln("}")
+
+        if not needs_cast:
+            return value_code
+        return '((unsigned char)%s)' % value_code
+
+    def generate_deletion_code(self, code, ignore_nonexisting=False):
+        """
+        Emit the C code for "del base[index]".
+
+        Integer indices use __Pyx_DelItemInt, object indices use
+        PyDict_DelItem for dicts and PyObject_DelItem otherwise.  The
+        'ignore_nonexisting' argument is accepted but not used here.
+        """
+        self.generate_subexpr_evaluation_code(code)
+
+        if not self.index.type.is_int:
+            index_code = self.index.py_result()
+            function = "PyDict_DelItem" if self.base.type is dict_type else "PyObject_DelItem"
+        else:
+            function = "__Pyx_DelItemInt"
+            index_code = self.index.result()
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("DelItemInt", "ObjectHandling.c"))
+
+        delete_call = "%s(%s, %s%s)" % (
+            function,
+            self.base.py_result(),
+            index_code,
+            self.extra_index_params(code))
+        code.putln(code.error_goto_if_neg(delete_call, self.pos))
+
+        self.generate_subexpr_disposal_code(code)
+        self.free_subexpr_temps(code)
+
+
+class BufferIndexNode(_IndexingBaseNode):
+    """
+    Indexing of buffers and memoryviews. This node is created during type
+    analysis from IndexNode and replaces it.
+
+    Attributes:
+        base - base node being indexed
+        indices - list of indexing expressions
+    """
+
+    subexprs = ['base', 'indices']
+
+    # Distinguishes plain buffer indexing from the memoryview subclasses,
+    # which reset this flag.
+    is_buffer_access = True
+
+    # Whether we're assigning to a buffer (in that case it needs to be writable)
+    writable_needed = False
+
+    # Any indexing temp variables that we need to clean up.
+    index_temps = ()
+
+    def analyse_target_types(self, env):
+        """Analyse this node as an assignment target (getting=False)."""
+        self.analyse_types(env, getting=False)
+
+    def analyse_types(self, env, getting=True):
+        """
+        Analyse types for buffer indexing only. Overridden by memoryview
+        indexing and slicing subclasses
+        """
+        # self.indices are already analyzed
+        if not self.base.is_name and not is_pythran_expr(self.base.type):
+            error(self.pos, "Can only index buffer variables")
+            self.type = error_type
+            return self
+
+        if not getting:
+            # Writing: the underlying buffer must be writable.
+            if not self.base.entry.type.writable:
+                error(self.pos, "Writing to readonly buffer")
+            else:
+                self.writable_needed = True
+                if self.base.type.is_buffer:
+                    self.base.entry.buffer_aux.writable_needed = True
+
+        self.none_error_message = "'NoneType' object is not subscriptable"
+        self.analyse_buffer_index(env, getting)
+        self.wrap_in_nonecheck_node(env)
+        return self
+
+    def analyse_buffer_index(self, env, getting):
+        """
+        Determine the result type of the indexing operation and remember the
+        type of the base in self.buffer_type.  Reads that yield a Python
+        object or a Pythran expression are evaluated into a temp.
+        """
+        if is_pythran_expr(self.base.type):
+            index_with_type_list = [(idx, idx.type) for idx in self.indices]
+            self.type = PythranExpr(pythran_indexing_type(self.base.type, index_with_type_list))
+        else:
+            self.base = self.base.coerce_to_simple(env)
+            self.type = self.base.type.dtype
+        self.buffer_type = self.base.type
+
+        if getting and (self.type.is_pyobject or self.type.is_pythran_expr):
+            self.is_temp = True
+
+    def analyse_assignment(self, rhs):
+        """
+        Called by IndexNode when this node is assigned to,
+        with the rhs of the assignment
+
+        This implementation does nothing and returns None.
+        """
+
+    def wrap_in_nonecheck_node(self, env):
+        """
+        Replace self.base by a None-checking wrapper when the 'nonecheck'
+        directive is enabled and the base may be None.
+        """
+        if not env.directives['nonecheck'] or not self.base.may_be_none():
+            return
+        self.base = self.base.as_none_safe_node(self.none_error_message)
+
+    def nogil_check(self, env):
+        """Report an error for element access that yields a Python object."""
+        if self.is_buffer_access or self.is_memview_index:
+            if self.type.is_pyobject:
+                error(self.pos, "Cannot access buffer with object dtype without gil")
+                self.type = error_type
+
+    def calculate_result_code(self):
+        """Return the dereferenced element pointer computed by the lookup code."""
+        return "(*%s)" % self.buffer_ptr_code
+
+    def buffer_entry(self):
+        """Return the buffer entry of the base, looking through a None-check wrapper."""
+        base = self.base
+        if self.base.is_nonecheck:
+            base = base.arg
+        return base.type.get_entry(base)
+
+    def get_index_in_temp(self, code, ivar):
+        """
+        Copy the value of index node 'ivar' into a freshly allocated temp and
+        return that temp.  The temp type is the widest of the index type and
+        Py_ssize_t (signed index) or size_t (unsigned index).
+        """
+        ret = code.funcstate.allocate_temp(
+            PyrexTypes.widest_numeric_type(
+                ivar.type,
+                PyrexTypes.c_ssize_t_type if ivar.type.signed else PyrexTypes.c_size_t_type),
+            manage_ref=False)
+        code.putln("%s = %s;" % (ret, ivar.result()))
+        return ret
+
+    def buffer_lookup_code(self, code):
+        """
+        ndarray[1, 2, 3] and memslice[1, 2, 3]
+
+        Emits the element lookup code and returns a tuple of the buffer entry
+        and the value returned by Buffer.put_buffer_lookup_code().  The index
+        temps allocated here are stored in self.index_temps and released by
+        free_subexpr_temps().
+        """
+        if self.in_nogil_context:
+            if self.is_buffer_access or self.is_memview_index:
+                if code.globalstate.directives['boundscheck']:
+                    warning(self.pos, "Use boundscheck(False) for faster access", level=1)
+
+        # Assign indices to temps of at least (s)size_t to allow further index calculations.
+        self.index_temps = index_temps = [self.get_index_in_temp(code,ivar) for ivar in self.indices]
+
+        # Generate buffer access code using these temps
+        from . import Buffer
+        buffer_entry = self.buffer_entry()
+        if buffer_entry.type.is_buffer:
+            negative_indices = buffer_entry.type.negative_indices
+        else:
+            # non-buffer entries fall back to the default setting
+            negative_indices = Buffer.buffer_defaults['negative_indices']
+
+        return buffer_entry, Buffer.put_buffer_lookup_code(
+            entry=buffer_entry,
+            index_signeds=[ivar.type.signed for ivar in self.indices],
+            index_cnames=index_temps,
+            directives=code.globalstate.directives,
+            pos=self.pos, code=code,
+            negative_indices=negative_indices,
+            in_nogil_context=self.in_nogil_context)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
+        """
+        Emit the code for "self = rhs": evaluate the subexpressions, store the
+        item, then dispose of and release the subexpressions and the rhs.
+        """
+        self.generate_subexpr_evaluation_code(code)
+        self.generate_buffer_setitem_code(rhs, code)
+        self.generate_subexpr_disposal_code(code)
+        self.free_subexpr_temps(code)
+        rhs.generate_disposal_code(code)
+        rhs.free_temps(code)
+
+    def generate_buffer_setitem_code(self, rhs, code, op=""):
+        """
+        Emit the store of 'rhs' into the indexed element.
+
+        'op' is inserted directly before the '=' of the generated assignment,
+        so a non-empty value produces an in-place operator such as '+='.
+        """
+        base_type = self.base.type
+        if is_pythran_expr(base_type) and is_pythran_supported_type(rhs.type):
+            obj = code.funcstate.allocate_temp(PythranExpr(pythran_type(self.base.type)), manage_ref=False)
+            # We have got to do this because we have to declare pythran objects
+            # at the beginning of the functions.
+            # Indeed, Cython uses "goto" statement for error management, and
+            # RAII doesn't work with that kind of construction.
+            # Moreover, the way Pythran expressions are made is that they don't
+            # support move-assignment easily.
+            # Thus, we explicitly destroy the object and then construct a new
+            # one in place in this case.
+            code.putln("__Pyx_call_destructor(%s);" % obj)
+            code.putln("new (&%s) decltype(%s){%s};" % (obj, obj, self.base.pythran_result()))
+            code.putln("%s%s %s= %s;" % (
+                obj,
+                pythran_indexing_code(self.indices),
+                op,
+                rhs.pythran_result()))
+            code.funcstate.release_temp(obj)
+            return
+
+        # Used from generate_assignment_code and InPlaceAssignmentNode
+        buffer_entry, ptrexpr = self.buffer_lookup_code(code)
+
+        if self.buffer_type.dtype.is_pyobject:
+            # Must manage refcounts. XDecref what is already there
+            # and incref what we put in (NumPy allows there to be NULL)
+            ptr = code.funcstate.allocate_temp(buffer_entry.buf_ptr_type,
+                                               manage_ref=False)
+            rhs_code = rhs.result()
+            code.putln("%s = %s;" % (ptr, ptrexpr))
+            code.put_xgotref("*%s" % ptr, self.buffer_type.dtype)
+            code.putln("__Pyx_INCREF(%s); __Pyx_XDECREF(*%s);" % (
+                rhs_code, ptr))
+            code.putln("*%s %s= %s;" % (ptr, op, rhs_code))
+            code.put_xgiveref("*%s" % ptr, self.buffer_type.dtype)
+            code.funcstate.release_temp(ptr)
+        else:
+            # Simple case
+            code.putln("*%s %s= %s;" % (ptrexpr, op, rhs.result()))
+
+    def generate_result_code(self, code):
+        """
+        Emit the code that reads the indexed element.
+
+        Pythran expressions are rebuilt in place in the result variable.
+        Otherwise the lookup code is emitted and the element pointer is kept
+        in self.buffer_ptr_code; object elements are additionally loaded into
+        the result temp and incref-ed.
+        """
+        if is_pythran_expr(self.base.type):
+            res = self.result()
+            code.putln("__Pyx_call_destructor(%s);" % res)
+            code.putln("new (&%s) decltype(%s){%s%s};" % (
+                res,
+                res,
+                self.base.pythran_result(),
+                pythran_indexing_code(self.indices)))
+            return
+        buffer_entry, self.buffer_ptr_code = self.buffer_lookup_code(code)
+        if self.type.is_pyobject:
+            # is_temp is True, so must pull out value and incref it.
+            # NOTE: object temporary results for nodes are declared
+            #       as PyObject *, so we need a cast
+            res = self.result()
+            code.putln("%s = (PyObject *) *%s;" % (res, self.buffer_ptr_code))
+            # NumPy does (occasionally) allow NULL to denote None.
+            code.putln("if (unlikely(%s == NULL)) %s = Py_None;" % (res, res))
+            code.putln("__Pyx_INCREF((PyObject*)%s);" % res)
+
+    def free_subexpr_temps(self, code):
+        """Release the index temps allocated by buffer_lookup_code(), then the regular subexpression temps."""
+        for temp in self.index_temps:
+            code.funcstate.release_temp(temp)
+        self.index_temps = ()
+        super(BufferIndexNode, self).free_subexpr_temps(code)
+
+
+class MemoryViewIndexNode(BufferIndexNode):
+    """
+    Indexing of a memoryview slice.  analyse_types() classifies every index
+    and records whether the operation is an element access
+    (is_memview_index) or a slicing operation (is_memview_slice).
+    """
+
+    is_memview_index = True
+    is_buffer_access = False
+
+    def analyse_types(self, env, getting=True):
+        """
+        Analyse and coerce the indices of a memoryview indexing or slicing
+        operation, then delegate to analyse_operation().
+
+        On return, self.indices holds the flat list of coerced index nodes
+        (including the start/stop/step nodes of slices), while
+        self.original_indices keeps the per-axis indices.  On error, the type
+        is set to error_type and the node is returned right away.
+        """
+        # memoryviewslice indexing or slicing
+        from . import MemoryView
+
+        self.is_pythran_mode = has_np_pythran(env)
+        indices = self.indices
+        have_slices, indices, newaxes = MemoryView.unellipsify(indices, self.base.type.ndim)
+
+        if not getting:
+            self.writable_needed = True
+            if self.base.is_name or self.base.is_attribute:
+                self.base.entry.type.writable_needed = True
+
+        # True when there is exactly one index per dimension and no new axis
+        self.memslice_index = (not newaxes and len(indices) == self.base.type.ndim)
+        axes = []
+
+        index_type = PyrexTypes.c_py_ssize_t_type
+        new_indices = []
+
+        if len(indices) - len(newaxes) > self.base.type.ndim:
+            self.type = error_type
+            error(indices[self.base.type.ndim].pos,
+                  "Too many indices specified for type %s" % self.base.type)
+            return self
+
+        # axis_idx counts the axes of the base consumed so far; a None index
+        # (new axis) does not consume one.
+        axis_idx = 0
+        for i, index in enumerate(indices[:]):
+            index = index.analyse_types(env)
+            if index.is_none:
+                self.is_memview_slice = True
+                new_indices.append(index)
+                axes.append(('direct', 'strided'))
+                continue
+
+            access, packing = self.base.type.axes[axis_idx]
+            axis_idx += 1
+
+            if index.is_slice:
+                self.is_memview_slice = True
+                # an explicit step makes the axis strided
+                if index.step.is_none:
+                    axes.append((access, packing))
+                else:
+                    axes.append((access, 'strided'))
+
+                # Coerce start, stop and step to temps of the right type
+                for attr in ('start', 'stop', 'step'):
+                    value = getattr(index, attr)
+                    if not value.is_none:
+                        value = value.coerce_to(index_type, env)
+                        #value = value.coerce_to_temp(env)
+                        setattr(index, attr, value)
+                        new_indices.append(value)
+
+            elif index.type.is_int or index.type.is_pyobject:
+                if index.type.is_pyobject:
+                    performance_hint(index.pos, "Index should be typed for more efficient access", env)
+
+                self.is_memview_index = True
+                index = index.coerce_to(index_type, env)
+                indices[i] = index
+                new_indices.append(index)
+
+            else:
+                self.type = error_type
+                error(index.pos, "Invalid index for memoryview specified, type %s" % index.type)
+                return self
+
+        ### FIXME: replace by MemoryViewSliceNode if is_memview_slice ?
+        self.is_memview_index = self.is_memview_index and not self.is_memview_slice
+        self.indices = new_indices
+        # All indices with all start/stop/step for slices.
+        # We need to keep this around.
+        self.original_indices = indices
+        self.nogil = env.nogil
+
+        self.analyse_operation(env, getting, axes)
+        self.wrap_in_nonecheck_node(env)
+        return self
+
+    def analyse_operation(self, env, getting, axes):
+        """Set the None error message and analyse as an element access; 'axes' is not used here."""
+        self.none_error_message = "Cannot index None memoryview slice"
+        self.analyse_buffer_index(env, getting)
+
+    def analyse_broadcast_operation(self, rhs):
+        """
+        Support broadcasting for slice assignment.
+        E.g.
+            m_2d[...] = m_1d  # or,
+            m_1d[...] = m_2d  # if the leading dimension has extent 1
+        """
+        if self.type.is_memoryviewslice:
+            lhs = self
+            # if either side is flagged for broadcasting, flag both
+            if lhs.is_memview_broadcast or rhs.is_memview_broadcast:
+                lhs.is_memview_broadcast = True
+                rhs.is_memview_broadcast = True
+
+    def analyse_as_memview_scalar_assignment(self, rhs):
+        """
+        Return the node produced by analyse_assignment(rhs), after copying
+        its is_memview_copy_assignment flag onto 'rhs'.  Returns self when
+        analyse_assignment() yields nothing.
+        """
+        lhs = self.analyse_assignment(rhs)
+        if lhs:
+            rhs.is_memview_copy_assignment = lhs.is_memview_copy_assignment
+            return lhs
+        return self
+
+
+class MemoryViewSliceNode(MemoryViewIndexNode):
+    """
+    Slicing of a memoryview slice, i.e. an indexing operation whose result
+    is again a memoryview slice.
+    """
+
+    is_memview_slice = True
+
+    # No-op slicing operation, this node will be replaced
+    is_ellipsis_noop = False
+    is_memview_scalar_assignment = False
+    is_memview_index = False
+    is_memview_broadcast = False
+
+    def analyse_ellipsis_noop(self, env, getting):
+        """Slicing operations needing no evaluation, i.e. m[...] or m[:, :]"""
+        ### FIXME: replace directly
+        self.is_ellipsis_noop = all(
+            index.is_slice and index.start.is_none and index.stop.is_none and index.step.is_none
+            for index in self.indices)
+
+        if self.is_ellipsis_noop:
+            # the result is simply the base itself
+            self.type = self.base.type
+
+    def analyse_operation(self, env, getting, axes):
+        """
+        Determine the result type of the slicing operation from 'axes'.
+
+        No-op slicing keeps the type of the base.  Otherwise the node becomes
+        a temp of a new memoryview slice type, and a base that is neither
+        simple nor already in a temp is coerced to a temp.
+        """
+        from . import MemoryView
+
+        if not getting:
+            self.is_memview_broadcast = True
+            self.none_error_message = "Cannot assign to None memoryview slice"
+        else:
+            self.none_error_message = "Cannot slice None memoryview slice"
+
+        self.analyse_ellipsis_noop(env, getting)
+        if self.is_ellipsis_noop:
+            return
+
+        self.index = None
+        self.is_temp = True
+        self.use_managed_ref = True
+
+        if not MemoryView.validate_axes(self.pos, axes):
+            self.type = error_type
+            return
+
+        self.type = PyrexTypes.MemoryViewSliceType(self.base.type.dtype, axes)
+
+        if not (self.base.is_simple() or self.base.result_in_temp()):
+            self.base = self.base.coerce_to_temp(env)
+
+    def analyse_assignment(self, rhs):
+        """
+        Return the node that implements assigning 'rhs' to this slice:
+        MemoryCopyScalar when rhs is not a memoryview slice and is either
+        assignable to the dtype or a Python object, MemoryCopySlice otherwise.
+        """
+        if not rhs.type.is_memoryviewslice and (
+                self.type.dtype.assignable_from(rhs.type) or
+                rhs.type.is_pyobject):
+            # scalar assignment
+            return MemoryCopyScalar(self.pos, self)
+        else:
+            return MemoryCopySlice(self.pos, self)
+
+    def merged_indices(self, indices):
+        """Return a new list of indices/slices with 'indices' merged into the current ones
+        according to slicing rules.
+        Is used to implement "view[i][j]" => "view[i, j]".
+        Return None if the indices cannot (easily) be merged at compile time.
+        """
+        if not indices:
+            return None
+        # NOTE: Need to evaluate "self.original_indices" here as they might differ from "self.indices".
+        new_indices = self.original_indices[:]
+        indices = indices[:]
+        for i, s in enumerate(self.original_indices):
+            if s.is_slice:
+                if s.start.is_none and s.stop.is_none and s.step.is_none:
+                    # Full slice found, replace by index.
+                    new_indices[i] = indices[0]
+                    indices.pop(0)
+                    if not indices:
+                        return new_indices
+                else:
+                    # Found something non-trivial, e.g. a partial slice.
+                    return None
+            elif not s.type.is_int:
+                # Not a slice, not an integer index => could be anything...
+                return None
+        if indices:
+            # leftover indices are appended, unless that exceeds the base dimensions
+            if len(new_indices) + len(indices) > self.base.type.ndim:
+                return None
+            new_indices += indices
+        return new_indices
+
+    def is_simple(self):
+        """A no-op slice is as simple as its base; otherwise this depends on the result being in a temp."""
+        if self.is_ellipsis_noop:
+            # TODO: fix SimpleCallNode.is_simple()
+            return self.base.is_simple() or self.base.result_in_temp()
+
+        return self.result_in_temp()
+
+    def calculate_result_code(self):
+        """This is called in case this is a no-op slicing node"""
+        return self.base.result()
+
+    def generate_result_code(self, code):
+        """
+        Emit the slicing code through the buffer entry.  Nothing is emitted
+        for a no-op slice.
+        """
+        if self.is_ellipsis_noop:
+            return  ### FIXME: remove
+        buffer_entry = self.buffer_entry()
+        have_gil = not self.in_nogil_context
+
+        # TODO Mark: this is insane, do it better
+        # Walk self.indices in parallel with self.original_indices and put the
+        # nodes of self.indices back into the start/stop/step attributes of
+        # the slices they belong to.
+        have_slices = False
+        it = iter(self.indices)
+        for index in self.original_indices:
+            if index.is_slice:
+                have_slices = True
+                if not index.start.is_none:
+                    index.start = next(it)
+                if not index.stop.is_none:
+                    index.stop = next(it)
+                if not index.step.is_none:
+                    index.step = next(it)
+            else:
+                next(it)
+
+        # every node of self.indices must have been consumed
+        assert not list(it)
+
+        buffer_entry.generate_buffer_slice_code(
+            code, self.original_indices, self.result(), self.type,
+            have_gil=have_gil, have_slices=have_slices,
+            directives=code.globalstate.directives)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
+        """
+        Emit the code for assigning 'rhs' to this slice.  A no-op slice only
+        evaluates and disposes of its subexpressions, any other slice is
+        evaluated and disposed of as a whole.
+        """
+        if self.is_ellipsis_noop:
+            self.generate_subexpr_evaluation_code(code)
+        else:
+            self.generate_evaluation_code(code)
+
+        if self.is_memview_scalar_assignment:
+            self.generate_memoryviewslice_assign_scalar_code(rhs, code)
+        else:
+            self.generate_memoryviewslice_setslice_code(rhs, code)
+
+        if self.is_ellipsis_noop:
+            self.generate_subexpr_disposal_code(code)
+        else:
+            self.generate_disposal_code(code)
+
+        rhs.generate_disposal_code(code)
+        rhs.free_temps(code)
+
+
+class MemoryCopyNode(ExprNode):
+    """
+    Wraps a memoryview slice that is the target of a slice assignment.
+    The actual copy is emitted by the subclass hook
+    _generate_assignment_code().
+
+        dst: destination memoryview slice
+    """
+
+    subexprs = ['dst']
+
+    def __init__(self, pos, dst):
+        super(MemoryCopyNode, self).__init__(pos)
+        self.dst = dst
+        self.type = dst.type
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
+        """
+        Evaluate the destination, emit the copy, then dispose of and release
+        the destination followed by the rhs.
+        """
+        self.dst.generate_evaluation_code(code)
+        self._generate_assignment_code(rhs, code)
+        # clean up in the order: destination first, then the assigned value
+        for node in (self.dst, rhs):
+            node.generate_disposal_code(code)
+            node.free_temps(code)
+
+
+class MemoryCopySlice(MemoryCopyNode):
+    """
+    Copies the contents of a source slice into the destination slice.
+    Indirect slices are not supported.
+
+        memslice1[...] = memslice2
+        memslice1[:] = memslice2
+    """
+
+    is_memview_copy_assignment = True
+    copy_slice_cname = "__pyx_memoryview_copy_contents"
+
+    def _generate_assignment_code(self, src, code):
+        """Emit the call to the copy helper; a negative result jumps to the error label."""
+        dst = self.dst
+
+        # both sides must consist of direct dimensions only
+        for memslice in (src, dst):
+            memslice.type.assert_direct_dims(memslice.pos)
+
+        copy_args = (
+            self.copy_slice_cname,
+            src.result(), dst.result(),
+            src.type.ndim, dst.type.ndim,
+            dst.type.dtype.is_pyobject)
+        copy_call = "%s(%s, %s, %d, %d, %d)" % copy_args
+        code.putln(code.error_goto_if_neg(copy_call, dst.pos))
+
+
+class MemoryCopyScalar(MemoryCopyNode):
+    """
+    Assigns one scalar value to every element of a slice.  The destination
+    must be simple; the scalar is assigned as the exact element type rather
+    than as something that is merely assignable to it.
+
+        memslice1[...] = 0.0
+        memslice1[:] = 0.0
+    """
+
+    def __init__(self, pos, dst):
+        super(MemoryCopyScalar, self).__init__(pos, dst)
+        # this node has the element type of the destination, not its slice type
+        self.type = dst.type.dtype
+
+    def _generate_assignment_code(self, scalar, code):
+        """
+        Emit a C block that stores the scalar in a local variable and writes
+        it to each element of the destination inside the loops created by
+        MemoryView.slice_iter().  Object elements are decref-ed before and
+        the scalar is incref-ed after each store.
+        """
+        from . import MemoryView
+
+        dst = self.dst
+        dst_type = dst.type
+        dst_type.assert_direct_dims(dst.pos)
+
+        dtype = dst_type.dtype
+        type_decl = dtype.declaration_code("")
+        slice_decl = dst_type.declaration_code("")
+
+        code.begin_block()
+        code.putln("%s __pyx_temp_scalar = %s;" % (type_decl, scalar.result()))
+        if not (dst.result_in_temp() or dst.is_simple()):
+            # copy the destination into a local so that it is only evaluated once
+            code.putln("%s __pyx_temp_slice = %s;" % (slice_decl, dst.result()))
+            dst_temp = "__pyx_temp_slice"
+        else:
+            dst_temp = dst.result()
+
+        # any slice that is not a plain ":" forces strided iteration
+        force_strided = any(
+            isinstance(idx, SliceNode) and not (
+                idx.start.is_none and idx.stop.is_none and idx.step.is_none)
+            for idx in dst.original_indices)
+
+        slice_iter_obj = MemoryView.slice_iter(dst_type, dst_temp,
+                                               dst_type.ndim, code,
+                                               force_strided=force_strided)
+        element_ptr = slice_iter_obj.start_loops()
+
+        holds_objects = dtype.is_pyobject
+        if holds_objects:
+            code.putln("Py_DECREF(*(PyObject **) %s);" % element_ptr)
+
+        code.putln("*((%s *) %s) = __pyx_temp_scalar;" % (type_decl, element_ptr))
+
+        if holds_objects:
+            code.putln("Py_INCREF(__pyx_temp_scalar);")
+
+        slice_iter_obj.end_loops()
+        code.end_block()
+
+
+class SliceIndexNode(ExprNode):
+    #  2-element slice indexing
+    #
+    #  base      ExprNode
+    #  start     ExprNode or None
+    #  stop      ExprNode or None
+    #  slice     ExprNode or None   constant slice object
+    #  nogil     bool               used internally
+
+    subexprs = ['base', 'start', 'stop', 'slice']
+    nogil = False
+
+    slice = None
+
+    def infer_type(self, env):
+        """
+        Infer the result type of the slice from the inferred type of the base.
+
+        C strings and C++ classes give bytes, Py_UNICODE pointers give
+        unicode, the listed builtin sequence types are preserved, C pointers
+        and arrays give an array of unknown size, and anything else is a
+        generic Python object.
+        """
+        base_type = self.base.infer_type(env)
+
+        if base_type.is_string or base_type.is_cpp_class:
+            return bytes_type
+        if base_type.is_pyunicode_ptr:
+            return unicode_type
+
+        preserved_types = (bytes_type, bytearray_type, str_type, unicode_type,
+                           basestring_type, list_type, tuple_type)
+        if base_type in preserved_types:
+            return base_type
+        if base_type.is_ptr or base_type.is_array:
+            item_type = base_type.base_type
+            return PyrexTypes.c_array_type(item_type, None)
+        return py_object_type
+
+    def inferable_item_node(self, index=0):
+        # Slicing keeps the item type of the base, so the lookup is forwarded to
+        # the base with the index shifted by the slice start.  The index becomes
+        # 'not_a_constant' as soon as the start is not a known constant.
+        lower = self.start
+        if index is not not_a_constant and lower:
+            index = (index + lower.constant_result
+                     if lower.has_constant_result() else not_a_constant)
+        return self.base.inferable_item_node(index)
+
+    def may_be_none(self):
+        # Slices of C strings and of the listed builtin types are never None;
+        # everything else falls back to the generic ExprNode answer.
+        sliced_type = self.base.type
+        never_none = sliced_type and (
+            sliced_type.is_string or
+            sliced_type in (bytes_type, str_type, unicode_type,
+                            basestring_type, list_type, tuple_type))
+        if never_none:
+            return False
+        return ExprNode.may_be_none(self)
+
+    def calculate_constant_result(self):
+        # Slice the constant result of the base with the constant bounds,
+        # using None for a bound that was not given.
+        lower = None if self.start is None else self.start.constant_result
+        upper = None if self.stop is None else self.stop.constant_result
+        self.constant_result = self.base.constant_result[lower:upper]
+
+    def compile_time_value(self, denv):
+        # Evaluate base[start:stop] at compile time.  A missing start counts as
+        # 0, a missing stop as None.  Any exception raised by the slicing itself
+        # is passed to compile_time_value_error().
+        base_value = self.base.compile_time_value(denv)
+        lower = 0 if self.start is None else self.start.compile_time_value(denv)
+        upper = None if self.stop is None else self.stop.compile_time_value(denv)
+        try:
+            return base_value[lower:upper]
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+
+    def analyse_target_declaration(self, env):
+        # Intentionally a no-op for slice assignment targets.
+        return None
+
+    def analyse_target_types(self, env):
+        # Analyse this slice as an assignment target (getting=False).
+        target = self.analyse_types(env, getting=False)
+        if target.type.is_pyobject:
+            # when assigning, we must accept any Python type
+            target.type = py_object_type
+        return target
+
+    def analyse_types(self, env, getting=True):
+        # Analyse the base and the bounds and determine the result type.
+        # 'getting' is False when the slice is an assignment target (see
+        # analyse_target_types()).  Returns the node to use in place of this
+        # one: 'self', or whatever IndexNode.analyse_base_and_index_types()
+        # returns for buffer, Pythran and memoryview slice bases.
+        self.base = self.base.analyse_types(env)
+
+        if self.base.type.is_buffer or self.base.type.is_pythran_expr or self.base.type.is_memoryviewslice:
+            # hand over to generic indexing with an equivalent start:stop:None slice;
+            # the base is already analysed, hence analyse_base=False
+            none_node = NoneNode(self.pos)
+            index = SliceNode(self.pos,
+                              start=self.start or none_node,
+                              stop=self.stop or none_node,
+                              step=none_node)
+            index_node = IndexNode(self.pos, index=index, base=self.base)
+            return index_node.analyse_base_and_index_types(
+                env, getting=getting, setting=not getting,
+                analyse_base=False)
+
+        if self.start:
+            self.start = self.start.analyse_types(env)
+        if self.stop:
+            self.stop = self.stop.analyse_types(env)
+
+        if not env.directives['wraparound']:
+            check_negative_indices(self.start, self.stop)
+
+        # note: 'base_type' keeps the type the base had before any coercion below
+        base_type = self.base.type
+        if base_type.is_array and not getting:
+            # cannot assign directly to C array => try to assign by making a copy
+            if not self.start and not self.stop:
+                self.type = base_type
+            else:
+                self.type = PyrexTypes.CPtrType(base_type.base_type)
+        elif base_type.is_string or base_type.is_cpp_string:
+            self.type = default_str_type(env)
+        elif base_type.is_pyunicode_ptr:
+            self.type = unicode_type
+        elif base_type.is_ptr:
+            self.type = base_type
+        elif base_type.is_array:
+            # we need a ptr type here instead of an array type, as
+            # array types can result in invalid type casts in the C
+            # code
+            self.type = PyrexTypes.CPtrType(base_type.base_type)
+        else:
+            self.base = self.base.coerce_to_pyobject(env)
+            self.type = py_object_type
+        if base_type.is_builtin_type:
+            # slicing builtin types returns something of the same type
+            self.type = base_type
+            self.base = self.base.as_none_safe_node("'NoneType' object is not subscriptable")
+
+        if self.type is py_object_type:
+            # generic object slicing: the bounds are left uncoerced
+            if (not self.start or self.start.is_literal) and \
+                    (not self.stop or self.stop.is_literal):
+                # cache the constant slice object, in case we need it
+                none_node = NoneNode(self.pos)
+                self.slice = SliceNode(
+                    self.pos,
+                    start=copy.deepcopy(self.start or none_node),
+                    stop=copy.deepcopy(self.stop or none_node),
+                    step=none_node
+                ).analyse_types(env)
+        else:
+            # every other result type uses Py_ssize_t bounds
+            c_int = PyrexTypes.c_py_ssize_t_type
+
+            def allow_none(node, default_value, env):
+                # Coerce to Py_ssize_t, but allow None as meaning the default slice bound.
+                # 'default_value' is the C expression (as a string) to use for None.
+                from .UtilNodes import EvalWithTempExprNode, ResultRefNode
+
+                # builds "default_value if node is None else <node as Py_ssize_t>",
+                # referring to 'node' through a single ResultRefNode
+                node_ref = ResultRefNode(node)
+                new_expr = CondExprNode(
+                    node.pos,
+                    true_val=IntNode(
+                        node.pos,
+                        type=c_int,
+                        value=default_value,
+                        constant_result=int(default_value) if default_value.isdigit() else not_a_constant,
+                    ),
+                    false_val=node_ref.coerce_to(c_int, env),
+                    test=PrimaryCmpNode(
+                        node.pos,
+                        operand1=node_ref,
+                        operator='is',
+                        operand2=NoneNode(node.pos),
+                    ).analyse_types(env)
+                ).analyse_result_type(env)
+                return EvalWithTempExprNode(node_ref, new_expr)
+
+            if self.start:
+                if self.start.type.is_pyobject:
+                    # a None start means 0
+                    self.start = allow_none(self.start, '0', env)
+                self.start = self.start.coerce_to(c_int, env)
+            if self.stop:
+                if self.stop.type.is_pyobject:
+                    # a None stop means PY_SSIZE_T_MAX
+                    self.stop = allow_none(self.stop, 'PY_SSIZE_T_MAX', env)
+                self.stop = self.stop.coerce_to(c_int, env)
+        self.is_temp = 1
+        return self
+
+    def analyse_as_type(self, env):
+        # Interpret "basetype[:]" as a memory view type.  Returns None when the
+        # base is not a type or when a slice bound is given.
+        item_type = self.base.analyse_as_type(env)
+        if not item_type or self.start or self.stop:
+            return None
+
+        # memory view
+        from . import MemoryView
+        env.use_utility_code(MemoryView.view_utility_code)
+        no_bound = NoneNode(self.pos)
+        full_slice = SliceNode(self.pos, start=no_bound, stop=no_bound, step=no_bound)
+        axes = MemoryView.get_axes_specs(env, [full_slice])
+        return PyrexTypes.MemoryViewSliceType(item_type, axes)
+
+    def nogil_check(self, env):
+        # Remember whether we are in a nogil context (read again by
+        # generate_slice_guard_code()) before running the inherited check.
+        self.nogil = env.nogil
+        inherited_check = super(SliceIndexNode, self).nogil_check
+        return inherited_check(env)
+
+    gil_message = "Slicing Python object"
+
+    # "SliceObject" utility code instantiated for reading a slice;
+    # registered by generate_result_code()
+    get_slice_utility_code = TempitaUtilityCode.load(
+        "SliceObject", "ObjectHandling.c", context={'access': 'Get'})
+
+    # "SliceObject" utility code instantiated for writing a slice; registered
+    # by generate_assignment_code() and generate_deletion_code()
+    set_slice_utility_code = TempitaUtilityCode.load(
+        "SliceObject", "ObjectHandling.c", context={'access': 'Set'})
+
+    def coerce_to(self, dst_type, env):
+        # Coerce the slice to 'dst_type'.  Two cases are handled here, the rest
+        # is left to the inherited implementation:
+        #  - a slice of a C/C++ string adopts a Python string type as its own
+        #    result type (text targets need the 'c_string_encoding' directive)
+        #  - an unbounded slice of a C array coerced to an array type coerces
+        #    the base directly
+        base_type = self.base.type
+        if ((base_type.is_string or base_type.is_cpp_string)
+                and dst_type in (bytes_type, bytearray_type, str_type, unicode_type)):
+            needs_encoding = dst_type not in (bytes_type, bytearray_type)
+            if needs_encoding and not env.directives['c_string_encoding']:
+                error(self.pos,
+                    "default encoding required for conversion from '%s' to '%s'" %
+                    (base_type, dst_type))
+            self.type = dst_type
+
+        copies_whole_array = (dst_type.is_array and base_type.is_array
+                              and not self.start and not self.stop)
+        if copies_whole_array:
+            # redundant slice building, copy C arrays directly
+            return self.base.coerce_to(dst_type, env)
+        # otherwise: check array size if possible
+        return super(SliceIndexNode, self).coerce_to(dst_type, env)
+
+    def generate_result_code(self, code):
+        # Emit the C code that evaluates the slice into the result variable.
+        # Only Python object results are supported; any other result type is
+        # reported as a compile error.
+        if not self.type.is_pyobject:
+            error(self.pos,
+                  "Slicing is not currently supported for '%s'." % self.type)
+            return
+
+        base_result = self.base.result()
+        result = self.result()
+        start_code = self.start_code()
+        stop_code = self.stop_code()
+        if self.base.type.is_string:
+            # C string base: create the Python string object from the char buffer
+            base_result = self.base.result()
+            if self.base.type not in (PyrexTypes.c_char_ptr_type, PyrexTypes.c_const_char_ptr_type):
+                base_result = '((const char*)%s)' % base_result
+            # 'type_name' selects the __Pyx_Py<type_name>_FromString* helper
+            if self.type is bytearray_type:
+                type_name = 'ByteArray'
+            else:
+                type_name = self.type.name.title()
+            if self.stop is None:
+                # no stop given: use the helper that takes no length argument
+                code.putln(
+                    "%s = __Pyx_Py%s_FromString(%s + %s); %s" % (
+                        result,
+                        type_name,
+                        base_result,
+                        start_code,
+                        code.error_goto_if_null(result, self.pos)))
+            else:
+                # the length passed is stop - start
+                code.putln(
+                    "%s = __Pyx_Py%s_FromStringAndSize(%s + %s, %s - %s); %s" % (
+                        result,
+                        type_name,
+                        base_result,
+                        start_code,
+                        stop_code,
+                        start_code,
+                        code.error_goto_if_null(result, self.pos)))
+        elif self.base.type.is_pyunicode_ptr:
+            # Py_UNICODE* base: same scheme as for C strings above
+            base_result = self.base.result()
+            if self.base.type != PyrexTypes.c_py_unicode_ptr_type:
+                base_result = '((const Py_UNICODE*)%s)' % base_result
+            if self.stop is None:
+                code.putln(
+                    "%s = __Pyx_PyUnicode_FromUnicode(%s + %s); %s" % (
+                        result,
+                        base_result,
+                        start_code,
+                        code.error_goto_if_null(result, self.pos)))
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("pyunicode_from_unicode", "StringTools.c"))
+            else:
+                code.putln(
+                    "%s = __Pyx_PyUnicode_FromUnicodeAndLength(%s + %s, %s - %s); %s" % (
+                        result,
+                        base_result,
+                        start_code,
+                        stop_code,
+                        start_code,
+                        code.error_goto_if_null(result, self.pos)))
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("pyunicode_from_unicode", "StringTools.c"))
+
+        elif self.base.type is unicode_type:
+            # Python unicode base: dedicated substring helper
+            code.globalstate.use_utility_code(
+                          UtilityCode.load_cached("PyUnicode_Substring", "StringTools.c"))
+            code.putln(
+                "%s = __Pyx_PyUnicode_Substring(%s, %s, %s); %s" % (
+                    result,
+                    base_result,
+                    start_code,
+                    stop_code,
+                    code.error_goto_if_null(result, self.pos)))
+        elif self.type is py_object_type:
+            # generic object: pass C bounds, Python bounds and the cached slice
+            # object (see get_slice_config()) to the "Get" slice helper
+            code.globalstate.use_utility_code(self.get_slice_utility_code)
+            (has_c_start, has_c_stop, c_start, c_stop,
+             py_start, py_stop, py_slice) = self.get_slice_config()
+            code.putln(
+                "%s = __Pyx_PyObject_GetSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d); %s" % (
+                    result,
+                    self.base.py_result(),
+                    c_start, c_stop,
+                    py_start, py_stop, py_slice,
+                    has_c_start, has_c_stop,
+                    bool(code.globalstate.directives['wraparound']),
+                    code.error_goto_if_null(result, self.pos)))
+        else:
+            # remaining Python object result types: list and tuple bases have
+            # their own helpers, anything else uses PySequence_GetSlice()
+            if self.base.type is list_type:
+                code.globalstate.use_utility_code(
+                    TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
+                cfunc = '__Pyx_PyList_GetSlice'
+            elif self.base.type is tuple_type:
+                code.globalstate.use_utility_code(
+                    TempitaUtilityCode.load_cached("SliceTupleAndList", "ObjectHandling.c"))
+                cfunc = '__Pyx_PyTuple_GetSlice'
+            else:
+                cfunc = 'PySequence_GetSlice'
+            code.putln(
+                "%s = %s(%s, %s, %s); %s" % (
+                    result,
+                    cfunc,
+                    self.base.py_result(),
+                    start_code,
+                    stop_code,
+                    code.error_goto_if_null(result, self.pos)))
+        self.generate_gotref(code)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        # Emit the C code that assigns 'rhs' to this slice.  Python object
+        # targets go through __Pyx_PyObject_SetSlice(), all other targets are
+        # filled with memcpy().  The 'overloaded_assignment', 'exception_check'
+        # and 'exception_value' arguments are not used here.
+        self.generate_subexpr_evaluation_code(code)
+        if self.type.is_pyobject:
+            code.globalstate.use_utility_code(self.set_slice_utility_code)
+            has_c_start, has_c_stop, c_start, c_stop, py_start, py_stop, py_slice = self.get_slice_config()
+            code.put_error_if_neg(self.pos,
+                "__Pyx_PyObject_SetSlice(%s, %s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
+                    self.base.py_result(),
+                    rhs.py_result(),
+                    c_start, c_stop,
+                    py_start, py_stop, py_slice,
+                    has_c_start, has_c_stop,
+                    bool(code.globalstate.directives['wraparound'])))
+        else:
+            start_offset = self.start_code() if self.start else '0'
+            if rhs.type.is_array:
+                # the number of items to copy is the size of the rhs array;
+                # the guard checks it against the length of the target slice
+                array_length = rhs.type.size
+                self.generate_slice_guard_code(code, array_length)
+            else:
+                # no guard for a non-array rhs: copy stop - start items
+                array_length = '%s - %s' % (self.stop_code(), start_offset)
+
+            # string.h is needed for memcpy()
+            code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
+            code.putln("memcpy(&(%s[%s]), %s, sizeof(%s[0]) * (%s));" % (
+                self.base.result(), start_offset,
+                rhs.result(),
+                self.base.result(), array_length
+            ))
+
+        self.generate_subexpr_disposal_code(code)
+        self.free_subexpr_temps(code)
+        rhs.generate_disposal_code(code)
+        rhs.free_temps(code)
+
+    def generate_deletion_code(self, code, ignore_nonexisting=False):
+        # Emit the C code for "del base[start:stop]".  Only Python object bases
+        # can have slices deleted; anything else is a compile error.
+        # 'ignore_nonexisting' is not used here.
+        if self.base.type.is_pyobject:
+            self.generate_subexpr_evaluation_code(code)
+            code.globalstate.use_utility_code(self.set_slice_utility_code)
+            slice_config = self.get_slice_config()
+            (has_c_start, has_c_stop, c_start, c_stop,
+             py_start, py_stop, py_slice) = slice_config
+            del_call = "__Pyx_PyObject_DelSlice(%s, %s, %s, %s, %s, %s, %d, %d, %d)" % (
+                self.base.py_result(),
+                c_start, c_stop,
+                py_start, py_stop, py_slice,
+                has_c_start, has_c_stop,
+                bool(code.globalstate.directives['wraparound']))
+            code.put_error_if_neg(self.pos, del_call)
+            self.generate_subexpr_disposal_code(code)
+            self.free_subexpr_temps(code)
+        else:
+            error(self.pos,
+                  "Deleting slices is only supported for Python types, not '%s'." % self.type)
+
+    def get_slice_config(self):
+        # Collect the arguments shared by the __Pyx_PyObject_{Get,Set,Del}Slice
+        # calls.  Returns the tuple
+        #   (has_c_start, has_c_stop, c_start, c_stop, py_start, py_stop, py_slice)
+        # in which a missing C value is '0' and a missing Python value is 'NULL'.
+        def describe_bound(bound):
+            # -> (is a C value, C value code, pointer to Python value code)
+            if not bound:
+                return False, '0', 'NULL'
+            if bound.type.is_pyobject:
+                return False, '0', '&%s' % bound.py_result()
+            return True, bound.result(), 'NULL'
+
+        has_c_start, c_start, py_start = describe_bound(self.start)
+        has_c_stop, c_stop, py_stop = describe_bound(self.stop)
+        py_slice = '&%s' % self.slice.py_result() if self.slice else 'NULL'
+        return (has_c_start, has_c_stop, c_start, c_stop,
+                py_start, py_stop, py_slice)
+
+    def generate_slice_guard_code(self, code, target_size):
+        # Check that 'target_size' items match the length of this slice of a C
+        # array.  Does nothing unless the base is a C array.  Whatever can be
+        # decided at compile time is reported as a compile error; otherwise a
+        # C runtime check raising ValueError is emitted.
+        if not self.base.type.is_array:
+            return
+        slice_size = self.base.type.size
+        # 'slice_size' is an int while it is known at compile time and a C
+        # expression string otherwise
+        try:
+            total_length = slice_size = int(slice_size)
+        except ValueError:
+            total_length = None
+
+        # 'start'/'stop' remain set only if they are not integer literals and
+        # therefore have to be evaluated at runtime
+        start = stop = None
+        if self.stop:
+            stop = self.stop.result()
+            try:
+                stop = int(stop)
+                if stop < 0:
+                    # negative stop counts from the end of the array
+                    if total_length is None:
+                        slice_size = '%s + %d' % (slice_size, stop)
+                    else:
+                        slice_size += stop
+                else:
+                    slice_size = stop
+                stop = None
+            except ValueError:
+                pass
+
+        if self.start:
+            start = self.start.result()
+            try:
+                start = int(start)
+                if start < 0:
+                    # negative start counts from the end of the array
+                    if total_length is None:
+                        start = '%s + %d' % (self.base.type.size, start)
+                    else:
+                        start += total_length
+                if isinstance(slice_size, _py_int_types):
+                    slice_size -= start
+                else:
+                    slice_size = '%s - (%s)' % (slice_size, start)
+                start = None
+            except ValueError:
+                pass
+
+        runtime_check = None
+        compile_time_check = False
+        try:
+            int_target_size = int(target_size)
+        except ValueError:
+            int_target_size = None
+        else:
+            # a compile time decision needs both sizes to be integers
+            compile_time_check = isinstance(slice_size, _py_int_types)
+
+        if compile_time_check and slice_size < 0:
+            if int_target_size > 0:
+                error(self.pos, "Assignment to empty slice.")
+        elif compile_time_check and start is None and stop is None:
+            # we know the exact slice length
+            if int_target_size != slice_size:
+                error(self.pos, "Assignment to slice of wrong length, expected %s, got %s" % (
+                      slice_size, target_size))
+        elif start is not None:
+            if stop is None:
+                stop = slice_size
+            runtime_check = "(%s)-(%s)" % (stop, start)
+        elif stop is not None:
+            runtime_check = stop
+        else:
+            runtime_check = slice_size
+
+        if runtime_check:
+            code.putln("if (unlikely((%s) != (%s))) {" % (runtime_check, target_size))
+            # the GIL is acquired around PyErr_Format() when in a nogil context
+            if self.nogil:
+                code.put_ensure_gil()
+            code.putln(
+                'PyErr_Format(PyExc_ValueError, "Assignment to slice of wrong length,'
+                ' expected %%" CYTHON_FORMAT_SSIZE_T "d, got %%" CYTHON_FORMAT_SSIZE_T "d",'
+                ' (Py_ssize_t)(%s), (Py_ssize_t)(%s));' % (
+                    target_size, runtime_check))
+            if self.nogil:
+                code.put_release_ensured_gil()
+            code.putln(code.error_goto(self.pos))
+            code.putln("}")
+
+    def start_code(self):
+        # C code of the lower bound, "0" if no start was given
+        return self.start.result() if self.start else "0"
+
+    def stop_code(self):
+        # C code of the upper bound.  Without an explicit stop this is the
+        # array size for C array bases and PY_SSIZE_T_MAX for everything else.
+        if self.stop:
+            return self.stop.result()
+        if self.base.type.is_array:
+            return self.base.type.size
+        return "PY_SSIZE_T_MAX"
+
+    def calculate_result_code(self):
+        # this method must exist, although the value it returns
+        # is never used as self.result()
+        placeholder = "<unused>"
+        return placeholder
+
+
+class SliceNode(ExprNode):
+    #  start:stop:step in subscript list
+    #
+    #  start     ExprNode
+    #  stop      ExprNode
+    #  step      ExprNode
+    #
+    #  All three parts are always nodes; an omitted part is represented
+    #  by a node whose 'is_none' flag is set (see SliceIntNode).
+
+    subexprs = ['start', 'stop', 'step']
+    is_slice = True
+    type = slice_type
+    is_temp = 1
+
+    def calculate_constant_result(self):
+        # builds a Python slice object from the constant results of the parts
+        parts = (self.start.constant_result,
+                 self.stop.constant_result,
+                 self.step.constant_result)
+        self.constant_result = slice(*parts)
+
+    def compile_time_value(self, denv):
+        # evaluates the parts in the order start, stop, step
+        values = [part.compile_time_value(denv)
+                  for part in (self.start, self.stop, self.step)]
+        try:
+            return slice(*values)
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+
+    def may_be_none(self):
+        # a slice node never evaluates to None
+        return False
+
+    def analyse_types(self, env):
+        # Analyse all three parts first, then coerce each of them to a Python
+        # object.  A slice made of literals only is itself a literal and no
+        # longer a temp.
+        analysed_start, analysed_stop, analysed_step = [
+            part.analyse_types(env) for part in (self.start, self.stop, self.step)]
+        self.start = analysed_start.coerce_to_pyobject(env)
+        self.stop = analysed_stop.coerce_to_pyobject(env)
+        self.step = analysed_step.coerce_to_pyobject(env)
+        if all(part.is_literal for part in (self.start, self.stop, self.step)):
+            self.is_literal = True
+            self.is_temp = False
+        return self
+
+    gil_message = "Constructing Python slice object"
+
+    def calculate_result_code(self):
+        # 'result_code' is assigned in generate_result_code() for literal slices
+        return self.result_code
+
+    def generate_result_code(self, code):
+        # Emit the PySlice_New() call.  Literal slices are created in the
+        # writer obtained from get_cached_constants_writer(); nothing is
+        # emitted when that returns None.
+        writer = code
+        if self.is_literal:
+            self.result_code = code.get_py_const(
+                py_object_type, 'slice', cleanup_level=2,
+                dedup_key=make_dedup_key(self.type, (self,)))
+            writer = code.get_cached_constants_writer(self.result_code)
+            if writer is None:
+                return  # already initialised
+            writer.mark_pos(self.pos)
+
+        writer.putln(
+            "%s = PySlice_New(%s, %s, %s); %s" % (
+                self.result(),
+                self.start.py_result(),
+                self.stop.py_result(),
+                self.step.py_result(),
+                writer.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(writer)
+        if self.is_literal:
+            self.generate_giveref(writer)
+
+class SliceIntNode(SliceNode):
+    #  start:stop:step in subscript list
+    # This is just a node to hold start,stop and step nodes that can be
+    # converted to integers. This does not generate a slice python object.
+    #
+    #  start     ExprNode
+    #  stop      ExprNode
+    #  step      ExprNode
+
+    is_temp = 0
+
+    def calculate_constant_result(self):
+        # builds a Python slice object from the constant results of the parts
+        parts = (self.start.constant_result,
+                 self.stop.constant_result,
+                 self.step.constant_result)
+        self.constant_result = slice(*parts)
+
+    def compile_time_value(self, denv):
+        # evaluates the parts in the order start, stop, step
+        values = [part.compile_time_value(denv)
+                  for part in (self.start, self.stop, self.step)]
+        try:
+            return slice(*values)
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+
+    def may_be_none(self):
+        # a slice node never evaluates to None
+        return False
+
+    def analyse_types(self, env):
+        # Analyse all three parts first, then coerce every part that is not
+        # None to an integer.  A slice made of literals only is itself a literal.
+        part_names = ('start', 'stop', 'step')
+        for name in part_names:
+            setattr(self, name, getattr(self, name).analyse_types(env))
+
+        for name in part_names:
+            part = getattr(self, name)
+            if not part.is_none:
+                setattr(self, name, part.coerce_to_integer(env))
+
+        if all(getattr(self, name).is_literal for name in part_names):
+            self.is_literal = True
+            self.is_temp = False
+        return self
+
+    def calculate_result_code(self):
+        # this node has no result code of its own
+        return None
+
+    def generate_result_code(self, code):
+        # no slice object is created; only asks cloned parts for the
+        # result of the node they clone
+        for part in (self.start, self.stop, self.step):
+            if isinstance(part, CloneNode):
+                part.arg.result()
+
+
+class CallNode(ExprNode):
+    #  Common base class of the call expression nodes (e.g. SimpleCallNode).
+
+    # allow overriding the default 'may_be_none' behaviour:
+    # any value other than None is returned as-is by may_be_none()
+    may_return_none = None
+
+    def infer_type(self, env):
+        # Infer the type of the call result from the inferred type of the
+        # called object; falls back to a generic Python object.
+        # TODO(robertwb): Reduce redundancy with analyse_types.
+        function = self.function
+        callee_type = function.infer_type(env)
+        if isinstance(function, NewExprNode):
+            # note: needs call to infer_type() above
+            return PyrexTypes.CPtrType(function.class_type)
+
+        if callee_type is py_object_type:
+            # function might have lied for safety => try to find better type
+            entry = getattr(function, 'entry', None)
+            if entry is not None:
+                callee_type = entry.type or callee_type
+        if callee_type.is_ptr:
+            callee_type = callee_type.base_type
+
+        if callee_type.is_cfunction:
+            if getattr(function, 'entry', None) and hasattr(self, 'args'):
+                # pick the best matching overload, if there is one
+                alternatives = function.entry.all_alternatives()
+                arg_types = [arg.infer_type(env) for arg in self.args]
+                best_entry = PyrexTypes.best_match(arg_types, alternatives)
+                if best_entry:
+                    callee_type = best_entry.type
+                    if callee_type.is_ptr:
+                        callee_type = callee_type.base_type
+            return callee_type.return_type
+
+        if callee_type is type_type:
+            if function.is_name and function.entry and function.entry.type:
+                constructed_type = function.entry.type
+                if constructed_type.is_extension_type:
+                    return constructed_type
+                if constructed_type.is_builtin_type:
+                    entry_name = function.entry.name
+                    if entry_name == 'float':
+                        return PyrexTypes.c_double_type
+                    if entry_name in Builtin.types_that_construct_their_instance:
+                        return constructed_type
+
+        # calling a struct/union or C++ class type creates an instance of it
+        called_type = function.analyse_as_type(env)
+        if called_type and (called_type.is_struct_or_union or called_type.is_cpp_class):
+            return called_type
+        return py_object_type
+
+    def type_dependencies(self, env):
+        # The type of a call depends on whatever the called object depends on.
+        # TODO: Update when Danilo's C++ code merged in to handle
+        # the case of function overloading.
+        callee = self.function
+        return callee.type_dependencies(env)
+
+    def is_simple(self):
+        # Calls are never treated as simple.  C function calls could be, but
+        # their side-effects may hit when several operations have to happen
+        # in order, e.g. when building the argument sequence of another call
+        # or when comparing values.
+        return False
+
+    def may_be_none(self):
+        # An explicit 'may_return_none' setting wins.  Otherwise, calling an
+        # extension type, or a builtin type that constructs its own instance,
+        # never gives None.
+        if self.may_return_none is not None:
+            return self.may_return_none
+        function = self.function
+        if function.type is type_type and function.is_name:
+            entry = function.entry
+            if entry.type.is_extension_type or (
+                    entry.type.is_builtin_type and
+                    entry.name in Builtin.types_that_construct_their_instance):
+                return False
+        return ExprNode.may_be_none(self)
+
+    def set_py_result_type(self, function, func_type=None):
+        # Set the result type for a call to the Python object 'function'.
+        # 'func_type' defaults to the type of 'function'.
+        if func_type is None:
+            func_type = function.type
+        calls_instance_constructing_builtin = func_type is Builtin.type_type and (
+                function.is_name and
+                function.entry and
+                function.entry.is_builtin and
+                function.entry.name in Builtin.types_that_construct_their_instance)
+
+        if calls_instance_constructing_builtin:
+            # calling a builtin type that returns a specific object type
+            builtin_name = function.entry.name
+            if builtin_name == 'float':
+                # the following will come true later on in a transform
+                self.type = PyrexTypes.c_double_type
+                self.result_ctype = PyrexTypes.c_double_type
+            else:
+                self.type = Builtin.builtin_types[builtin_name]
+                self.result_ctype = py_object_type
+            self.may_return_none = False
+        elif function.is_name and function.type_entry:
+            # We are calling an extension type constructor.  As long as we do not
+            # support __new__(), the result type is clear
+            self.type = function.type_entry.type
+            self.result_ctype = py_object_type
+            self.may_return_none = False
+        else:
+            self.type = py_object_type
+
+    def analyse_as_type_constructor(self, env):
+        # Handle calls whose "function" is really a struct/union or C++ class
+        # type.  Returns a true value if the call was handled that way (True,
+        # or 'self' after reporting a missing C++ constructor) and None
+        # (implicitly) if the function is no such type.
+        type = self.function.analyse_as_type(env)
+        if type and type.is_struct_or_union:
+            # build a dict that maps the member names to the arguments:
+            # positional arguments are paired with the members in declaration order
+            args, kwds = self.explicit_args_kwds()
+            items = []
+            for arg, member in zip(args, type.scope.var_entries):
+                items.append(DictItemNode(pos=arg.pos, key=StringNode(pos=arg.pos, value=member.name), value=arg))
+            if kwds:
+                items += kwds.key_value_pairs
+            self.key_value_pairs = items
+            # turn this node into a DictNode in place, so that the calls
+            # below are those of DictNode
+            self.__class__ = DictNode
+            self.analyse_types(env)    # FIXME
+            self.coerce_to(type, env)
+            return True
+        elif type and type.is_cpp_class:
+            self.args = [ arg.analyse_types(env) for arg in self.args ]
+            # the constructor is looked up under the name "<init>" in the class scope
+            constructor = type.scope.lookup("<init>")
+            if not constructor:
+                error(self.function.pos, "no constructor found for C++  type '%s'" % self.function.name)
+                self.type = error_type
+                return self
+            # call the constructor as a C function named by the type's declaration code
+            self.function = RawCNameExprNode(self.function.pos, constructor.type)
+            self.function.entry = constructor
+            self.function.set_cname(type.empty_declaration_code())
+            self.analyse_c_function_call(env)
+            self.type = type
+            return True
+
+    def is_lvalue(self):
+        # a call result can only be assigned to if it is a reference
+        result_type = self.type
+        return result_type.is_reference
+
+    def nogil_check(self, env):
+        # Calling a Python object always needs the GIL.  Calling anything else
+        # needs it unless its type is the error type or has a true 'nogil' flag.
+        func_type = self.function_type()
+        needs_gil = func_type.is_pyobject or not (
+            func_type.is_error or getattr(func_type, 'nogil', False))
+        if needs_gil:
+            self.gil_error()
+
+    gil_message = "Calling gil-requiring function"
+
+
+class SimpleCallNode(CallNode):
+    #  Function call without keyword, * or ** args.
+    #
+    #  function       ExprNode
+    #  args           [ExprNode]
+    #  arg_tuple      ExprNode or None     used internally
+    #  self           ExprNode or None     used internally
+    #  coerced_self   ExprNode or None     used internally
+    #  wrapper_call   bool                 used internally
+    #  has_optional_args   bool            used internally
+    #  nogil          bool                 used internally
+    #  analysed       bool                 set by analyse_types() on its first run
+
+    subexprs = ['self', 'coerced_self', 'function', 'args', 'arg_tuple']
+
+    # for C method calls, analyse_types() stores the object that the
+    # method was looked up on here
+    self = None
+    coerced_self = None
+    arg_tuple = None
+    wrapper_call = False
+    has_optional_args = False
+    nogil = False
+    # analyse_types() returns immediately once this is set
+    analysed = False
+    overflowcheck = False
+
+    def compile_time_value(self, denv):
+        # Call the compile time value of the function with the compile time
+        # values of the arguments.  Exceptions raised by the call itself are
+        # passed to compile_time_value_error().
+        callee = self.function.compile_time_value(denv)
+        call_args = [arg.compile_time_value(denv) for arg in self.args]
+        try:
+            return callee(*call_args)
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+
+    @classmethod
+    def for_cproperty(cls, pos, obj, entry):
+        # Build the call node that implements access to a C property:
+        # the getter is looked up by the property's name in the scope of
+        # 'entry' and gets called with 'obj' as its only argument.
+        scope = entry.scope
+        getter = scope.lookup_here(entry.name)
+        assert getter, "Getter not found in scope %s: %s" % (scope, scope.entries)
+        getter_node = NameNode(pos, name=entry.name, entry=getter, type=getter.type)
+        return cls(pos, function=getter_node, args=[obj])
+
+    def analyse_as_type(self, env):
+        # Interpret a call to the Cython attribute 'pointer' or 'typeof' as a type.
+        #
+        # Returns the resulting type.  Returns None when the called function
+        # is neither of the two, when the argument of 'pointer' is not a known
+        # type, or when the call does not have exactly one argument (an error
+        # is reported in the last two cases).
+        attr = self.function.as_cython_attribute()
+        if attr not in ('pointer', 'typeof'):
+            return None
+        if len(self.args) != 1:
+            # 'self.args' is a plain list and has no 'pos' attribute, so the
+            # error is reported at the position of the call itself.  Bail out
+            # here: with no arguments there is no 'self.args[0]' to analyse.
+            error(self.pos, "only one type allowed.")
+            return None
+        if attr == 'pointer':
+            type = self.args[0].analyse_as_type(env)
+            if not type:
+                error(self.args[0].pos, "Unknown type")
+                return None
+            return PyrexTypes.CPtrType(type)
+        # attr == 'typeof': the type is that of the analysed operand.
+        operand = self.args[0].analyse_types(env)
+        return operand.type
+
+    def explicit_args_kwds(self):
+        # A simple call has positional arguments only, hence no keywords (None).
+        positional_args = self.args
+        return positional_args, None
+
+    def analyse_types(self, env):
+        # Analyse the called function and the call arguments and set up the
+        # result type.  Returns the node to use for this call: normally 'self',
+        # or a NumPyMethodCallNode when the call is handed over to Pythran.
+        if self.analysed:
+            # Already analysed by an earlier call, nothing left to do.
+            return self
+        self.analysed = True
+        if self.analyse_as_type_constructor(env):
+            return self
+        self.function.is_called = 1
+        self.function = self.function.analyse_types(env)
+        function = self.function
+
+        if function.is_attribute and function.entry and function.entry.is_cmethod:
+            # Take ownership of the object from which the attribute
+            # was obtained, because we need to pass it as 'self'.
+            self.self = function.obj
+            function.obj = CloneNode(self.self)
+
+        func_type = self.function_type()
+        self.is_numpy_call_with_exprs = False
+        if (has_np_pythran(env) and function.is_numpy_attribute and
+                pythran_is_numpy_func_supported(function)):
+            # Candidate for a Pythran call: only taken if every single
+            # argument passes is_pythran_supported_node_or_none().
+            has_pythran_args = True
+            self.arg_tuple = TupleNode(self.pos, args = self.args)
+            self.arg_tuple = self.arg_tuple.analyse_types(env)
+            for arg in self.arg_tuple.args:
+                has_pythran_args &= is_pythran_supported_node_or_none(arg)
+            self.is_numpy_call_with_exprs = bool(has_pythran_args)
+        if self.is_numpy_call_with_exprs:
+            env.add_include_file(pythran_get_func_include_file(function))
+            return NumPyMethodCallNode.from_node(
+                self,
+                function_cname=pythran_functor(function),
+                arg_tuple=self.arg_tuple,
+                type=PythranExpr(pythran_func_type(function, self.arg_tuple.args)),
+            )
+        elif func_type.is_pyobject:
+            # Python call: the arguments are packed into a Python tuple and
+            # the separate 'args' list is dropped.
+            self.arg_tuple = TupleNode(self.pos, args = self.args)
+            self.arg_tuple = self.arg_tuple.analyse_types(env).coerce_to_pyobject(env)
+            self.args = None
+            self.set_py_result_type(function, func_type)
+            self.is_temp = 1
+        else:
+            # C call: analyse the arguments one by one and let
+            # analyse_c_function_call() match them against the signature.
+            self.args = [ arg.analyse_types(env) for arg in self.args ]
+            self.analyse_c_function_call(env)
+            if func_type.exception_check == '+':
+                # Calls with exception_check '+' always get a temp result.
+                self.is_temp = True
+
+        return self
+
+    def function_type(self):
+        # Type of the thing being called.  For a pointer type (e.g. a
+        # function pointer), this is the type that the pointer refers to.
+        callee_type = self.function.type
+        return callee_type.base_type if callee_type.is_ptr else callee_type
+
+    def analyse_c_function_call(self, env):
+        # Analyse a call to a C function, C method or C++ callable object.
+        #
+        # Picks the best matching overload (or fused specialisation), coerces
+        # the arguments to the formal argument types, and determines the
+        # result type as well as whether the result needs a temp.  On failure,
+        # 'self.type' is set to the error type and the method returns early.
+        func_type = self.function.type
+        if func_type is error_type:
+            self.type = error_type
+            return
+
+        # Build the effective argument list: bound (non-static) method calls
+        # pass the object as first argument.
+        if func_type.is_cfunction and func_type.is_static_method:
+            if self.self and self.self.type.is_extension_type:
+                # To support this we'd need to pass self to determine whether
+                # it was overloaded in Python space (possibly via a Cython
+                # superclass turning a cdef method into a cpdef one).
+                error(self.pos, "Cannot call a static method on an instance variable.")
+            args = self.args
+        elif self.self:
+            args = [self.self] + self.args
+        else:
+            args = self.args
+
+        # Find the entry from which the overload alternatives are taken.
+        if func_type.is_cpp_class:
+            overloaded_entry = self.function.type.scope.lookup("operator()")
+            if overloaded_entry is None:
+                self.type = PyrexTypes.error_type
+                self.result_code = "<error>"
+                return
+        elif hasattr(self.function, 'entry'):
+            overloaded_entry = self.function.entry
+        elif self.function.is_subscript and self.function.is_fused_index:
+            overloaded_entry = self.function.type.entry
+        else:
+            overloaded_entry = None
+
+        if overloaded_entry:
+            if self.function.type.is_fused:
+                functypes = self.function.type.get_all_specialized_function_types()
+                alternatives = [f.entry for f in functypes]
+            else:
+                alternatives = overloaded_entry.all_alternatives()
+
+            entry = PyrexTypes.best_match([arg.type for arg in args],
+                                          alternatives, self.pos, env, args)
+
+            if not entry:
+                self.type = PyrexTypes.error_type
+                self.result_code = "<error>"
+                return
+
+            entry.used = True
+            if not func_type.is_cpp_class:
+                self.function.entry = entry
+            self.function.type = entry.type
+            func_type = self.function_type()
+        else:
+            entry = None
+            func_type = self.function_type()
+            if not func_type.is_cfunction:
+                error(self.pos, "Calling non-function type '%s'" % func_type)
+                self.type = PyrexTypes.error_type
+                self.result_code = "<error>"
+                return
+
+        # Check no. of args
+        max_nargs = len(func_type.args)
+        expected_nargs = max_nargs - func_type.optional_arg_count
+        actual_nargs = len(args)
+        if func_type.optional_arg_count and expected_nargs != actual_nargs:
+            self.has_optional_args = 1
+            self.is_temp = 1
+
+        # check 'self' argument
+        if entry and entry.is_cmethod and func_type.args and not func_type.is_static_method:
+            formal_arg = func_type.args[0]
+            arg = args[0]
+            if formal_arg.not_none:
+                if self.self:
+                    self.self = self.self.as_none_safe_node(
+                        "'NoneType' object has no attribute '%{0}s'".format('.30' if len(entry.name) <= 30 else ''),
+                        error='PyExc_AttributeError',
+                        format_args=[entry.name])
+                else:
+                    # unbound method
+                    arg = arg.as_none_safe_node(
+                        "descriptor '%s' requires a '%s' object but received a 'NoneType'",
+                        format_args=[entry.name, formal_arg.type.name])
+            if self.self:
+                if formal_arg.accept_builtin_subtypes:
+                    arg = CMethodSelfCloneNode(self.self)
+                else:
+                    arg = CloneNode(self.self)
+                arg = self.coerced_self = arg.coerce_to(formal_arg.type, env)
+            elif formal_arg.type.is_builtin_type:
+                # special case: unbound methods of builtins accept subtypes
+                arg = arg.coerce_to(formal_arg.type, env)
+                if arg.type.is_builtin_type and isinstance(arg, PyTypeTestNode):
+                    arg.exact_builtin_type = False
+            args[0] = arg
+
+        # Coerce arguments
+        some_args_in_temps = False
+        for i in range(min(max_nargs, actual_nargs)):
+            formal_arg = func_type.args[i]
+            formal_type = formal_arg.type
+            arg = args[i].coerce_to(formal_type, env)
+            if formal_arg.not_none:
+                # C methods must do the None checks at *call* time
+                arg = arg.as_none_safe_node(
+                    "cannot pass None into a C function argument that is declared 'not None'")
+            if arg.is_temp:
+                if i > 0:
+                    # first argument in temp doesn't impact subsequent arguments
+                    some_args_in_temps = True
+            elif arg.type.is_pyobject and not env.nogil:
+                if i == 0 and self.self is not None:
+                    # a method's cloned "self" argument is ok
+                    pass
+                elif arg.nonlocally_immutable():
+                    # plain local variables are ok
+                    pass
+                else:
+                    # we do not safely own the argument's reference,
+                    # but we must make sure it cannot be collected
+                    # before we return from the function, so we create
+                    # an owned temp reference to it
+                    if i > 0:  # first argument doesn't matter
+                        some_args_in_temps = True
+                    arg = arg.coerce_to_temp(env)
+            args[i] = arg
+
+        # handle additional varargs parameters
+        for i in range(max_nargs, actual_nargs):
+            arg = args[i]
+            if arg.type.is_pyobject:
+                if arg.type is str_type:
+                    arg_ctype = PyrexTypes.c_char_ptr_type
+                else:
+                    arg_ctype = arg.type.default_coerced_ctype()
+                if arg_ctype is None:
+                    # 'i' indexes the local 'args' list, which only has 'self'
+                    # prepended for bound method calls, so 'self.args[i-1]' is
+                    # not generally the offending argument.  Report the error
+                    # at the argument itself.
+                    error(arg.pos,
+                          "Python object cannot be passed as a varargs parameter")
+                else:
+                    args[i] = arg = arg.coerce_to(arg_ctype, env)
+            if arg.is_temp and i > 0:
+                some_args_in_temps = True
+
+        if some_args_in_temps:
+            # if some args are temps and others are not, they may get
+            # constructed in the wrong order (temps first) => make
+            # sure they are either all temps or all not temps (except
+            # for the last argument, which is evaluated last in any
+            # case)
+            for i in range(actual_nargs-1):
+                if i == 0 and self.self is not None:
+                    continue  # self is ok
+                arg = args[i]
+                if arg.nonlocally_immutable():
+                    # locals, C functions, unassignable types are safe.
+                    pass
+                elif arg.type.is_cpp_class:
+                    # Assignment has side effects, avoid.
+                    pass
+                elif env.nogil and arg.type.is_pyobject:
+                    # can't copy a Python reference into a temp in nogil
+                    # env (this is safe: a construction would fail in
+                    # nogil anyway)
+                    pass
+                else:
+                    #self.args[i] = arg.coerce_to_temp(env)
+                    # instead: issue a warning
+                    if i > 0:  # skip first arg
+                        warning(arg.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
+                        break
+
+        # Store the coerced arguments (including a prepended 'self') in place.
+        self.args[:] = args
+
+        # Calc result type and code fragment
+        if isinstance(self.function, NewExprNode):
+            self.type = PyrexTypes.CPtrType(self.function.class_type)
+        else:
+            self.type = func_type.return_type
+
+        if self.function.is_name or self.function.is_attribute:
+            func_entry = self.function.entry
+            if func_entry and (func_entry.utility_code or func_entry.utility_code_definition):
+                self.is_temp = 1  # currently doesn't work for self.calculate_result_code()
+
+        if self.type.is_pyobject:
+            self.result_ctype = py_object_type
+            self.is_temp = 1
+        elif func_type.exception_value is not None or func_type.exception_check:
+            self.is_temp = 1
+        elif self.type.is_memoryviewslice:
+            self.is_temp = 1
+            # func_type.exception_check = True
+
+        if self.is_temp and self.type.is_reference:
+            self.type = PyrexTypes.CFakeReferenceType(self.type.ref_base_type)
+
+        # C++ exception handler
+        if func_type.exception_check == '+':
+            if needs_cpp_exception_conversion(func_type):
+                env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+
+        self.overflowcheck = env.directives['overflowcheck']
+
+    def calculate_result_code(self):
+        # The result code of a call is the C call expression itself.
+        call_code = self.c_call_code()
+        return call_code
+
+    def c_call_code(self):
+        # Build the C expression "function(arg, ...)" for this call.
+        # Yields "<error>" for failed calls and for non-C-function callees.
+        callee_type = self.function_type()
+        if self.type is PyrexTypes.error_type or not callee_type.is_cfunction:
+            return "<error>"
+
+        formals = callee_type.args
+        actuals = self.args
+        required_count = len(formals) - callee_type.optional_arg_count
+
+        # Non-optional arguments, each passed as its formal argument type.
+        pieces = [
+            actual.move_result_rhs_as(formal.type)
+            for formal, actual in list(zip(formals, actuals))[:required_count]
+        ]
+
+        # Overridable functions receive an additional 0/1 flag argument.
+        if callee_type.is_overridable:
+            flag = self.wrapper_call or self.function.entry.is_unbound_cmethod
+            pieces.append(str(int(flag)))
+
+        # Optional arguments travel in a struct, or NULL if none were given.
+        if callee_type.optional_arg_count:
+            if required_count == len(actuals):
+                pieces.append('NULL')
+            else:
+                pieces.append("&%s" % self.opt_arg_struct)
+
+        # Everything beyond the declared arguments is passed through as is.
+        pieces.extend(extra.move_result_rhs() for extra in actuals[len(formals):])
+
+        return "%s(%s)" % (self.function.result(), ', '.join(pieces))
+
+    def is_c_result_required(self):
+        # No result temp is needed (skip allocating an unused one) when the
+        # function type has no exception value or uses exception_check '+'.
+        callee_type = self.function_type()
+        result_unused = not callee_type.exception_value or callee_type.exception_check == '+'
+        return not result_unused
+
+    def generate_evaluation_code(self, code):
+        # Generate the code that evaluates this call.
+        #
+        # Python calls with no argument or with a single argument of a
+        # non-literal argument tuple are special cased below; everything
+        # else goes through the inherited implementation.
+        function = self.function
+        if function.is_name or function.is_attribute:
+            code.globalstate.use_entry_utility_code(function.entry)
+
+        # With the 'overflowcheck' setting on, guard calls of the C abs()
+        # functions listed below for signed integer results: passing the
+        # most negative integer raises an OverflowError at runtime.
+        abs_function_cnames = ('abs', 'labs', '__Pyx_abs_longlong')
+        is_signed_int = self.type.is_int and self.type.signed
+        if self.overflowcheck and is_signed_int and function.result() in abs_function_cnames:
+            code.globalstate.use_utility_code(UtilityCode.load_cached("Common", "Overflow.c"))
+            code.putln('if (unlikely(%s == __PYX_MIN(%s))) {\
+                PyErr_SetString(PyExc_OverflowError,\
+                                "Trying to take the absolute value of the most negative integer is not defined."); %s; }' % (
+                            self.args[0].result(),
+                            self.args[0].type.empty_declaration_code(),
+                            code.error_goto(self.pos)))
+
+        # The first test short-circuits for non-Python callees, so that
+        # 'self.arg_tuple' is only looked at for Python calls.
+        if not function.type.is_pyobject or len(self.arg_tuple.args) > 1 or (
+                self.arg_tuple.args and self.arg_tuple.is_literal):
+            super(SimpleCallNode, self).generate_evaluation_code(code)
+            return
+
+        # Special case 0-args and try to avoid explicit tuple creation for Python calls with 1 arg.
+        arg = self.arg_tuple.args[0] if self.arg_tuple.args else None
+        subexprs = (self.self, self.coerced_self, function, arg)
+        for subexpr in subexprs:
+            if subexpr is not None:
+                subexpr.generate_evaluation_code(code)
+
+        code.mark_pos(self.pos)
+        assert self.is_temp
+        self.allocate_temp_result(code)
+
+        if arg is None:
+            code.globalstate.use_utility_code(UtilityCode.load_cached(
+                "PyObjectCallNoArg", "ObjectHandling.c"))
+            code.putln(
+                "%s = __Pyx_PyObject_CallNoArg(%s); %s" % (
+                    self.result(),
+                    function.py_result(),
+                    code.error_goto_if_null(self.result(), self.pos)))
+        else:
+            code.globalstate.use_utility_code(UtilityCode.load_cached(
+                "PyObjectCallOneArg", "ObjectHandling.c"))
+            code.putln(
+                "%s = __Pyx_PyObject_CallOneArg(%s, %s); %s" % (
+                    self.result(),
+                    function.py_result(),
+                    arg.py_result(),
+                    code.error_goto_if_null(self.result(), self.pos)))
+
+        self.generate_gotref(code)
+
+        # The subexpressions were evaluated by hand above, so they are
+        # disposed of and their temps released by hand as well.
+        for subexpr in subexprs:
+            if subexpr is not None:
+                subexpr.generate_disposal_code(code)
+                subexpr.free_temps(code)
+
+    def generate_result_code(self, code):
+        # Generate the C code of the call itself.
+        #
+        # Python callables are called via __Pyx_PyObject_Call() with the
+        # argument tuple.  C functions are called directly, followed by
+        # whatever error checks their function type asks for.
+        func_type = self.function_type()
+        if func_type.is_pyobject:
+            arg_code = self.arg_tuple.py_result()
+            code.globalstate.use_utility_code(UtilityCode.load_cached(
+                "PyObjectCall", "ObjectHandling.c"))
+            code.putln(
+                "%s = __Pyx_PyObject_Call(%s, %s, NULL); %s" % (
+                    self.result(),
+                    self.function.py_result(),
+                    arg_code,
+                    code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+        elif func_type.is_cfunction:
+            nogil = not code.funcstate.gil_owned
+            if self.has_optional_args:
+                # Optional arguments are passed in a temporary struct: first
+                # the number of optional arguments that were actually given,
+                # then one field per given optional argument.
+                actual_nargs = len(self.args)
+                expected_nargs = len(func_type.args) - func_type.optional_arg_count
+                self.opt_arg_struct = code.funcstate.allocate_temp(
+                    func_type.op_arg_struct.base_type, manage_ref=True)
+                code.putln("%s.%s = %s;" % (
+                        self.opt_arg_struct,
+                        Naming.pyrex_prefix + "n",
+                        len(self.args) - expected_nargs))
+                args = list(zip(func_type.args, self.args))
+                for formal_arg, actual_arg in args[expected_nargs:actual_nargs]:
+                    code.putln("%s.%s = %s;" % (
+                            self.opt_arg_struct,
+                            func_type.opt_arg_cname(formal_arg.name),
+                            actual_arg.result_as(formal_arg.type)))
+            # C conditions which signal an error when all of them
+            # hold (they are joined with "&&" further down).
+            exc_checks = []
+            if self.type.is_pyobject and self.is_temp:
+                exc_checks.append("!%s" % self.result())
+            elif self.type.is_memoryviewslice:
+                assert self.is_temp
+                exc_checks.append(self.type.error_condition(self.result()))
+            elif func_type.exception_check != '+':
+                exc_val = func_type.exception_value
+                exc_check = func_type.exception_check
+                if exc_val is not None:
+                    exc_checks.append("%s == %s" % (self.result(), func_type.return_type.cast_code(exc_val)))
+                if exc_check:
+                    if nogil:
+                        if not exc_checks:
+                            # No exception value to test first: write a
+                            # performance hint about this call.
+                            perf_hint_entry = getattr(self.function, "entry", None)
+                            PyrexTypes.write_noexcept_performance_hint(
+                                self.pos, code.funcstate.scope,
+                                function_name=perf_hint_entry.name if perf_hint_entry else None,
+                                void_return=self.type.is_void, is_call=True,
+                                is_from_pxd=(perf_hint_entry and perf_hint_entry.defined_in_pxd))
+                        code.globalstate.use_utility_code(
+                            UtilityCode.load_cached("ErrOccurredWithGIL", "Exceptions.c"))
+                        exc_checks.append("__Pyx_ErrOccurredWithGIL()")
+                    else:
+                        exc_checks.append("PyErr_Occurred()")
+            if self.is_temp or exc_checks:
+                rhs = self.c_call_code()
+                if self.result():
+                    lhs = "%s = " % self.result()
+                    if self.is_temp and self.type.is_pyobject:
+                        #return_type = self.type # func_type.return_type
+                        #print "SimpleCallNode.generate_result_code: casting", rhs, \
+                        #    "from", return_type, "to pyobject" ###
+                        rhs = typecast(py_object_type, self.type, rhs)
+                else:
+                    lhs = ""
+                if func_type.exception_check == '+':
+                    # The call statement is handed to translate_cpp_exception().
+                    translate_cpp_exception(code, self.pos, '%s%s;' % (lhs, rhs),
+                                            self.result() if self.type.is_pyobject else None,
+                                            func_type.exception_value, nogil)
+                else:
+                    if exc_checks:
+                        goto_error = code.error_goto_if(" && ".join(exc_checks), self.pos)
+                    else:
+                        goto_error = ""
+                    code.putln("%s%s; %s" % (lhs, rhs, goto_error))
+                if self.type.is_pyobject and self.result():
+                    self.generate_gotref(code)
+            if self.has_optional_args:
+                # The optional args struct is not needed after the call.
+                code.funcstate.release_temp(self.opt_arg_struct)
+
+
+class NumPyMethodCallNode(ExprNode):
+    # Call of a NumPy function or method that is handed over to Pythran.
+    #
+    # function_cname  string      name of the function/method to call
+    # arg_tuple       TupleNode   the call arguments, given as an args tuple
+
+    subexprs = ['arg_tuple']
+    is_temp = True
+    may_return_none = True
+
+    def generate_evaluation_code(self, code):
+        # Evaluate the arguments, then destroy the current content of the
+        # result temp and construct the value of the call in its place.
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        assert self.arg_tuple.mult_factor is None
+        call_args = self.arg_tuple.args
+        for call_arg in call_args:
+            call_arg.generate_evaluation_code(code)
+
+        code.putln("// function evaluation code for numpy function")
+        code.putln("__Pyx_call_destructor(%s);" % self.result())
+        arg_code = ", ".join(call_arg.pythran_result() for call_arg in call_args)
+        code.putln("new (&%s) decltype(%s){%s{}(%s)};" % (
+            self.result(), self.result(), self.function_cname, arg_code))
+
+
+class PyMethodCallNode(SimpleCallNode):
+    # Specialised call to a (potential) PyMethodObject with non-constant argument tuple.
+    # Allows the self argument to be injected directly instead of repacking a tuple for it.
+    #
+    # function    ExprNode      the function/method object to call
+    # arg_tuple   TupleNode     the arguments for the args tuple
+
+    subexprs = ['function', 'arg_tuple']
+    is_temp = True
+
+    def generate_evaluation_code(self, code):
+        # Generate the call: evaluate function and arguments, unpack a bound
+        # method into its function and 'self' object (only if the C code is
+        # compiled with CYTHON_UNPACK_METHODS), and call the result through
+        # __Pyx_PyObject_FastCall() with a C array of arguments.
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        self.function.generate_evaluation_code(code)
+        assert self.arg_tuple.mult_factor is None
+        args = self.arg_tuple.args
+        for arg in args:
+            arg.generate_evaluation_code(code)
+
+        # make sure function is in temp so that we can replace the reference below if it's a method
+        reuse_function_temp = self.function.is_temp
+        if reuse_function_temp:
+            function = self.function.result()
+        else:
+            function = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+            self.function.make_owned_reference(code)
+            code.put("%s = %s; " % (function, self.function.py_result()))
+            self.function.generate_disposal_code(code)
+            self.function.free_temps(code)
+
+        # 'self_arg' stays NULL and the offset stays 0 unless a method gets
+        # unpacked by the generated code below.
+        self_arg = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+        code.putln("%s = NULL;" % self_arg)
+        arg_offset_cname = code.funcstate.allocate_temp(PyrexTypes.c_uint_type, manage_ref=False)
+        code.putln("%s = 0;" % arg_offset_cname)
+
+        def attribute_is_likely_method(attr):
+            # Heuristic: attributes of names that are Python globals
+            # are not expected to be methods, all others are.
+            obj = attr.obj
+            if obj.is_name and obj.entry.is_pyglobal:
+                return False  # more likely to be a function
+            return True
+
+        # Choose the branch prediction hint for the PyMethod_Check() below.
+        if self.function.is_attribute:
+            likely_method = 'likely' if attribute_is_likely_method(self.function) else 'unlikely'
+        elif self.function.is_name and self.function.cf_state:
+            # not an attribute itself, but might have been assigned from one (e.g. bound method)
+            for assignment in self.function.cf_state:
+                value = assignment.rhs
+                if value and value.is_attribute and value.obj.type and value.obj.type.is_pyobject:
+                    if attribute_is_likely_method(value):
+                        likely_method = 'likely'
+                        break
+            else:
+                # no assignment from a likely method was found
+                likely_method = 'unlikely'
+        else:
+            likely_method = 'unlikely'
+
+        code.putln("#if CYTHON_UNPACK_METHODS")
+        code.putln("if (%s(PyMethod_Check(%s))) {" % (likely_method, function))
+        code.putln("%s = PyMethod_GET_SELF(%s);" % (self_arg, function))
+        # the following is always true in Py3 (kept only for safety),
+        # but is false for unbound methods in Py2
+        code.putln("if (likely(%s)) {" % self_arg)
+        code.putln("PyObject* function = PyMethod_GET_FUNCTION(%s);" % function)
+        code.put_incref(self_arg, py_object_type)
+        code.put_incref("function", py_object_type)
+        # free method object as early to possible to enable reuse from CPython's freelist
+        code.put_decref_set(function, py_object_type, "function")
+        code.putln("%s = 1;" % arg_offset_cname)
+        code.putln("}")
+        code.putln("}")
+        code.putln("#endif")  # CYTHON_UNPACK_METHODS
+        # TODO may need to deal with unused variables in the #else case
+
+        # actually call the function
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("PyObjectFastCall", "ObjectHandling.c"))
+
+        code.putln("{")
+        # To avoid passing an out-of-bounds argument pointer in the no-args case,
+        # we need at least two entries, so we pad with NULL and point to that.
+        # See https://github.com/cython/cython/issues/5668
+        code.putln("PyObject *__pyx_callargs[%d] = {%s, %s};" % (
+            (len(args) + 1) if args else 2,
+            self_arg,
+            ', '.join(arg.py_result() for arg in args) if args else "NULL",
+        ))
+        # Slot 0 of the array holds 'self_arg'.  The argument pointer starts
+        # there and the count includes it only if the offset was set to 1.
+        code.putln("%s = __Pyx_PyObject_FastCall(%s, __pyx_callargs+1-%s, %d+%s);" % (
+            self.result(),
+            function,
+            arg_offset_cname,
+            len(args),
+            arg_offset_cname))
+
+        # Clean up before the error check, i.e. also if the call failed.
+        code.put_xdecref_clear(self_arg, py_object_type)
+        code.funcstate.release_temp(self_arg)
+        code.funcstate.release_temp(arg_offset_cname)
+        for arg in args:
+            arg.generate_disposal_code(code)
+            arg.free_temps(code)
+        code.putln(code.error_goto_if_null(self.result(), self.pos))
+        self.generate_gotref(code)
+
+        if reuse_function_temp:
+            self.function.generate_disposal_code(code)
+            self.function.free_temps(code)
+        else:
+            code.put_decref_clear(function, py_object_type)
+            code.funcstate.release_temp(function)
+        code.putln("}")
+
+
+class InlinedDefNodeCallNode(CallNode):
+    #  Direct ("inline") call to the C function of a def node.
+    #
+    #  function       PyCFunctionNode   the function to call
+    #  function_name  NameNode          the name by which it is called
+    #  args           [ExprNode]        the call arguments
+
+    subexprs = ['args', 'function_name']
+    is_temp = 1
+    type = py_object_type
+    function = None
+    function_name = None
+
+    def can_be_inlined(self):
+        # Inlining requires a def node without * or ** arguments and without
+        # keyword-only arguments, called with as many args as it declares.
+        def_node = self.function.def_node
+        has_star_args = def_node.star_arg or def_node.starstar_arg
+        if has_star_args or len(def_node.args) != len(self.args):
+            return False
+        # keyword-only arguments: actually wrong number of arguments
+        return not def_node.num_kwonly_args
+
+    def analyse_types(self, env):
+        # Analyse the function name and the arguments, then coerce each
+        # argument to the type of the matching def node argument.
+        self.function_name = self.function_name.analyse_types(env)
+        self.args = [arg_node.analyse_types(env) for arg_node in self.args]
+
+        def_node = self.function.def_node
+        arg_count = len(self.args)
+
+        # Coerce arguments
+        mixed_temps = False
+        for index in range(arg_count):
+            coerced = self.args[index].coerce_to(def_node.args[index].type, env)
+            if coerced.is_temp:
+                # a temp in the first argument doesn't impact subsequent arguments
+                if index > 0:
+                    mixed_temps = True
+            elif coerced.type.is_pyobject and not env.nogil:
+                # Plain local variables are ok.  For anything else, we do
+                # not safely own the reference, but it must not get collected
+                # before the function returns => take an owned temp reference.
+                if not coerced.nonlocally_immutable():
+                    if index > 0:  # first argument doesn't matter
+                        mixed_temps = True
+                    coerced = coerced.coerce_to_temp(env)
+            self.args[index] = coerced
+
+        if mixed_temps:
+            # With temps and non-temps mixed, the arguments may get
+            # constructed in the wrong order (temps first).  The last argument
+            # is evaluated last in any case, so it is left out of the check.
+            for index in range(arg_count - 1):
+                candidate = self.args[index]
+                if candidate.nonlocally_immutable():
+                    # locals, C functions, unassignable types are safe.
+                    continue
+                if candidate.type.is_cpp_class:
+                    # Assignment has side effects, avoid.
+                    continue
+                if env.nogil and candidate.type.is_pyobject:
+                    # can't copy a Python reference into a temp in nogil env
+                    # (this is safe: a construction would fail in nogil anyway)
+                    continue
+                # Not coerced to a temp here, a warning is issued instead.
+                if index > 0:
+                    warning(candidate.pos, "Argument evaluation order in C function call is undefined and may not be as expected", 0)
+                    break
+        return self
+
+    def generate_result_code(self, code):
+        # Call the def node's C function with the function object followed
+        # by the arguments; Python objects are passed as the declared type.
+        def_node = self.function.def_node
+        call_args = [self.function_name.py_result()]
+        for actual, formal in zip(self.args, def_node.args):
+            if actual.type.is_pyobject:
+                call_args.append(actual.result_as(formal.type))
+            else:
+                call_args.append(actual.result())
+        code.putln(
+            "%s = %s(%s); %s" % (
+                self.result(),
+                def_node.entry.pyfunc_cname,
+                ', '.join(call_args),
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class PythonCapiFunctionNode(ExprNode):
+    # Reference to a function by its C name; the result code is that name.
+    #
+    # name          the 'py_name' given on construction
+    # cname         C name of the function
+    # type          the function type
+    # utility_code  utility code to use together with the function, or None
+
+    subexprs = []
+
+    def __init__(self, pos, py_name, cname, func_type, utility_code = None):
+        node_attrs = dict(
+            name=py_name, cname=cname, type=func_type, utility_code=utility_code)
+        ExprNode.__init__(self, pos, **node_attrs)
+
+    def analyse_types(self, env):
+        # Nothing to analyse.
+        return self
+
+    def generate_result_code(self, code):
+        # Generates no code, only registers the utility code (if any).
+        utility_code = self.utility_code
+        if not utility_code:
+            return
+        code.globalstate.use_utility_code(utility_code)
+
+    def calculate_result_code(self):
+        c_name = self.cname
+        return c_name
+
+
+class PythonCapiCallNode(SimpleCallNode):
+    # Python C-API Function call (only created in transforms)
+
+    # By default, we assume that the call never returns None, as this
+    # is true for most C-API functions in CPython.  If this does not
+    # apply to a call, set the following to True (or None to inherit
+    # the default behaviour).
+    may_return_none = False
+
+    def __init__(self, pos, function_name, func_type,
+                 utility_code = None, py_name=None, **kwargs):
+        self.type = func_type.return_type
+        self.result_ctype = self.type
+        self.function = PythonCapiFunctionNode(
+            pos, py_name, function_name, func_type,
+            utility_code = utility_code)
+        # call this last so that we can override the constructed
+        # attributes above with explicit keyword arguments if required
+        SimpleCallNode.__init__(self, pos, **kwargs)
+
+
+class CachedBuiltinMethodCallNode(CallNode):
+    # Python call to a method of a known Python builtin (only created in transforms)
+
+    subexprs = ['obj', 'args']
+    is_temp = True
+
+    def __init__(self, call_node, obj, method_name, args):
+        super(CachedBuiltinMethodCallNode, self).__init__(
+            call_node.pos,
+            obj=obj, method_name=method_name, args=args,
+            may_return_none=call_node.may_return_none,
+            type=call_node.type)
+
+    def may_be_none(self):
+        if self.may_return_none is not None:
+            return self.may_return_none
+        return ExprNode.may_be_none(self)
+
+    def generate_result_code(self, code):
+        type_cname = self.obj.type.cname
+        obj_cname = self.obj.py_result()
+        args = [arg.py_result() for arg in self.args]
+        call_code = code.globalstate.cached_unbound_method_call_code(
+            obj_cname, type_cname, self.method_name, args)
+        code.putln("%s = %s; %s" % (
+            self.result(), call_code,
+            code.error_goto_if_null(self.result(), self.pos)
+        ))
+        self.generate_gotref(code)
+
+
+class GeneralCallNode(CallNode):
+    #  General Python function call, including keyword,
+    #  * and ** arguments.
+    #
+    #  function         ExprNode
+    #  positional_args  ExprNode          Tuple of positional arguments
+    #  keyword_args     ExprNode or None  Dict of keyword arguments
+
+    type = py_object_type
+
+    subexprs = ['function', 'positional_args', 'keyword_args']
+
+    nogil_check = Node.gil_error
+
+    def compile_time_value(self, denv):
+        function = self.function.compile_time_value(denv)
+        positional_args = self.positional_args.compile_time_value(denv)
+        keyword_args = self.keyword_args.compile_time_value(denv)
+        try:
+            return function(*positional_args, **keyword_args)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def explicit_args_kwds(self):
+        if (self.keyword_args and not self.keyword_args.is_dict_literal or
+                not self.positional_args.is_sequence_constructor):
+            raise CompileError(self.pos,
+                'Compile-time keyword arguments must be explicit.')
+        return self.positional_args.args, self.keyword_args
+
+    def analyse_types(self, env):
+        # Analyse the call and return the node to use in its place.
+        # This is 'self', or the analysed replacement node when
+        # map_to_simple_call_node() could map the keyword arguments to
+        # declared positional arguments.
+        if self.analyse_as_type_constructor(env):
+            return self
+        self.function = self.function.analyse_types(env)
+        if not self.function.type.is_pyobject:
+            if self.function.type.is_error:
+                self.type = error_type
+                return self
+            if hasattr(self.function, 'entry'):
+                node = self.map_to_simple_call_node()
+                if node is not None and node is not self:
+                    # keywords were mapped => continue with the replacement node
+                    return node.analyse_types(env)
+                elif self.function.entry.as_variable:
+                    # fall back to calling the function as a Python object
+                    self.function = self.function.coerce_to_pyobject(env)
+                elif node is self:
+                    # mapping not possible and no Python object to fall back to
+                    error(self.pos,
+                          "Non-trivial keyword arguments and starred "
+                          "arguments not allowed in cdef functions.")
+                else:
+                    # error was already reported
+                    pass
+            else:
+                self.function = self.function.coerce_to_pyobject(env)
+        if self.keyword_args:
+            self.keyword_args = self.keyword_args.analyse_types(env)
+        self.positional_args = self.positional_args.analyse_types(env)
+        self.positional_args = \
+            self.positional_args.coerce_to_pyobject(env)
+        self.set_py_result_type(self.function)
+        self.is_temp = 1
+        return self
+
+    def map_to_simple_call_node(self):
+        """
+        Tries to map keyword arguments to declared positional arguments.
+        Returns self to try a Python call, None to report an error
+        or a SimpleCallNode if the mapping succeeds.
+
+        On success, the SimpleCallNode may be wrapped in one
+        EvalWithTempExprNode per temporary that was needed for
+        non-simple argument values.
+        """
+        if not isinstance(self.positional_args, TupleNode):
+            # has starred argument
+            return self
+        if not self.keyword_args.is_dict_literal:
+            # keywords come from arbitrary expression => nothing to do here
+            return self
+        function = self.function
+        entry = getattr(function, 'entry', None)
+        if not entry:
+            return self
+        function_type = entry.type
+        if function_type.is_ptr:
+            function_type = function_type.base_type
+        if not function_type.is_cfunction:
+            return self
+
+        pos_args = self.positional_args.args
+        kwargs = self.keyword_args
+        declared_args = function_type.args
+        if entry.is_cmethod:
+            declared_args = declared_args[1:]  # skip 'self'
+
+        if len(pos_args) > len(declared_args):
+            error(self.pos, "function call got too many positional arguments, "
+                            "expected %d, got %s" % (len(declared_args),
+                                                     len(pos_args)))
+            return None
+
+        # names of the declared arguments that are covered by positional args
+        matched_args = {
+            arg.name for arg in declared_args[:len(pos_args)]
+            if arg.name
+        }
+        unmatched_args = declared_args[len(pos_args):]
+        matched_kwargs_count = 0
+        args = list(pos_args)
+
+        # check for duplicate keywords
+        seen = set(matched_args)
+        has_errors = False
+        for arg in kwargs.key_value_pairs:
+            name = arg.key.value
+            if name in seen:
+                error(arg.pos, "argument '%s' passed twice" % name)
+                has_errors = True
+                # continue to report more errors if there are any
+            seen.add(name)
+
+        # match keywords that are passed in order
+        for decl_arg, arg in zip(unmatched_args, kwargs.key_value_pairs):
+            name = arg.key.value
+            if decl_arg.name == name:
+                matched_args.add(name)
+                matched_kwargs_count += 1
+                args.append(arg.value)
+            else:
+                break
+
+        # match keyword arguments that are passed out-of-order, but keep
+        # the evaluation of non-simple arguments in order by moving them
+        # into temps
+        from .UtilNodes import EvalWithTempExprNode, LetRefNode
+        # list of (position in the call, LetRefNode) pairs; replaced further
+        # down by a plain list of temp nodes
+        temps = []
+        if len(kwargs.key_value_pairs) > matched_kwargs_count:
+            unmatched_args = declared_args[len(args):]
+            # keyword name -> (position in the call, key/value item)
+            keywords = dict([ (arg.key.value, (i+len(pos_args), arg))
+                              for i, arg in enumerate(kwargs.key_value_pairs) ])
+            first_missing_keyword = None
+            for decl_arg in unmatched_args:
+                name = decl_arg.name
+                if name not in keywords:
+                    # missing keyword argument => either done or error
+                    if not first_missing_keyword:
+                        first_missing_keyword = name
+                    continue
+                elif first_missing_keyword:
+                    if entry.as_variable:
+                        # we might be able to convert the function to a Python
+                        # object, which then allows full calling semantics
+                        # with default values in gaps - currently, we only
+                        # support optional arguments at the end
+                        return self
+                    # wasn't the last keyword => gaps are not supported
+                    error(self.pos, "C function call is missing "
+                                    "argument '%s'" % first_missing_keyword)
+                    return None
+                pos, arg = keywords[name]
+                matched_args.add(name)
+                matched_kwargs_count += 1
+                if arg.value.is_simple():
+                    args.append(arg.value)
+                else:
+                    temp = LetRefNode(arg.value)
+                    assert temp.is_simple()
+                    args.append(temp)
+                    temps.append((pos, temp))
+
+            if temps:
+                # may have to move preceding non-simple args into temps
+                final_args = []
+                new_temps = []
+                first_temp_arg = temps[0][-1]
+                for arg_value in args:
+                    if arg_value is first_temp_arg:
+                        break  # done
+                    if arg_value.is_simple():
+                        final_args.append(arg_value)
+                    else:
+                        temp = LetRefNode(arg_value)
+                        new_temps.append(temp)
+                        final_args.append(temp)
+                if new_temps:
+                    args = final_args
+                # temps of the preceding args first, then the keyword temps
+                # ordered by their position in the call
+                temps = new_temps + [ arg for i,arg in sorted(temps) ]
+
+        # check for unexpected keywords
+        for arg in kwargs.key_value_pairs:
+            name = arg.key.value
+            if name not in matched_args:
+                has_errors = True
+                error(arg.pos,
+                      "C function got unexpected keyword argument '%s'" %
+                      name)
+
+        if has_errors:
+            # error was reported already
+            return None
+
+        # all keywords mapped to positional arguments
+        # if we are missing arguments, SimpleCallNode will figure it out
+        node = SimpleCallNode(self.pos, function=function, args=args)
+        # wrap in reverse order so that temps[0] ends up as the outermost node
+        for temp in temps[::-1]:
+            node = EvalWithTempExprNode(temp, node)
+        return node
+
+    def generate_result_code(self, code):
+        if self.type.is_error: return
+        if self.keyword_args:
+            kwargs = self.keyword_args.py_result()
+        else:
+            kwargs = 'NULL'
+        code.globalstate.use_utility_code(UtilityCode.load_cached(
+            "PyObjectCall", "ObjectHandling.c"))
+        code.putln(
+            "%s = __Pyx_PyObject_Call(%s, %s, %s); %s" % (
+                self.result(),
+                self.function.py_result(),
+                self.positional_args.py_result(),
+                kwargs,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class AsTupleNode(ExprNode):
+    #  Convert argument to tuple. Used for normalising
+    #  the * argument of a function call.
+    #
+    #  arg    ExprNode
+
+    subexprs = ['arg']
+    is_temp = 1
+
+    def calculate_constant_result(self):
+        self.constant_result = tuple(self.arg.constant_result)
+
+    def compile_time_value(self, denv):
+        arg = self.arg.compile_time_value(denv)
+        try:
+            return tuple(arg)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def analyse_types(self, env):
+        self.arg = self.arg.analyse_types(env).coerce_to_pyobject(env)
+        if self.arg.type is tuple_type:
+            return self.arg.as_none_safe_node("'NoneType' object is not iterable")
+        self.type = tuple_type
+        return self
+
+    def may_be_none(self):
+        return False
+
+    nogil_check = Node.gil_error
+    gil_message = "Constructing Python tuple"
+
+    def generate_result_code(self, code):
+        cfunc = "__Pyx_PySequence_Tuple" if self.arg.type in (py_object_type, tuple_type) else "PySequence_Tuple"
+        code.putln(
+            "%s = %s(%s); %s" % (
+                self.result(),
+                cfunc, self.arg.py_result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class MergedDictNode(ExprNode):
+    #  Helper class for keyword arguments and other merged dicts.
+    #
+    #  keyword_args      [DictNode or other ExprNode]
+
+    subexprs = ['keyword_args']
+    is_temp = 1
+    type = dict_type
+    reject_duplicates = True
+
+    def calculate_constant_result(self):
+        result = {}
+        reject_duplicates = self.reject_duplicates
+        for item in self.keyword_args:
+            if item.is_dict_literal:
+                # process items in order
+                items = ((key.constant_result, value.constant_result)
+                         for key, value in item.key_value_pairs)
+            else:
+                items = item.constant_result.iteritems()
+
+            for key, value in items:
+                if reject_duplicates and key in result:
+                    raise ValueError("duplicate keyword argument found: %s" % key)
+                result[key] = value
+
+        self.constant_result = result
+
+    def compile_time_value(self, denv):
+        result = {}
+        reject_duplicates = self.reject_duplicates
+        for item in self.keyword_args:
+            if item.is_dict_literal:
+                # process items in order
+                items = [(key.compile_time_value(denv), value.compile_time_value(denv))
+                         for key, value in item.key_value_pairs]
+            else:
+                items = item.compile_time_value(denv).iteritems()
+
+            try:
+                for key, value in items:
+                    if reject_duplicates and key in result:
+                        raise ValueError("duplicate keyword argument found: %s" % key)
+                    result[key] = value
+            except Exception as e:
+                self.compile_time_value_error(e)
+        return result
+
+    def type_dependencies(self, env):
+        # No dependencies: infer_type() always answers 'dict_type'.
+        return ()
+
+    def infer_type(self, env):
+        # The merged result is always typed as a dict, whatever the inputs are.
+        return dict_type
+
+    def analyse_types(self, env):
+        self.keyword_args = [
+            arg.analyse_types(env).coerce_to_pyobject(env).as_none_safe_node(
+                # FIXME: CPython's error message starts with the runtime function name
+                'argument after ** must be a mapping, not NoneType')
+            for arg in self.keyword_args
+        ]
+
+        return self
+
+    def may_be_none(self):
+        # Always answers False for the merged dict.
+        return False
+
+    gil_message = "Constructing Python dict"
+
+    def generate_evaluation_code(self, code):
+        # Generate the C code that builds the merged dict in self.result().
+        # The first mapping provides the initial dict, all further mappings
+        # are merged into it one after the other.
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        # --- first mapping: becomes the initial result dict ---
+        args = iter(self.keyword_args)
+        item = next(args)
+        item.generate_evaluation_code(code)
+        if item.type is not dict_type:
+            # CPython supports calling functions with non-dicts, so do we
+            code.putln('if (likely(PyDict_CheckExact(%s))) {' %
+                       item.py_result())
+
+        if item.is_dict_literal:
+            # take over the dict literal directly instead of copying it
+            item.make_owned_reference(code)
+            code.putln("%s = %s;" % (self.result(), item.py_result()))
+            item.generate_post_assignment_code(code)
+        else:
+            code.putln("%s = PyDict_Copy(%s); %s" % (
+                self.result(),
+                item.py_result(),
+                code.error_goto_if_null(self.result(), item.pos)))
+            self.generate_gotref(code)
+            item.generate_disposal_code(code)
+
+        if item.type is not dict_type:
+            # runtime fallback for non-dict mappings: call 'dict(item)'
+            code.putln('} else {')
+            code.globalstate.use_utility_code(UtilityCode.load_cached(
+                "PyObjectCallOneArg", "ObjectHandling.c"))
+            code.putln("%s = __Pyx_PyObject_CallOneArg((PyObject*)&PyDict_Type, %s); %s" % (
+                self.result(),
+                item.py_result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+            item.generate_disposal_code(code)
+            code.putln('}')
+        item.free_temps(code)
+
+        # --- remaining mappings: merged into the result dict ---
+        # names of the "FunctionArguments.c" helpers used by the code below
+        helpers = set()
+        for item in args:
+            if item.is_dict_literal:
+                # inline update instead of creating an intermediate dict
+                for arg in item.key_value_pairs:
+                    arg.generate_evaluation_code(code)
+                    if self.reject_duplicates:
+                        code.putln("if (unlikely(PyDict_Contains(%s, %s))) {" % (
+                            self.result(),
+                            arg.key.py_result()))
+                        helpers.add("RaiseDoubleKeywords")
+                        # FIXME: find out function name at runtime!
+                        code.putln('__Pyx_RaiseDoubleKeywordsError("function", %s); %s' % (
+                            arg.key.py_result(),
+                            code.error_goto(self.pos)))
+                        code.putln("}")
+                    code.put_error_if_neg(arg.key.pos, "PyDict_SetItem(%s, %s, %s)" % (
+                        self.result(),
+                        arg.key.py_result(),
+                        arg.value.py_result()))
+                    arg.generate_disposal_code(code)
+                    arg.free_temps(code)
+            else:
+                item.generate_evaluation_code(code)
+                if self.reject_duplicates:
+                    # merge mapping into kwdict one by one as we need to check for duplicates
+                    helpers.add("MergeKeywords")
+                    code.put_error_if_neg(item.pos, "__Pyx_MergeKeywords(%s, %s)" % (
+                        self.result(), item.py_result()))
+                else:
+                    # simple case, just add all entries
+                    helpers.add("RaiseMappingExpected")
+                    code.putln("if (unlikely(PyDict_Update(%s, %s) < 0)) {" % (
+                        self.result(), item.py_result()))
+                    code.putln("if (PyErr_ExceptionMatches(PyExc_AttributeError)) "
+                               "__Pyx_RaiseMappingExpectedError(%s);" % item.py_result())
+                    code.putln(code.error_goto(item.pos))
+                    code.putln("}")
+                item.generate_disposal_code(code)
+                item.free_temps(code)
+
+        # register the collected helpers in sorted (i.e. deterministic) order
+        for helper in sorted(helpers):
+            code.globalstate.use_utility_code(UtilityCode.load_cached(helper, "FunctionArguments.c"))
+
+    def annotate(self, code):
+        for item in self.keyword_args:
+            item.annotate(code)
+
+
+class AttributeNode(ExprNode):
+    #  obj.attribute
+    #
+    #  obj          ExprNode
+    #  attribute    string
+    #  needs_none_check boolean        Used if obj is an extension type.
+    #                                  If set to True, it is known that the type is not None.
+    #
+    #  Used internally:
+    #
+    #  is_py_attr           boolean   Is a Python getattr operation
+    #  member               string    C name of struct member
+    #  is_called            boolean   Function call is being done on result
+    #  entry                Entry     Symbol table entry of attribute
+
+    is_attribute = 1
+    subexprs = ['obj']
+
+    entry = None
+    is_called = 0
+    needs_none_check = True
+    is_memslice_transpose = False
+    is_special_lookup = False
+    is_py_attr = 0
+
+    def as_cython_attribute(self):
+        if (isinstance(self.obj, NameNode) and
+                self.obj.is_cython_module and not
+                self.attribute == u"parallel"):
+            return self.attribute
+
+        cy = self.obj.as_cython_attribute()
+        if cy:
+            return "%s.%s" % (cy, self.attribute)
+        return None
+
+    def coerce_to(self, dst_type, env):
+        #  If coercing to a generic pyobject and this is a cpdef function
+        #  we can create the corresponding attribute
+        #
+        #  Two cases are handled specially when coercing to 'py_object_type';
+        #  everything else is delegated to ExprNode.coerce_to().
+        if dst_type is py_object_type:
+            entry = self.entry
+            if entry and entry.is_cfunction and entry.as_variable:
+                # must be a cpdef function
+                # => switch to its variable entry and re-analyse this node
+                #    as a Python attribute lookup
+                self.is_temp = 1
+                self.entry = entry.as_variable
+                self.analyse_as_python_attribute(env)
+                return self
+            elif entry and entry.is_cfunction and self.obj.type is not Builtin.type_type:
+                # "bound" cdef function.
+                # This implementation is likely a little inefficient and could be improved.
+                # Essentially it does:
+                #  __import__("functools").partial(coerce_to_object(self), self.obj)
+                from .UtilNodes import EvalWithTempExprNode, ResultRefNode
+                # take self.obj out to a temp because it's used twice
+                obj_node = ResultRefNode(self.obj, type=self.obj.type)
+                obj_node.result_ctype = self.obj.result_ctype
+                self.obj = obj_node
+                unbound_node = ExprNode.coerce_to(self, dst_type, env)
+                utility_code=UtilityCode.load_cached(
+                    "PyMethodNew2Arg", "ObjectHandling.c"
+                )
+                # signature of the C helper: object f(object func, object self)
+                func_type = PyrexTypes.CFuncType(
+                    PyrexTypes.py_object_type, [
+                        PyrexTypes.CFuncTypeArg("func", PyrexTypes.py_object_type, None),
+                        PyrexTypes.CFuncTypeArg("self", PyrexTypes.py_object_type, None)
+                    ],
+                )
+                binding_call = PythonCapiCallNode(
+                    self.pos,
+                    function_name="__Pyx_PyMethod_New2Arg",
+                    func_type=func_type,
+                    args=[unbound_node, obj_node],
+                    utility_code=utility_code,
+                )
+                complete_call = EvalWithTempExprNode(obj_node, binding_call)
+                return complete_call.analyse_types(env)
+        return ExprNode.coerce_to(self, dst_type, env)
+
+    def calculate_constant_result(self):
+        attr = self.attribute
+        if attr.startswith("__") and attr.endswith("__"):
+            return
+        self.constant_result = getattr(self.obj.constant_result, attr)
+
+    def compile_time_value(self, denv):
+        attr = self.attribute
+        if attr.startswith("__") and attr.endswith("__"):
+            error(self.pos,
+                  "Invalid attribute name '%s' in compile-time expression" % attr)
+            return None
+        obj = self.obj.compile_time_value(denv)
+        try:
+            return getattr(obj, attr)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def type_dependencies(self, env):
+        # The dependencies are those of the object the attribute is looked up on.
+        return self.obj.type_dependencies(env)
+
+    def infer_type(self, env):
+        # FIXME: this is way too redundant with analyse_types()
+        node = self.analyse_as_cimported_attribute_node(env, target=False)
+        if node is not None:
+            if node.entry.type and node.entry.type.is_cfunction:
+                # special-case - function converted to pointer
+                return PyrexTypes.CPtrType(node.entry.type)
+            else:
+                return node.entry.type
+        node = self.analyse_as_type_attribute(env)
+        if node is not None:
+            return node.entry.type
+        obj_type = self.obj.infer_type(env)
+        self.analyse_attribute(env, obj_type=obj_type)
+        if obj_type.is_builtin_type and self.type.is_cfunction:
+            # special case: C-API replacements for C methods of
+            # builtin types cannot be inferred as C functions as
+            # that would prevent their use as bound methods
+            return py_object_type
+        elif self.entry and self.entry.is_cmethod:
+            # special case: bound methods should not be inferred
+            # as their unbound method types
+            return py_object_type
+        return self.type
+
+    def analyse_target_declaration(self, env):
+        # Declares nothing; only marks this node as an assignment target.
+        self.is_target = True
+
+    def analyse_target_types(self, env):
+        node = self.analyse_types(env, target = 1)
+        if node.type.is_const:
+            error(self.pos, "Assignment to const attribute '%s'" % self.attribute)
+        if not node.is_lvalue():
+            error(self.pos, "Assignment to non-lvalue of type '%s'" % self.type)
+        return node
+
+    def analyse_types(self, env, target = 0):
+        # Analyse the attribute access and return the resulting node, which
+        # is not necessarily 'self'.  The interpretations are tried in order:
+        # cimported name, type attribute (non-targets only), ordinary attribute.
+        if not self.type:
+            self.type = PyrexTypes.error_type  # default value if it isn't analysed successfully
+        self.initialized_check = env.directives['initializedcheck']
+        node = self.analyse_as_cimported_attribute_node(env, target)
+        if node is None and not target:
+            node = self.analyse_as_type_attribute(env)
+        if node is None:
+            node = self.analyse_as_ordinary_attribute_node(env, target)
+            assert node is not None
+        # mark the symbol table entry behind the result as used
+        if (node.is_attribute or node.is_name) and node.entry:
+            node.entry.used = True
+        if node.is_attribute:
+            node.wrap_obj_in_nonecheck(env)
+        return node
+
+    def analyse_as_cimported_attribute_node(self, env, target):
+        # Try to interpret this as a reference to an imported
+        # C const, type, var or function. If successful, returns
+        # the node created by as_name_node() for that entry.
+        # If the object is a cimported module (without shadow) that
+        # lacks the attribute, reports an error and returns self.
+        # Otherwise returns None.
+        module_scope = self.obj.analyse_as_module(env)
+        if module_scope:
+            entry = module_scope.lookup_here(self.attribute)
+            if entry and not entry.known_standard_library_import and (
+                    entry.is_cglobal or entry.is_cfunction
+                    or entry.is_type or entry.is_const):
+                return self.as_name_node(env, entry, target)
+            if self.is_cimported_module_without_shadow(env):
+                # TODO: search for submodule
+                error(self.pos, "cimported module has no attribute '%s'" % self.attribute)
+                return self
+        return None
+
+    def analyse_as_type_attribute(self, env):
+        # Try to interpret this as a reference to an unbound
+        # C method of an extension type or builtin type.  If successful,
+        # creates a corresponding NameNode and returns it, otherwise
+        # returns None.
+        # Static/function members of C++ classes and the values of
+        # enum types are resolved here as well.
+        if self.obj.is_string_literal:
+            return
+        type = self.obj.analyse_as_type(env)
+        if type:
+            if type.is_extension_type or type.is_builtin_type or type.is_cpp_class:
+                entry = type.scope.lookup_here(self.attribute)
+                if entry and (entry.is_cmethod or type.is_cpp_class and entry.type.is_cfunction):
+                    if type.is_builtin_type:
+                        if not self.is_called:
+                            # must handle this as Python object
+                            return None
+                        ubcm_entry = entry
+                    else:
+                        # build temporary function entries for the method
+                        # and for all of its overloaded alternatives
+                        ubcm_entry = self._create_unbound_cmethod_entry(type, entry, env)
+                        ubcm_entry.overloaded_alternatives = [
+                            self._create_unbound_cmethod_entry(type, overloaded_alternative, env)
+                            for overloaded_alternative in entry.overloaded_alternatives
+                        ]
+                    return self.as_name_node(env, ubcm_entry, target=False)
+            elif type.is_enum or type.is_cpp_enum:
+                if self.attribute in type.values:
+                    for entry in type.entry.enum_values:
+                        if entry.name == self.attribute:
+                            return self.as_name_node(env, entry, target=False)
+                    else:
+                        # loop finished without returning => no matching entry
+                        error(self.pos, "%s not a known value of %s" % (self.attribute, type))
+                else:
+                    error(self.pos, "%s not a known value of %s" % (self.attribute, type))
+        return None
+
+    def _create_unbound_cmethod_entry(self, type, entry, env):
+        # Create a temporary entry describing the unbound C method in `entry`
+        # as an ordinary function.
+        #
+        #  type    the type that the method is looked up on
+        #  entry   symbol table entry of the method
+        #  env     current scope, used to detect C++ class scopes
+        #
+        # Returns a new Symtab.Entry that is marked as C function and as
+        # unbound C method.
+        if entry.func_cname and entry.type.op_arg_struct is None:
+            # the method can be called directly through its C function name
+            cname = entry.func_cname
+            if entry.type.is_static_method or (
+                    env.parent_scope and env.parent_scope.is_cpp_class_scope):
+                ctype = entry.type
+            elif type.is_cpp_class:
+                error(self.pos, "%s not a static member of %s" % (entry.name, type))
+                ctype = PyrexTypes.error_type
+            else:
+                # Fix self type.
+                # (work on a copy so that the method's own type stays untouched)
+                ctype = copy.copy(entry.type)
+                ctype.args = ctype.args[:]
+                ctype.args[0] = PyrexTypes.CFuncTypeArg('self', type, 'self', None)
+        else:
+            # otherwise, refer to the method through the vtable pointer of the type
+            cname = "%s->%s" % (type.vtabptr_cname, entry.cname)
+            ctype = entry.type
+        ubcm_entry = Symtab.Entry(entry.name, cname, ctype)
+        ubcm_entry.is_cfunction = 1
+        ubcm_entry.func_cname = entry.func_cname
+        ubcm_entry.is_unbound_cmethod = 1
+        ubcm_entry.scope = entry.scope
+        return ubcm_entry
+
+    def analyse_as_type(self, env):
+        module_scope = self.obj.analyse_as_module(env)
+        if module_scope:
+            return module_scope.lookup_type(self.attribute)
+        if not self.obj.is_string_literal:
+            base_type = self.obj.analyse_as_type(env)
+            if base_type and getattr(base_type, 'scope', None) is not None:
+                return base_type.scope.lookup_type(self.attribute)
+        return None
+
+    def analyse_as_extension_type(self, env):
+        # Try to interpret this as a reference to an extension type
+        # in a cimported module. Returns the extension type, or None.
+        module_scope = self.obj.analyse_as_module(env)
+        if module_scope:
+            entry = module_scope.lookup_here(self.attribute)
+            if entry and entry.is_type:
+                if entry.type.is_extension_type or entry.type.is_builtin_type:
+                    return entry.type
+        return None
+
+    def analyse_as_module(self, env):
+        # Try to interpret this as a reference to a cimported module
+        # in another cimported module. Returns the module scope, or None.
+        module_scope = self.obj.analyse_as_module(env)
+        if module_scope:
+            entry = module_scope.lookup_here(self.attribute)
+            if entry and entry.as_module:
+                return entry.as_module
+        return None
+
+    def as_name_node(self, env, entry, target):
+        # Create a corresponding NameNode from this node and complete the
+        # analyse_types phase.
+        node = NameNode.from_node(self, name=self.attribute, entry=entry)
+        if target:
+            node = node.analyse_target_types(env)
+        else:
+            node = node.analyse_rvalue_entry(env)
+        node.entry.used = 1
+        return node
+
+    def analyse_as_ordinary_attribute_node(self, env, target):
+        # Analyse this node as a plain attribute access on the analysed object.
+        # Returns 'self', except for reading a C property, which is
+        # replaced by the analysed call node of the property.
+        self.obj = self.obj.analyse_types(env)
+        self.analyse_attribute(env)
+        if self.entry and self.entry.is_cmethod and not self.is_called:
+#            error(self.pos, "C method can only be called")
+            pass
+        ## Reference to C array turns into pointer to first element.
+        #while self.type.is_array:
+        #    self.type = self.type.element_ptr_type()
+        if self.is_py_attr:
+            if not target:
+                # reading a Python attribute: the result goes into a temp
+                self.is_temp = 1
+                self.result_ctype = py_object_type
+        elif target and self.obj.type.is_builtin_type:
+            error(self.pos, "Assignment to an immutable object field")
+        elif self.entry and self.entry.is_cproperty:
+            if not target:
+                return SimpleCallNode.for_cproperty(self.pos, self.obj, self.entry).analyse_types(env)
+            # TODO: implement writable C-properties?
+            error(self.pos, "Assignment to a read-only property")
+        #elif self.type.is_memoryviewslice and not target:
+        #    self.is_temp = True
+        return self
+
+    def analyse_attribute(self, env, obj_type = None):
+        # Look up attribute and set self.type and self.member.
+        #
+        # Also sets self.op (the C member access operator), self.is_py_attr
+        # and, when the object type has known attributes, self.entry.
+        #
+        #  obj_type   optional type to analyse against instead of
+        #             self.obj.type; when it is given, self.obj is not
+        #             replaced by a coerced node
+        immutable_obj = obj_type is not None  # used during type inference
+        self.is_py_attr = 0
+        self.member = self.attribute
+        if obj_type is None:
+            # C strings are turned into Python objects before the lookup.
+            if self.obj.type.is_string or self.obj.type.is_pyunicode_ptr:
+                self.obj = self.obj.coerce_to_pyobject(env)
+            obj_type = self.obj.type
+        else:
+            # Same rule as above, applied to the type only.
+            if obj_type.is_string or obj_type.is_pyunicode_ptr:
+                obj_type = py_object_type
+        # Pick the C operator; pointers and arrays are looked up through
+        # their base type.
+        if obj_type.is_ptr or obj_type.is_array:
+            obj_type = obj_type.base_type
+            self.op = "->"
+        elif obj_type.is_extension_type or obj_type.is_builtin_type:
+            self.op = "->"
+        elif obj_type.is_reference and obj_type.is_fake_reference:
+            self.op = "->"
+        else:
+            self.op = "."
+        if obj_type.has_attributes:
+            if obj_type.attributes_known():
+                entry = obj_type.scope.lookup_here(self.attribute)
+                if obj_type.is_memoryviewslice and not entry:
+                    if self.attribute == 'T':
+                        # "slice.T": transposed memoryview slice held in a temp.
+                        self.is_memslice_transpose = True
+                        self.is_temp = True
+                        self.use_managed_ref = True
+                        self.type = self.obj.type.transpose(self.pos)
+                        return
+                    else:
+                        # Let the memoryview type declare the attribute on
+                        # demand, then retry the lookup.
+                        obj_type.declare_attribute(self.attribute, env, self.pos)
+                        entry = obj_type.scope.lookup_here(self.attribute)
+                # Entries flagged is_member are not used as C attributes
+                # here, so they end up in the Python attribute path below.
+                if entry and entry.is_member:
+                    entry = None
+            else:
+                error(self.pos,
+                    "Cannot select attribute of incomplete type '%s'"
+                    % obj_type)
+                self.type = PyrexTypes.error_type
+                return
+            self.entry = entry
+            if entry:
+                if obj_type.is_extension_type and entry.name == "__weakref__":
+                    error(self.pos, "Illegal use of special attribute __weakref__")
+
+                # def methods need the normal attribute lookup
+                # because they do not have struct entries
+                # fused functions go through assignment synthesis
+                # (foo = pycfunction(foo_func_obj)) and need to go through
+                # regular Python lookup as well
+                if entry.is_cproperty:
+                    # self.member keeps the plain attribute name in this case.
+                    self.type = entry.type
+                    return
+                elif (entry.is_variable and not entry.fused_cfunction) or entry.is_cmethod:
+                    # C-level member: access it by its C name.
+                    self.type = entry.type
+                    self.member = entry.cname
+                    return
+                else:
+                    # If it's not a variable or C method, it must be a Python
+                    # method of an extension type, so we treat it like a Python
+                    # attribute.
+                    pass
+        # If we get here, the base object is not a struct/union/extension
+        # type, or it is an extension type and the attribute is either not
+        # declared or is declared as a Python method. Treat it as a Python
+        # attribute reference.
+        self.analyse_as_python_attribute(env, obj_type, immutable_obj)
+
+    def analyse_as_python_attribute(self, env, obj_type=None, immutable_obj=False):
+        if obj_type is None:
+            obj_type = self.obj.type
+        # mangle private '__*' Python attributes used inside of a class
+        self.attribute = env.mangle_class_private_name(self.attribute)
+        self.member = self.attribute
+        self.type = py_object_type
+        self.is_py_attr = 1
+
+        if not obj_type.is_pyobject and not obj_type.is_error:
+            # Expose python methods for immutable objects.
+            if (obj_type.is_string or obj_type.is_cpp_string
+                    or obj_type.is_buffer or obj_type.is_memoryviewslice
+                    or obj_type.is_numeric
+                    or (obj_type.is_ctuple and obj_type.can_coerce_to_pyobject(env))
+                    or (obj_type.is_struct and obj_type.can_coerce_to_pyobject(env))):
+                if not immutable_obj:
+                    self.obj = self.obj.coerce_to_pyobject(env)
+            elif (obj_type.is_cfunction and (self.obj.is_name or self.obj.is_attribute)
+                    and self.obj.entry.as_variable
+                    and self.obj.entry.as_variable.type.is_pyobject):
+                # might be an optimised builtin function => unpack it
+                if not immutable_obj:
+                    self.obj = self.obj.coerce_to_pyobject(env)
+            else:
+                error(self.pos,
+                      "Object of type '%s' has no attribute '%s'" %
+                      (obj_type, self.attribute))
+
+    def wrap_obj_in_nonecheck(self, env):
+        if not env.directives['nonecheck']:
+            return
+
+        msg = None
+        format_args = ()
+        if (self.obj.type.is_extension_type and self.needs_none_check and not
+                self.is_py_attr):
+            msg = "'NoneType' object has no attribute '%{0}s'".format('.30' if len(self.attribute) <= 30 else '')
+            format_args = (self.attribute,)
+        elif self.obj.type.is_memoryviewslice:
+            if self.is_memslice_transpose:
+                msg = "Cannot transpose None memoryview slice"
+            else:
+                entry = self.obj.type.scope.lookup_here(self.attribute)
+                if entry:
+                    # copy/is_c_contig/shape/strides etc
+                    msg = "Cannot access '%s' attribute of None memoryview slice"
+                    format_args = (entry.name,)
+
+        if msg:
+            self.obj = self.obj.as_none_safe_node(msg, 'PyExc_AttributeError',
+                                                  format_args=format_args)
+
+    def nogil_check(self, env):
+        if self.is_py_attr:
+            self.gil_error()
+
+    gil_message = "Accessing Python attribute"
+
+    def is_cimported_module_without_shadow(self, env):
+        return self.obj.is_cimported_module_without_shadow(env)
+
+    def is_simple(self):
+        if self.obj:
+            return self.result_in_temp() or self.obj.is_simple()
+        else:
+            return NameNode.is_simple(self)
+
+    def is_lvalue(self):
+        if self.obj:
+            return True
+        else:
+            return NameNode.is_lvalue(self)
+
+    def is_ephemeral(self):
+        if self.obj:
+            return self.obj.is_ephemeral()
+        else:
+            return NameNode.is_ephemeral(self)
+
+    def calculate_result_code(self):
+        result = self.calculate_access_code()
+        if self.entry and self.entry.is_cpp_optional and not self.is_target:
+            result = "(*%s)" % result
+        return result
+
+    def calculate_access_code(self):
+        # Does the job of calculate_result_code but doesn't dereference cpp_optionals
+        # Therefore allowing access to the holder variable
+        #
+        # Returns the C expression for the attribute as a string. Returns
+        # None for a C method whose result is unused, unless it is a
+        # non-builtin C method of an extension type (see the comment at
+        # the bare return below).
+        obj = self.obj
+        obj_code = obj.result_as(obj.type)
+        #print "...obj_code =", obj_code ###
+        if self.entry and self.entry.is_cmethod:
+            if obj.type.is_extension_type and not self.entry.is_builtin_cmethod:
+                # A final function cname, when set, is used directly.
+                if self.entry.final_func_cname:
+                    return self.entry.final_func_cname
+
+                if self.type.from_fused:
+                    # If the attribute was specialized through indexing, make
+                    # sure to get the right fused name, as our entry was
+                    # replaced by our parent index node
+                    # (AnalyseExpressionsTransform)
+                    self.member = self.entry.cname
+
+                # Otherwise go through the vtable slot of the object.
+                return "((struct %s *)%s%s%s)->%s" % (
+                    obj.type.vtabstruct_cname, obj_code, self.op,
+                    obj.type.vtabslot_cname, self.member)
+            elif self.result_is_used:
+                return self.member
+            # Generating no code at all for unused access to optimised builtin
+            # methods fixes the problem that some optimisations only exist as
+            # macros, i.e. there is no function pointer to them, so we would
+            # generate invalid C code here.
+            return
+        elif obj.type.is_complex:
+            # Complex parts are read through a __Pyx_C<MEMBER>() helper.
+            return "__Pyx_C%s(%s)" % (self.member.upper(), obj_code)
+        else:
+            if obj.type.is_builtin_type and self.entry and self.entry.is_variable:
+                # accessing a field of a builtin type, need to cast better than result_as() does
+                obj_code = obj.type.cast_code(obj.result(), to_object_struct = True)
+            return "%s%s%s" % (obj_code, self.op, self.member)
+
+    def generate_result_code(self, code):
+        # Emit the C code that produces the result of this attribute access.
+        # Code is only written for Python attributes, memoryview slice
+        # results and checked cpp_optional entries; the remaining cases are
+        # covered by the result code expression itself.
+        if self.is_py_attr:
+            # Python attribute: call the lookup helper and jump to the
+            # error label when it returns NULL.
+            if self.is_special_lookup:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("PyObjectLookupSpecial", "ObjectHandling.c"))
+                lookup_func_name = '__Pyx_PyObject_LookupSpecial'
+            else:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("PyObjectGetAttrStr", "ObjectHandling.c"))
+                lookup_func_name = '__Pyx_PyObject_GetAttrStr'
+            code.putln(
+                '%s = %s(%s, %s); %s' % (
+                    self.result(),
+                    lookup_func_name,
+                    self.obj.py_result(),
+                    code.intern_identifier(self.attribute),
+                    code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+        elif self.type.is_memoryviewslice:
+            if self.is_memslice_transpose:
+                # transpose the slice
+                for access, packing in self.type.axes:
+                    if access == 'ptr':
+                        error(self.pos, "Transposing not supported for slices "
+                                        "with indirect dimensions")
+                        return
+
+                # Copy the slice struct, take a reference on it, then
+                # transpose the copy in place.
+                code.putln("%s = %s;" % (self.result(), self.obj.result()))
+                code.put_incref_memoryviewslice(self.result(), self.type,
+                                have_gil=True)
+
+                T = "__pyx_memslice_transpose(&%s)" % self.result()
+                code.putln(code.error_goto_if_neg(T, self.pos))
+            elif self.initialized_check:
+                # Raise AttributeError when the slice has no memview yet.
+                code.putln(
+                    'if (unlikely(!%s.memview)) {'
+                        'PyErr_SetString(PyExc_AttributeError,'
+                                        '"Memoryview is not initialized");'
+                        '%s'
+                    '}' % (self.result(), code.error_goto(self.pos)))
+        elif self.entry.is_cpp_optional and self.initialized_check:
+            # NOTE(review): self.entry is read without a None guard here,
+            # unlike in the final branch -- confirm it is always set on this path.
+            if self.is_target:
+                undereferenced_result = self.result()
+            else:
+                assert not self.is_temp  # calculate_access_code() only makes sense for non-temps
+                undereferenced_result = self.calculate_access_code()
+            unbound_check_code = self.type.cpp_optional_check_for_null_code(undereferenced_result)
+            code.put_error_if_unbound(self.pos, self.entry, unbound_check_code=unbound_check_code)
+        else:
+            # result_code contains what is needed, but we may need to insert
+            # a check and raise an exception
+            if self.obj.type and self.obj.type.is_extension_type:
+                pass
+            elif self.entry and self.entry.is_cmethod:
+                # C method implemented as function call with utility code
+                code.globalstate.use_entry_utility_code(self.entry)
+
+    def generate_disposal_code(self, code):
+        if self.is_temp and self.type.is_memoryviewslice and self.is_memslice_transpose:
+            # mirror condition for putting the memview incref here:
+            code.put_xdecref_clear(self.result(), self.type, have_gil=True)
+        else:
+            ExprNode.generate_disposal_code(self, code)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        # Emit the C code for "obj.attr = rhs".
+        # The base object is evaluated first and disposed of last; rhs is
+        # released in every branch. overloaded_assignment, exception_check
+        # and exception_value are not used in this implementation.
+        self.obj.generate_evaluation_code(code)
+        if self.is_py_attr:
+            # Python attribute: set it by name through the helper.
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+            code.put_error_if_neg(self.pos,
+                '__Pyx_PyObject_SetAttrStr(%s, %s, %s)' % (
+                    self.obj.py_result(),
+                    code.intern_identifier(self.attribute),
+                    rhs.py_result()))
+            rhs.generate_disposal_code(code)
+            rhs.free_temps(code)
+        elif self.obj.type.is_complex:
+            # Complex parts are written through a __Pyx_SET_C<MEMBER> helper.
+            code.putln("__Pyx_SET_C%s%s(%s, %s);" % (
+                self.member.upper(),
+                self.obj.type.implementation_suffix,
+                self.obj.result_as(self.obj.type),
+                rhs.result_as(self.ctype())))
+            rhs.generate_disposal_code(code)
+            rhs.free_temps(code)
+        else:
+            select_code = self.result()
+            if self.type.is_pyobject and self.use_managed_ref:
+                # Own the new value and drop the reference to the old one
+                # before the C assignment below overwrites it.
+                rhs.make_owned_reference(code)
+                rhs.generate_giveref(code)
+                code.put_gotref(select_code, self.type)
+                code.put_decref(select_code, self.ctype())
+            elif self.type.is_memoryviewslice:
+                # The memoryview helper emits the whole assignment itself.
+                from . import MemoryView
+                MemoryView.put_assign_to_memviewslice(
+                        select_code, rhs, rhs.result(), self.type, code)
+
+            if not self.type.is_memoryviewslice:
+                code.putln(
+                    "%s = %s;" % (
+                        select_code,
+                        rhs.move_result_rhs_as(self.ctype())))
+                        #rhs.result()))
+            rhs.generate_post_assignment_code(code)
+            rhs.free_temps(code)
+        self.obj.generate_disposal_code(code)
+        self.obj.free_temps(code)
+
+    def generate_deletion_code(self, code, ignore_nonexisting=False):
+        self.obj.generate_evaluation_code(code)
+        if self.is_py_attr or (self.entry.scope.is_property_scope
+                               and u'__del__' in self.entry.scope.entries):
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PyObjectSetAttrStr", "ObjectHandling.c"))
+            code.put_error_if_neg(self.pos,
+                '__Pyx_PyObject_DelAttrStr(%s, %s)' % (
+                    self.obj.py_result(),
+                    code.intern_identifier(self.attribute)))
+        else:
+            error(self.pos, "Cannot delete C attribute of extension type")
+        self.obj.generate_disposal_code(code)
+        self.obj.free_temps(code)
+
+    def annotate(self, code):
+        if self.is_py_attr:
+            style, text = 'py_attr', 'python attribute (%s)'
+        else:
+            style, text = 'c_attr', 'c attribute (%s)'
+        code.annotate(self.pos, AnnotationItem(style, text % self.type, size=len(self.attribute)))
+
+    def get_known_standard_library_import(self):
+        module_name = self.obj.get_known_standard_library_import()
+        if module_name:
+            return StringEncoding.EncodedString("%s.%s" % (module_name, self.attribute))
+        return None
+
+
+#-------------------------------------------------------------------
+#
+#  Constructor nodes
+#
+#-------------------------------------------------------------------
+
+class StarredUnpackingNode(ExprNode):
+    #  A starred expression like "*a"
+    #
+    #  This is only allowed in sequence assignment or construction such as
+    #
+    #      a, *b = (1,2,3,4)    =>     a = 1 ; b = [2,3,4]
+    #
+    #  and will be special cased during type analysis (or generate an error
+    #  if it's found at unexpected places).
+    #
+    #  target          ExprNode
+
+    subexprs = ['target']
+    is_starred = 1
+    type = py_object_type
+    is_temp = 1
+    starred_expr_allowed_here = False
+
+    def __init__(self, pos, target):
+        ExprNode.__init__(self, pos, target=target)
+
+    def analyse_declarations(self, env):
+        if not self.starred_expr_allowed_here:
+            error(self.pos, "starred expression is not allowed here")
+        self.target.analyse_declarations(env)
+
+    def infer_type(self, env):
+        return self.target.infer_type(env)
+
+    def analyse_types(self, env):
+        if not self.starred_expr_allowed_here:
+            error(self.pos, "starred expression is not allowed here")
+        self.target = self.target.analyse_types(env)
+        self.type = self.target.type
+        return self
+
+    def analyse_target_declaration(self, env):
+        self.target.analyse_target_declaration(env)
+
+    def analyse_target_types(self, env):
+        self.target = self.target.analyse_target_types(env)
+        self.type = self.target.type
+        return self
+
+    def calculate_result_code(self):
+        return ""
+
+    def generate_result_code(self, code):
+        pass
+
+
+class SequenceNode(ExprNode):
+    #  Base class for list and tuple constructor nodes.
+    #  Contains common code for performing sequence unpacking.
+    #
+    #  args                    [ExprNode]
+    #  unpacked_items          [ExprNode] or None
+    #  coerced_unpacked_items  [ExprNode] or None
+    #  mult_factor             ExprNode     the integer number of content repetitions ([1,2]*3)
+
+    subexprs = ['args', 'mult_factor']
+
+    is_sequence_constructor = 1
+    unpacked_items = None
+    mult_factor = None
+    slow = False  # trade speed for code size (e.g. use PyTuple_Pack())
+
+    def compile_time_value_list(self, denv):
+        return [arg.compile_time_value(denv) for arg in self.args]
+
+    def replace_starred_target_node(self):
+        # replace a starred node in the targets by the contained expression
+        self.starred_assignment = False
+        args = []
+        for arg in self.args:
+            if arg.is_starred:
+                if self.starred_assignment:
+                    error(arg.pos, "more than 1 starred expression in assignment")
+                self.starred_assignment = True
+                arg = arg.target
+                arg.is_starred = True
+            args.append(arg)
+        self.args = args
+
+    def analyse_target_declaration(self, env):
+        self.replace_starred_target_node()
+        for arg in self.args:
+            arg.analyse_target_declaration(env)
+
+    def analyse_types(self, env, skip_children=False):
+        for i, arg in enumerate(self.args):
+            if not skip_children:
+                arg = arg.analyse_types(env)
+            self.args[i] = arg.coerce_to_pyobject(env)
+        if self.mult_factor:
+            mult_factor = self.mult_factor.analyse_types(env)
+            if not mult_factor.type.is_int:
+                mult_factor = mult_factor.coerce_to_pyobject(env)
+            self.mult_factor = mult_factor.coerce_to_simple(env)
+        self.is_temp = 1
+        # not setting self.type here, subtypes do this
+        return self
+
+    def coerce_to_ctuple(self, dst_type, env):
+        if self.type == dst_type:
+            return self
+        assert not self.mult_factor
+        if len(self.args) != dst_type.size:
+            error(self.pos, "trying to coerce sequence to ctuple of wrong length, expected %d, got %d" % (
+                dst_type.size, len(self.args)))
+        coerced_args = [arg.coerce_to(type, env) for arg, type in zip(self.args, dst_type.components)]
+        return TupleNode(self.pos, args=coerced_args, type=dst_type, is_temp=True)
+
+    def _create_merge_node_if_necessary(self, env):
+        self._flatten_starred_args()
+        if not any(arg.is_starred for arg in self.args):
+            return self
+        # convert into MergedSequenceNode by building partial sequences
+        args = []
+        values = []
+        for arg in self.args:
+            if arg.is_starred:
+                if values:
+                    args.append(TupleNode(values[0].pos, args=values).analyse_types(env, skip_children=True))
+                    values = []
+                args.append(arg.target)
+            else:
+                values.append(arg)
+        if values:
+            args.append(TupleNode(values[0].pos, args=values).analyse_types(env, skip_children=True))
+        node = MergedSequenceNode(self.pos, args, self.type)
+        if self.mult_factor:
+            node = binop_node(
+                self.pos, '*', node, self.mult_factor.coerce_to_pyobject(env),
+                inplace=True, type=self.type, is_temp=True)
+        return node
+
+    def _flatten_starred_args(self):
+        args = []
+        for arg in self.args:
+            if arg.is_starred and arg.target.is_sequence_constructor and not arg.target.mult_factor:
+                args.extend(arg.target.args)
+            else:
+                args.append(arg)
+        self.args[:] = args
+
+    def may_be_none(self):
+        return False
+
+    def analyse_target_types(self, env):
+        if self.mult_factor:
+            error(self.pos, "can't assign to multiplied sequence")
+        self.unpacked_items = []
+        self.coerced_unpacked_items = []
+        self.any_coerced_items = False
+        for i, arg in enumerate(self.args):
+            arg = self.args[i] = arg.analyse_target_types(env)
+            if arg.is_starred:
+                if not arg.type.assignable_from(list_type):
+                    error(arg.pos,
+                          "starred target must have Python object (list) type")
+                if arg.type is py_object_type:
+                    arg.type = list_type
+            unpacked_item = PyTempNode(self.pos, env)
+            coerced_unpacked_item = unpacked_item.coerce_to(arg.type, env)
+            if unpacked_item is not coerced_unpacked_item:
+                self.any_coerced_items = True
+            self.unpacked_items.append(unpacked_item)
+            self.coerced_unpacked_items.append(coerced_unpacked_item)
+        self.type = py_object_type
+        return self
+
+    def generate_result_code(self, code):
+        self.generate_operation_code(code)
+
+    def generate_sequence_packing_code(self, code, target=None, plain=False):
+        # Emit the C code that builds the tuple/list/ctuple from self.args.
+        #
+        #  target   C name to store the sequence in; defaults to self.result()
+        #  plain    if true, self.mult_factor is ignored
+        if target is None:
+            target = self.result()
+        size_factor = c_mult = ''
+        mult_factor = None
+
+        if self.mult_factor and not plain:
+            mult_factor = self.mult_factor
+            if mult_factor.type.is_int:
+                # C integer factor: allocate the full size up front and
+                # fill it in a C loop (see below).
+                c_mult = mult_factor.result()
+                if (isinstance(mult_factor.constant_result, _py_int_types) and
+                        mult_factor.constant_result > 0):
+                    size_factor = ' * %s' % mult_factor.constant_result
+                elif mult_factor.type.signed:
+                    # A negative factor allocates an empty sequence.
+                    size_factor = ' * ((%s<0) ? 0:%s)' % (c_mult, c_mult)
+                else:
+                    size_factor = ' * (%s)' % (c_mult,)
+
+        if self.type is tuple_type and (self.is_literal or self.slow) and not c_mult:
+            # use PyTuple_Pack() to avoid generating huge amounts of one-time code
+            code.putln('%s = PyTuple_Pack(%d, %s); %s' % (
+                target,
+                len(self.args),
+                ', '.join(arg.py_result() for arg in self.args),
+                code.error_goto_if_null(target, self.pos)))
+            code.put_gotref(target, py_object_type)
+        elif self.type.is_ctuple:
+            # C tuple: assign the struct fields f0, f1, ... one by one.
+            for i, arg in enumerate(self.args):
+                code.putln("%s.f%s = %s;" % (
+                    target, i, arg.result()))
+        else:
+            # build the tuple/list step by step, potentially multiplying it as we go
+            if self.type is list_type:
+                create_func, set_item_func = 'PyList_New', '__Pyx_PyList_SET_ITEM'
+            elif self.type is tuple_type:
+                create_func, set_item_func = 'PyTuple_New', '__Pyx_PyTuple_SET_ITEM'
+            else:
+                raise InternalError("sequence packing for unexpected type %s" % self.type)
+            arg_count = len(self.args)
+            code.putln("%s = %s(%s%s); %s" % (
+                target, create_func, arg_count, size_factor,
+                code.error_goto_if_null(target, self.pos)))
+            code.put_gotref(target, py_object_type)
+
+            if c_mult:
+                # FIXME: can't use a temp variable here as the code may
+                # end up in the constant building function.  Temps
+                # currently don't work there.
+
+                #counter = code.funcstate.allocate_temp(mult_factor.type, manage_ref=False)
+                counter = Naming.quick_temp_cname
+                code.putln('{ Py_ssize_t %s;' % counter)
+                # offset is the C expression for the index of the first
+                # item of the current repetition.
+                if arg_count == 1:
+                    offset = counter
+                else:
+                    offset = '%s * %s' % (counter, arg_count)
+                code.putln('for (%s=0; %s < %s; %s++) {' % (
+                    counter, counter, c_mult, counter
+                    ))
+            else:
+                offset = ''
+
+            for i in range(arg_count):
+                arg = self.args[i]
+                # An extra reference is taken inside the repetition loop and
+                # for results that do not live in a temp.
+                if c_mult or not arg.result_in_temp():
+                    code.put_incref(arg.result(), arg.ctype())
+                arg.generate_giveref(code)
+                # Index expression: "offset + i" when both are set,
+                # otherwise offset alone, otherwise the plain index i.
+                code.putln("if (%s(%s, %s, %s)) %s;" % (
+                    set_item_func,
+                    target,
+                    (offset and i) and ('%s + %s' % (offset, i)) or (offset or i),
+                    arg.py_result(),
+                    code.error_goto(self.pos)))
+
+            if c_mult:
+                code.putln('}')
+                #code.funcstate.release_temp(counter)
+                code.putln('}')
+
+        if mult_factor is not None and mult_factor.type.is_pyobject:
+            # Python object factor: multiply the finished sequence in place
+            # and replace the target by the result.
+            code.putln('{ PyObject* %s = PyNumber_InPlaceMultiply(%s, %s); %s' % (
+                Naming.quick_temp_cname, target, mult_factor.py_result(),
+                code.error_goto_if_null(Naming.quick_temp_cname, self.pos)
+                ))
+            code.put_gotref(Naming.quick_temp_cname, py_object_type)
+            code.put_decref(target, py_object_type)
+            code.putln('%s = %s;' % (target, Naming.quick_temp_cname))
+            code.putln('}')
+
+    def generate_subexpr_disposal_code(self, code):
+        if self.mult_factor and self.mult_factor.type.is_int:
+            super(SequenceNode, self).generate_subexpr_disposal_code(code)
+        elif self.type is tuple_type and (self.is_literal or self.slow):
+            super(SequenceNode, self).generate_subexpr_disposal_code(code)
+        else:
+            # We call generate_post_assignment_code here instead
+            # of generate_disposal_code, because values were stored
+            # in the tuple using a reference-stealing operation.
+            for arg in self.args:
+                arg.generate_post_assignment_code(code)
+                # Should NOT call free_temps -- this is invoked by the default
+                # generate_evaluation_code which will do that.
+            if self.mult_factor:
+                self.mult_factor.generate_disposal_code(code)
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False,
+                                 exception_check=None, exception_value=None):
+        if self.starred_assignment:
+            self.generate_starred_assignment_code(rhs, code)
+        else:
+            self.generate_parallel_assignment_code(rhs, code)
+
+        for item in self.unpacked_items:
+            item.release(code)
+        rhs.free_temps(code)
+
+    # Pointer to a C function that takes one Python object argument ("it")
+    # and returns a Python object.
+    # NOTE(review): presumably the signature of an iterator's "next"
+    # function -- confirm at the place where this type is used.
+    _func_iternext_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None),
+            ]))
+
+    def generate_parallel_assignment_code(self, rhs, code):
+        # Need to work around the fact that generate_evaluation_code
+        # allocates the temps in a rather hacky way -- the assignment
+        # is evaluated twice, within each if-block.
+        for item in self.unpacked_items:
+            item.allocate(code)
+        special_unpack = (rhs.type is py_object_type
+                          or rhs.type in (tuple_type, list_type)
+                          or not rhs.type.is_builtin_type)
+        long_enough_for_a_loop = len(self.unpacked_items) > 3
+
+        if special_unpack:
+            self.generate_special_parallel_unpacking_code(
+                code, rhs, use_loop=long_enough_for_a_loop)
+        else:
+            code.putln("{")
+            self.generate_generic_parallel_unpacking_code(
+                code, rhs, self.unpacked_items, use_loop=long_enough_for_a_loop)
+            code.putln("}")
+
+        for value_node in self.coerced_unpacked_items:
+            value_node.generate_evaluation_code(code)
+        for i in range(len(self.args)):
+            self.args[i].generate_assignment_code(
+                self.coerced_unpacked_items[i], code)
+
+    def generate_special_parallel_unpacking_code(self, code, rhs, use_loop):
+        # Generate the C fast path that unpacks 'rhs' directly when it is an
+        # exact tuple or list at runtime.
+        #
+        # code      code writer that receives the generated C lines
+        # rhs       node being unpacked; rhs.py_result() is used as the sequence expression
+        # use_loop  if true, the PySequence_ITEM() variant is emitted as a C loop
+        #           rather than as one statement per unpacked item
+        #
+        # The else branch of the generated type test depends on the static type
+        # of 'rhs': none at all, a direct "None is not iterable" error, or the
+        # generic iterator based unpacking code.
+        sequence_type_test = '1'
+        none_check = "likely(%s != Py_None)" % rhs.py_result()
+        if rhs.type is list_type:
+            sequence_types = ['List']
+            if rhs.may_be_none():
+                sequence_type_test = none_check
+        elif rhs.type is tuple_type:
+            sequence_types = ['Tuple']
+            if rhs.may_be_none():
+                sequence_type_test = none_check
+        else:
+            # static type is neither list nor tuple => test for both exact types at runtime
+            sequence_types = ['Tuple', 'List']
+            tuple_check = 'likely(PyTuple_CheckExact(%s))' % rhs.py_result()
+            list_check  = 'PyList_CheckExact(%s)' % rhs.py_result()
+            sequence_type_test = "(%s) || (%s)" % (tuple_check, list_check)
+
+        code.putln("if (%s) {" % sequence_type_test)
+        code.putln("PyObject* sequence = %s;" % rhs.py_result())
+
+        # list/tuple => check size
+        code.putln("Py_ssize_t size = __Pyx_PySequence_SIZE(sequence);")
+        code.putln("if (unlikely(size != %d)) {" % len(self.args))
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("RaiseTooManyValuesToUnpack", "ObjectHandling.c"))
+        code.putln("if (size > %d) __Pyx_RaiseTooManyValuesError(%d);" % (
+            len(self.args), len(self.args)))
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c"))
+        code.putln("else if (size >= 0) __Pyx_RaiseNeedMoreValuesError(size);")
+        # size < 0 => exception, no additional error is raised here
+        code.putln(code.error_goto(self.pos))
+        code.putln("}")
+
+        code.putln("#if CYTHON_ASSUME_SAFE_MACROS && !CYTHON_AVOID_BORROWED_REFS")
+        # unpack items from list/tuple in unrolled loop (can't fail)
+        if len(sequence_types) == 2:
+            # both types are possible => select the matching GET_ITEM macro at runtime
+            code.putln("if (likely(Py%s_CheckExact(sequence))) {" % sequence_types[0])
+        for i, item in enumerate(self.unpacked_items):
+            code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
+                item.result(), sequence_types[0], i))
+        if len(sequence_types) == 2:
+            code.putln("} else {")
+            for i, item in enumerate(self.unpacked_items):
+                code.putln("%s = Py%s_GET_ITEM(sequence, %d); " % (
+                    item.result(), sequence_types[1], i))
+            code.putln("}")
+        # the GET_ITEM macros return borrowed references => take our own ones
+        for item in self.unpacked_items:
+            code.put_incref(item.result(), item.ctype())
+
+        code.putln("#else")
+        # in non-CPython, use the PySequence protocol (which can fail)
+        if not use_loop:
+            for i, item in enumerate(self.unpacked_items):
+                code.putln("%s = PySequence_ITEM(sequence, %d); %s" % (
+                    item.result(), i,
+                    code.error_goto_if_null(item.result(), self.pos)))
+                code.put_gotref(item.result(), item.type)
+        else:
+            # loop over an array of pointers to the item variables to keep the C code short
+            code.putln("{")
+            code.putln("Py_ssize_t i;")
+            code.putln("PyObject** temps[%s] = {%s};" % (
+                len(self.unpacked_items),
+                ','.join(['&%s' % item.result() for item in self.unpacked_items])))
+            code.putln("for (i=0; i < %s; i++) {" % len(self.unpacked_items))
+            code.putln("PyObject* item = PySequence_ITEM(sequence, i); %s" % (
+                code.error_goto_if_null('item', self.pos)))
+            code.put_gotref('item', py_object_type)
+            code.putln("*(temps[i]) = item;")
+            code.putln("}")
+            code.putln("}")
+
+        code.putln("#endif")
+        rhs.generate_disposal_code(code)
+
+        # close the type test and generate whatever is needed for the remaining cases
+        if sequence_type_test == '1':
+            code.putln("}")  # all done
+        elif sequence_type_test == none_check:
+            # either tuple/list or None => save some code by generating the error directly
+            code.putln("} else {")
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("RaiseNoneIterError", "ObjectHandling.c"))
+            code.putln("__Pyx_RaiseNoneNotIterableError(); %s" % code.error_goto(self.pos))
+            code.putln("}")  # all done
+        else:
+            code.putln("} else {")  # needs iteration fallback code
+            self.generate_generic_parallel_unpacking_code(
+                code, rhs, self.unpacked_items, use_loop=use_loop)
+            code.putln("}")
+
+    def generate_generic_parallel_unpacking_code(self, code, rhs, unpacked_items, use_loop, terminate=True):
+        # Generate C code that unpacks 'rhs' through the iterator protocol.
+        #
+        # code            code writer that receives the generated C lines
+        # rhs             node being unpacked; disposed of once the iterator was created
+        # unpacked_items  nodes whose result() variables receive the items, in order
+        # use_loop        emit a C loop over an array of item pointers instead of
+        #                 one statement per item
+        # terminate       if true, also emit the check that the iterator is exhausted
+        #                 and release the iterator temp
+        #
+        # Returns None if 'terminate' is true.  Otherwise returns the C name of the
+        # still allocated iterator temp, which the caller is then responsible for
+        # clearing and releasing.
+        #
+        # The generated code starts with a C declaration, so the caller has to open
+        # a C block before calling this method.
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c"))
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("IterFinish", "ObjectHandling.c"))
+        code.putln("Py_ssize_t index = -1;")  # must be at the start of a C block!
+
+        if use_loop:
+            # Size the pointer array by the items that are actually unpacked here.
+            # This can be fewer than self.unpacked_items, e.g. for the items to the
+            # left of a starred target.
+            code.putln("PyObject** temps[%s] = {%s};" % (
+                len(unpacked_items),
+                ','.join(['&%s' % item.result() for item in unpacked_items])))
+
+        iterator_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+        code.putln(
+            "%s = PyObject_GetIter(%s); %s" % (
+                iterator_temp,
+                rhs.py_result(),
+                code.error_goto_if_null(iterator_temp, self.pos)))
+        code.put_gotref(iterator_temp, py_object_type)
+        rhs.generate_disposal_code(code)
+
+        # look up the iterator's "next" function once and call it directly below
+        iternext_func = code.funcstate.allocate_temp(self._func_iternext_type, manage_ref=False)
+        code.putln("%s = __Pyx_PyObject_GetIterNextFunc(%s);" % (
+            iternext_func, iterator_temp))
+
+        unpacking_error_label = code.new_label('unpacking_failed')
+        unpack_code = "%s(%s)" % (iternext_func, iterator_temp)
+        if use_loop:
+            code.putln("for (index=0; index < %s; index++) {" % len(unpacked_items))
+            code.put("PyObject* item = %s; if (unlikely(!item)) " % unpack_code)
+            code.put_goto(unpacking_error_label)
+            code.put_gotref("item", py_object_type)
+            code.putln("*(temps[index]) = item;")
+            code.putln("}")
+        else:
+            for i, item in enumerate(unpacked_items):
+                # 'index' tracks the number of items received so far for the error message
+                code.put(
+                    "index = %d; %s = %s; if (unlikely(!%s)) " % (
+                        i,
+                        item.result(),
+                        unpack_code,
+                        item.result()))
+                code.put_goto(unpacking_error_label)
+                item.generate_gotref(code)
+
+        if terminate:
+            # all targets are filled => the iterator must not provide any further item
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("UnpackItemEndCheck", "ObjectHandling.c"))
+            code.put_error_if_neg(self.pos, "__Pyx_IternextUnpackEndCheck(%s, %d)" % (
+                unpack_code,
+                len(unpacked_items)))
+            code.putln("%s = NULL;" % iternext_func)
+            code.put_decref_clear(iterator_temp, py_object_type)
+
+        unpacking_done_label = code.new_label('unpacking_done')
+        code.put_goto(unpacking_done_label)
+
+        # error path: the iterator returned NULL before all targets were filled
+        code.put_label(unpacking_error_label)
+        code.put_decref_clear(iterator_temp, py_object_type)
+        code.putln("%s = NULL;" % iternext_func)
+        code.putln("if (__Pyx_IterFinish() == 0) __Pyx_RaiseNeedMoreValuesError(index);")
+        code.putln(code.error_goto(self.pos))
+        code.put_label(unpacking_done_label)
+
+        code.funcstate.release_temp(iternext_func)
+        if terminate:
+            code.funcstate.release_temp(iterator_temp)
+            iterator_temp = None
+
+        return iterator_temp
+
+    def generate_starred_assignment_code(self, rhs, code):
+        # Generate the unpacking code for a target list that contains a starred target.
+        #
+        # rhs   node providing the value that gets unpacked
+        # code  code writer that receives the generated C lines
+        #
+        # The items to the left of the starred target are read from an iterator,
+        # the remaining items are collected into a list for the starred target, and
+        # the items to the right of it are then taken off the end of that list.
+
+        # split the unpacked items at the first starred target
+        for i, arg in enumerate(self.args):
+            if arg.is_starred:
+                starred_target = self.unpacked_items[i]
+                unpacked_fixed_items_left  = self.unpacked_items[:i]
+                unpacked_fixed_items_right = self.unpacked_items[i+1:]
+                break
+        else:
+            assert False
+
+        iterator_temp = None
+        if unpacked_fixed_items_left:
+            for item in unpacked_fixed_items_left:
+                item.allocate(code)
+            code.putln('{')
+            # terminate=False keeps the iterator alive so that the rest can be read from it
+            iterator_temp = self.generate_generic_parallel_unpacking_code(
+                code, rhs, unpacked_fixed_items_left,
+                use_loop=True, terminate=False)
+            for i, item in enumerate(unpacked_fixed_items_left):
+                value_node = self.coerced_unpacked_items[i]
+                value_node.generate_evaluation_code(code)
+            code.putln('}')
+
+        # Build the list for the starred target, either from the remaining iterator
+        # or directly from the rhs if there are no items to the left.
+        starred_target.allocate(code)
+        target_list = starred_target.result()
+        code.putln("%s = %s(%s); %s" % (
+            target_list,
+            "__Pyx_PySequence_ListKeepNew" if (
+                    not iterator_temp and rhs.is_temp and rhs.type in (py_object_type, list_type))
+                else "PySequence_List",
+            iterator_temp or rhs.py_result(),
+            code.error_goto_if_null(target_list, self.pos)))
+        starred_target.generate_gotref(code)
+
+        if iterator_temp:
+            # the rhs was already disposed of when the iterator was created
+            code.put_decref_clear(iterator_temp, py_object_type)
+            code.funcstate.release_temp(iterator_temp)
+        else:
+            rhs.generate_disposal_code(code)
+
+        if unpacked_fixed_items_right:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("RaiseNeedMoreValuesToUnpack", "ObjectHandling.c"))
+            length_temp = code.funcstate.allocate_temp(PyrexTypes.c_py_ssize_t_type, manage_ref=False)
+            code.putln('%s = PyList_GET_SIZE(%s);' % (length_temp, target_list))
+            # the list must at least contain the items for the targets to the right
+            code.putln("if (unlikely(%s < %d)) {" % (length_temp, len(unpacked_fixed_items_right)))
+            code.putln("__Pyx_RaiseNeedMoreValuesError(%d+%s); %s" % (
+                 len(unpacked_fixed_items_left), length_temp,
+                 code.error_goto(self.pos)))
+            code.putln('}')
+
+            # take the right-hand items off the list, starting with the last one
+            for item in unpacked_fixed_items_right[::-1]:
+                item.allocate(code)
+            for i, (item, coerced_arg) in enumerate(zip(unpacked_fixed_items_right[::-1],
+                                                        self.coerced_unpacked_items[::-1])):
+                code.putln('#if CYTHON_COMPILING_IN_CPYTHON')
+                code.putln("%s = PyList_GET_ITEM(%s, %s-%d); " % (
+                    item.py_result(), target_list, length_temp, i+1))
+                # resize the list the hard way
+                code.putln("((PyVarObject*)%s)->ob_size--;" % target_list)
+                code.putln('#else')
+                code.putln("%s = PySequence_ITEM(%s, %s-%d); " % (
+                    item.py_result(), target_list, length_temp, i+1))
+                code.putln('#endif')
+                item.generate_gotref(code)
+                coerced_arg.generate_evaluation_code(code)
+
+            # Outside of CPython, the list was not shrunk in place above, so it gets
+            # replaced by a slice that excludes the items taken from its end.
+            code.putln('#if !CYTHON_COMPILING_IN_CPYTHON')
+            sublist_temp = code.funcstate.allocate_temp(py_object_type, manage_ref=True)
+            code.putln('%s = PySequence_GetSlice(%s, 0, %s-%d); %s' % (
+                sublist_temp, target_list, length_temp, len(unpacked_fixed_items_right),
+                code.error_goto_if_null(sublist_temp, self.pos)))
+            code.put_gotref(sublist_temp, py_object_type)
+            code.funcstate.release_temp(length_temp)
+            code.put_decref(target_list, py_object_type)
+            code.putln('%s = %s; %s = NULL;' % (target_list, sublist_temp, sublist_temp))
+            code.putln('#else')
+            # the temp variable is declared in either case but only used in the branch above
+            code.putln('CYTHON_UNUSED_VAR(%s);' % sublist_temp)
+            code.funcstate.release_temp(sublist_temp)
+            code.putln('#endif')
+
+        # finally, assign the (coerced) unpacked values to the actual targets
+        for i, arg in enumerate(self.args):
+            arg.generate_assignment_code(self.coerced_unpacked_items[i], code)
+
+    def annotate(self, code):
+        # Annotate the arguments first and, if unpacking nodes exist, the
+        # unpacked items followed by their coerced counterparts.
+        node_groups = [self.args]
+        if self.unpacked_items:
+            node_groups += [self.unpacked_items, self.coerced_unpacked_items]
+        for group in node_groups:
+            for node in group:
+                node.annotate(code)
+
+
+class TupleNode(SequenceNode):
+    #  Tuple constructor.
+    #
+    #  is_partly_literal   boolean   all items are literals, but the tuple is multiplied
+    #                                by a factor that is not a literal Python integer
+
+    type = tuple_type
+    is_partly_literal = False
+
+    gil_message = "Constructing Python tuple"
+
+    def infer_type(self, env):
+        # A Python tuple, unless there is no multiplication and none of the (at least
+        # one) items has an object, memoryview, unspecified or fused type.  In that
+        # case, the type declared by env.declare_tuple_type() is used.
+        if self.mult_factor or not self.args:
+            return tuple_type
+        arg_types = [arg.infer_type(env) for arg in self.args]
+        if any(type.is_pyobject or type.is_memoryviewslice or type.is_unspecified or type.is_fused
+               for type in arg_types):
+            return tuple_type
+        return env.declare_tuple_type(self.pos, arg_types).type
+
+    def analyse_types(self, env, skip_children=False):
+        # Analyse the items (unless 'skip_children' is set) and decide whether this
+        # becomes a ctuple, a constant Python tuple or a tuple that is built at runtime.
+        # May return a different node than 'self'.
+
+        # reset before re-analysing
+        if self.is_literal:
+            self.is_literal = False
+        if self.is_partly_literal:
+            self.is_partly_literal = False
+
+        if len(self.args) == 0:
+            # the empty tuple is a constant
+            self.is_temp = False
+            self.is_literal = True
+            return self
+
+        if not skip_children:
+            for i, arg in enumerate(self.args):
+                if arg.is_starred:
+                    arg.starred_expr_allowed_here = True
+                self.args[i] = arg.analyse_types(env)
+        if (not self.mult_factor and
+                not any((arg.is_starred or arg.type.is_pyobject or arg.type.is_memoryviewslice or arg.type.is_fused)
+                        for arg in self.args)):
+            # only plain C typed items => ctuple
+            self.type = env.declare_tuple_type(self.pos, (arg.type for arg in self.args)).type
+            self.is_temp = 1
+            return self
+
+        node = SequenceNode.analyse_types(self, env, skip_children=True)
+        node = node._create_merge_node_if_necessary(env)
+        if not node.is_sequence_constructor:
+            # replaced by some other kind of node => nothing more to decide here
+            return node
+
+        if not all(child.is_literal for child in node.args):
+            return node
+        # all items are literals => the tuple can be created once as a constant
+        if not node.mult_factor or (
+                node.mult_factor.is_literal and
+                isinstance(node.mult_factor.constant_result, _py_int_types)):
+            node.is_temp = False
+            node.is_literal = True
+        else:
+            # only the unmultiplied tuple is constant, the multiplication happens at runtime
+            if not node.mult_factor.type.is_pyobject and not node.mult_factor.type.is_int:
+                node.mult_factor = node.mult_factor.coerce_to_pyobject(env)
+            node.is_temp = True
+            node.is_partly_literal = True
+        return node
+
+    def analyse_as_type(self, env):
+        # ctuple type
+        # Returns None if there are no items or if any item cannot be analysed as a type.
+        if not self.args:
+            return None
+        item_types = [arg.analyse_as_type(env) for arg in self.args]
+        if any(t is None for t in item_types):
+            return None
+        entry = env.declare_tuple_type(self.pos, item_types)
+        return entry.type
+
+    def coerce_to(self, dst_type, env):
+        # Coerce between ctuples and Python tuples where needed and leave all other
+        # cases to SequenceNode.coerce_to().
+        if self.type.is_ctuple:
+            if dst_type.is_ctuple and self.type.size == dst_type.size:
+                return self.coerce_to_ctuple(dst_type, env)
+            elif dst_type is tuple_type or dst_type is py_object_type:
+                # build a Python tuple from the items after coercing them to objects
+                coerced_args = [arg.coerce_to_pyobject(env) for arg in self.args]
+                return TupleNode(
+                    self.pos,
+                    args=coerced_args,
+                    type=tuple_type,
+                    mult_factor=self.mult_factor,
+                    is_temp=1,
+                ).analyse_types(env, skip_children=True)
+            else:
+                # go through a Python object for any other target type
+                return self.coerce_to_pyobject(env).coerce_to(dst_type, env)
+        elif dst_type.is_ctuple and not self.mult_factor:
+            return self.coerce_to_ctuple(dst_type, env)
+        else:
+            return SequenceNode.coerce_to(self, dst_type, env)
+
+    def as_list(self):
+        # Return a new ListNode with the same items and multiplication factor.
+        t = ListNode(self.pos, args=self.args, mult_factor=self.mult_factor)
+        if isinstance(self.constant_result, tuple):
+            t.constant_result = list(self.constant_result)
+        return t
+
+    def is_simple(self):
+        # either temp or constant => always simple
+        return True
+
+    def nonlocally_immutable(self):
+        # either temp or constant => always safe
+        return True
+
+    def calculate_result_code(self):
+        # The empty tuple uses the shared C name Naming.empty_tuple.
+        if len(self.args) > 0:
+            return self.result_code
+        else:
+            return Naming.empty_tuple
+
+    def calculate_constant_result(self):
+        # Note that the multiplication factor is not taken into account here.
+        self.constant_result = tuple([
+                arg.constant_result for arg in self.args])
+
+    def compile_time_value(self, denv):
+        # Build the tuple at compile time; failures are passed on to
+        # compile_time_value_error().
+        values = self.compile_time_value_list(denv)
+        try:
+            return tuple(values)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def generate_operation_code(self, code):
+        # Generate the code that creates the tuple, using a cached constant
+        # for (partly) literal tuples.
+        if len(self.args) == 0:
+            # result_code is Naming.empty_tuple
+            return
+
+        if self.is_literal or self.is_partly_literal:
+            # The "mult_factor" is part of the deduplication if it is also constant, i.e. when
+            # we deduplicate the multiplied result.  Otherwise, only deduplicate the constant part.
+            dedup_key = make_dedup_key(self.type, [self.mult_factor if self.is_literal else None] + self.args)
+            tuple_target = code.get_py_const(py_object_type, 'tuple', cleanup_level=2, dedup_key=dedup_key)
+            const_code = code.get_cached_constants_writer(tuple_target)
+            if const_code is not None:
+                # constant is not yet initialised
+                const_code.mark_pos(self.pos)
+                self.generate_sequence_packing_code(const_code, tuple_target, plain=not self.is_literal)
+                const_code.put_giveref(tuple_target, py_object_type)
+            if self.is_literal:
+                # the constant is the complete result
+                self.result_code = tuple_target
+            elif self.mult_factor.type.is_int:
+                # multiply the constant tuple by a C integer at runtime
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("PySequenceMultiply", "ObjectHandling.c"))
+                code.putln('%s = __Pyx_PySequence_Multiply(%s, %s); %s' % (
+                    self.result(), tuple_target, self.mult_factor.result(),
+                    code.error_goto_if_null(self.result(), self.pos)
+                ))
+                self.generate_gotref(code)
+            else:
+                # multiply the constant tuple by a Python object at runtime
+                code.putln('%s = PyNumber_Multiply(%s, %s); %s' % (
+                    self.result(), tuple_target, self.mult_factor.py_result(),
+                    code.error_goto_if_null(self.result(), self.pos)
+                ))
+                self.generate_gotref(code)
+        else:
+            # mark the entry of the tuple type as used before packing the items at runtime
+            self.type.entry.used = True
+            self.generate_sequence_packing_code(code)
+
+
+class ListNode(SequenceNode):
+    #  List constructor.
+
+    # obj_conversion_errors    [PyrexError]   used internally
+    # original_args            [ExprNode]     used internally
+    # in_module_scope          boolean        set by analyse_types() when analysed in a module scope
+
+    # class level default only; analyse_types() and coerce_to() assign new lists to the instance
+    obj_conversion_errors = []
+    type = list_type
+    in_module_scope = False
+
+    gil_message = "Constructing Python list"
+
+    def type_dependencies(self, env):
+        return ()
+
+    def infer_type(self, env):
+        # TODO: Infer non-object list arrays.
+        return list_type
+
+    def analyse_expressions(self, env):
+        # Allow starred items, analyse the node and coerce the result to a Python object.
+        for arg in self.args:
+            if arg.is_starred:
+                arg.starred_expr_allowed_here = True
+        node = SequenceNode.analyse_expressions(self, env)
+        return node.coerce_to_pyobject(env)
+
+    def analyse_types(self, env):
+        # Errors found while analysing are collected here instead of being reported
+        # directly.  They are reported later, when a Python object is requested in
+        # coerce_to() or when the Python list is generated.
+        with local_errors(ignore=True) as errors:
+            self.original_args = list(self.args)
+            node = SequenceNode.analyse_types(self, env)
+        node.obj_conversion_errors = errors
+        if env.is_module_scope:
+            self.in_module_scope = True
+        node = node._create_merge_node_if_necessary(env)
+        return node
+
+    def coerce_to(self, dst_type, env):
+        # Coerce the list literal to a Python object, a C array (or pointer target),
+        # a C++ class, a struct or a ctuple.  Anything else is an error.
+        if dst_type.is_pyobject:
+            # report the errors that were held back by analyse_types()
+            for err in self.obj_conversion_errors:
+                report_error(err)
+            self.obj_conversion_errors = []
+            if not self.type.subtype_of(dst_type):
+                error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
+        elif (dst_type.is_array or dst_type.is_ptr) and dst_type.base_type is not PyrexTypes.c_void_type:
+            array_length = len(self.args)
+            if self.mult_factor:
+                # the array length must be known at compile time
+                if isinstance(self.mult_factor.constant_result, _py_int_types):
+                    if self.mult_factor.constant_result <= 0:
+                        error(self.pos, "Cannot coerce non-positively multiplied list to '%s'" % dst_type)
+                    else:
+                        array_length *= self.mult_factor.constant_result
+                else:
+                    error(self.pos, "Cannot coerce dynamically multiplied list to '%s'" % dst_type)
+            base_type = dst_type.base_type
+            self.type = PyrexTypes.CArrayType(base_type, array_length)
+            for i in range(len(self.original_args)):
+                arg = self.args[i]
+                # undo a coercion to a Python object and coerce the original value instead
+                if isinstance(arg, CoerceToPyTypeNode):
+                    arg = arg.arg
+                self.args[i] = arg.coerce_to(base_type, env)
+        elif dst_type.is_cpp_class:
+            # TODO(robertwb): Avoid object conversion for vector/list/set.
+            return TypecastNode(self.pos, operand=self, type=PyrexTypes.py_object_type).coerce_to(dst_type, env)
+        elif self.mult_factor:
+            error(self.pos, "Cannot coerce multiplied list to '%s'" % dst_type)
+        elif dst_type.is_struct:
+            # the items are assigned to the struct members in declaration order
+            if len(self.args) > len(dst_type.scope.var_entries):
+                error(self.pos, "Too many members for '%s'" % dst_type)
+            else:
+                if len(self.args) < len(dst_type.scope.var_entries):
+                    warning(self.pos, "Too few members for '%s'" % dst_type, 1)
+                for i, (arg, member) in enumerate(zip(self.original_args, dst_type.scope.var_entries)):
+                    if isinstance(arg, CoerceToPyTypeNode):
+                        arg = arg.arg
+                    self.args[i] = arg.coerce_to(member.type, env)
+            self.type = dst_type
+        elif dst_type.is_ctuple:
+            return self.coerce_to_ctuple(dst_type, env)
+        else:
+            self.type = error_type
+            error(self.pos, "Cannot coerce list to type '%s'" % dst_type)
+        return self
+
+    def as_list(self):  # dummy for compatibility with TupleNode
+        return self
+
+    def as_tuple(self):
+        # Return a new TupleNode with the same items and multiplication factor.
+        t = TupleNode(self.pos, args=self.args, mult_factor=self.mult_factor)
+        if isinstance(self.constant_result, list):
+            t.constant_result = tuple(self.constant_result)
+        return t
+
+    def allocate_temp_result(self, code):
+        # C arrays need a temp variable that is never reused for anything else.
+        if self.type.is_array:
+            if self.in_module_scope:
+                self.temp_code = code.funcstate.allocate_temp(
+                    self.type, manage_ref=False, static=True, reusable=False)
+            else:
+                # To be valid C++, we must allocate the memory on the stack
+                # manually and be sure not to reuse it for something else.
+                # Yes, this means that we leak a temp array variable.
+                self.temp_code = code.funcstate.allocate_temp(
+                    self.type, manage_ref=False, reusable=False)
+        else:
+            SequenceNode.allocate_temp_result(self, code)
+
+    def calculate_constant_result(self):
+        if self.mult_factor:
+            raise ValueError()  # may exceed the compile time memory
+        self.constant_result = [
+            arg.constant_result for arg in self.args]
+
+    def compile_time_value(self, denv):
+        # Unlike calculate_constant_result(), this applies the multiplication factor.
+        l = self.compile_time_value_list(denv)
+        if self.mult_factor:
+            l *= self.mult_factor.compile_time_value(denv)
+        return l
+
+    def generate_operation_code(self, code):
+        # Generate the code that fills the result, depending on the type that
+        # the list literal was coerced to (Python list, C array or struct).
+        if self.type.is_pyobject:
+            for err in self.obj_conversion_errors:
+                report_error(err)
+            self.generate_sequence_packing_code(code)
+        elif self.type.is_array:
+            if self.mult_factor:
+                # repeat the item assignments in a C loop, shifted by the
+                # number of items for each repetition
+                code.putln("{")
+                code.putln("Py_ssize_t %s;" % Naming.quick_temp_cname)
+                code.putln("for ({i} = 0; {i} < {count}; {i}++) {{".format(
+                    i=Naming.quick_temp_cname, count=self.mult_factor.result()))
+                offset = '+ (%d * %s)' % (len(self.args), Naming.quick_temp_cname)
+            else:
+                offset = ''
+            for i, arg in enumerate(self.args):
+                if arg.type.is_array:
+                    # array items cannot be assigned in C => copy their memory
+                    code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStringH", "StringTools.c"))
+                    code.putln("memcpy(&(%s[%s%s]), %s, sizeof(%s[0]));" % (
+                        self.result(), i, offset,
+                        arg.result(), self.result()
+                    ))
+                else:
+                    code.putln("%s[%s%s] = %s;" % (
+                        self.result(),
+                        i,
+                        offset,
+                        arg.result()))
+            if self.mult_factor:
+                code.putln("}")
+                code.putln("}")
+        elif self.type.is_struct:
+            # assign the items to the struct members in declaration order
+            for arg, member in zip(self.args, self.type.scope.var_entries):
+                code.putln("%s.%s = %s;" % (
+                    self.result(),
+                    member.cname,
+                    arg.result()))
+        else:
+            raise InternalError("List type never specified")
+
+
+class ComprehensionNode(ScopedExprNode):
+    # A list/set/dict comprehension.
+    #
+    # The loop is the only child node.  It is analysed either directly or as
+    # part of the scoped analysis, depending on 'has_local_scope'.
+
+    child_attrs = ["loop"]
+
+    is_temp = True
+    constant_result = not_a_constant
+
+    def infer_type(self, env):
+        return self.type
+
+    def analyse_declarations(self, env):
+        # the append node of the inner loop needs to know where to append to
+        self.append.target = self
+        self.init_scope(env)
+        # set up the scope of the loop
+        loop = self.loop
+        if not isinstance(loop, Nodes._ForInStatNode):
+            assert isinstance(loop, Nodes.ForFromStatNode), loop
+            return
+        iterator = loop.iterator
+        assert isinstance(iterator, ScopedExprNode), iterator
+        iterator.init_scope(None, env)
+
+    def analyse_scoped_declarations(self, env):
+        self.loop.analyse_declarations(env)
+
+    def analyse_types(self, env):
+        # with a local scope, the loop is left to analyse_scoped_expressions()
+        if self.has_local_scope:
+            return self
+        self.loop = self.loop.analyse_expressions(env)
+        return self
+
+    def analyse_scoped_expressions(self, env):
+        # without a local scope, the loop is handled by analyse_types()
+        if not self.has_local_scope:
+            return self
+        self.loop = self.loop.analyse_expressions(env)
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def generate_result_code(self, code):
+        self.generate_operation_code(code)
+
+    def generate_operation_code(self, code):
+        # create the empty container first and let the loop fill it
+        constructors = (
+            (Builtin.list_type, 'PyList_New(0)'),
+            (Builtin.set_type, 'PySet_New(NULL)'),
+            (Builtin.dict_type, 'PyDict_New()'),
+        )
+        for container_type, create_code in constructors:
+            if self.type is container_type:
+                break
+        else:
+            raise InternalError("illegal type for comprehension: %s" % self.type)
+        null_check = code.error_goto_if_null(self.result(), self.pos)
+        code.putln('%s = %s; %s' % (self.result(), create_code, null_check))
+
+        self.generate_gotref(code)
+        self.loop.generate_execution_code(code)
+
+    def annotate(self, code):
+        self.loop.annotate(code)
+
+
+class ComprehensionAppendNode(Node):
+    # Appends the value of 'expr' to the list or set that is built by 'target'.
+    #
+    # Need to be careful to avoid infinite recursion:
+    # target must not be in child_attrs/subexprs
+
+    child_attrs = ['expr']
+    target = None
+
+    type = PyrexTypes.c_int_type
+
+    def analyse_expressions(self, env):
+        # the appended value must be a Python object
+        self.expr = analysed = self.expr.analyse_expressions(env)
+        if not analysed.type.is_pyobject:
+            self.expr = analysed.coerce_to_pyobject(env)
+        return self
+
+    def generate_execution_code(self, code):
+        # select the C function that matches the type of the target container
+        container_type = self.target.type
+        if container_type is set_type:
+            function = "PySet_Add"
+        elif container_type is list_type:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("ListCompAppend", "Optimize.c"))
+            function = "__Pyx_ListComp_Append"
+        else:
+            raise InternalError(
+                "Invalid type for comprehension node: %s" % self.target.type)
+
+        value = self.expr
+        value.generate_evaluation_code(code)
+        append_call = "%s(%s, (PyObject*)%s)" % (
+            function, self.target.result(), value.result())
+        code.putln(code.error_goto_if(append_call, self.pos))
+        value.generate_disposal_code(code)
+        value.free_temps(code)
+
+    def generate_function_definitions(self, env, code):
+        self.expr.generate_function_definitions(env, code)
+
+    def annotate(self, code):
+        self.expr.annotate(code)
+
+class DictComprehensionAppendNode(ComprehensionAppendNode):
+    # Stores the value of 'value_expr' under the key 'key_expr' in the dict
+    # that is built by 'target'.
+    child_attrs = ['key_expr', 'value_expr']
+
+    def analyse_expressions(self, env):
+        # key and value must both be Python objects; the key is handled first
+        for attr_name in ('key_expr', 'value_expr'):
+            analysed = getattr(self, attr_name).analyse_expressions(env)
+            setattr(self, attr_name, analysed)
+            if not analysed.type.is_pyobject:
+                setattr(self, attr_name, analysed.coerce_to_pyobject(env))
+        return self
+
+    def generate_execution_code(self, code):
+        key, value = self.key_expr, self.value_expr
+        key.generate_evaluation_code(code)
+        value.generate_evaluation_code(code)
+        set_item_call = "PyDict_SetItem(%s, (PyObject*)%s, (PyObject*)%s)" % (
+            self.target.result(), key.result(), value.result())
+        code.putln(code.error_goto_if(set_item_call, self.pos))
+        # clean up in the same order: key first, then value
+        for node in (key, value):
+            node.generate_disposal_code(code)
+            node.free_temps(code)
+
+    def generate_function_definitions(self, env, code):
+        for node in (self.key_expr, self.value_expr):
+            node.generate_function_definitions(env, code)
+
+    def annotate(self, code):
+        for node in (self.key_expr, self.value_expr):
+            node.annotate(code)
+
+
+class InlinedGeneratorExpressionNode(ExprNode):
+    # A generator expression that was inlined: its result is computed inside of
+    # the loop and passed back as the single, first and only return value of
+    # the generator.
+    # Transforms create this node only when they replace safe builtin calls
+    # on generator expressions.
+    #
+    # gen            GeneratorExpressionNode      the generator, not containing any YieldExprNodes
+    # orig_func      String                       the name of the builtin function this node replaces
+    # target         ExprNode or None             a 'target' for a ComprehensionAppend node
+
+    subexprs = ["gen"]
+    orig_func = None
+    target = None
+    is_temp = True
+    type = py_object_type
+
+    def __init__(self, pos, gen, comprehension_type=None, **kwargs):
+        generator_body = gen.def_node.gbody
+        generator_body.is_inlined = True
+        if comprehension_type is not None:
+            assert comprehension_type in (list_type, set_type, dict_type), comprehension_type
+            generator_body.inlined_comprehension_type = comprehension_type
+            # the comprehension result is collected in the return value variable
+            kwargs['target'] = RawCNameExprNode(pos, comprehension_type, Naming.retval_cname)
+            kwargs['type'] = comprehension_type
+        super(InlinedGeneratorExpressionNode, self).__init__(pos, gen=gen, **kwargs)
+
+    def may_be_none(self):
+        # replacements of these builtins are treated as never returning None
+        never_none_funcs = ('any', 'all', 'sorted')
+        return self.orig_func not in never_none_funcs
+
+    def infer_type(self, env):
+        return self.type
+
+    def analyse_types(self, env):
+        self.gen = self.gen.analyse_expressions(env)
+        return self
+
+    def generate_result_code(self, code):
+        # run the generator once to receive the result
+        null_check = code.error_goto_if_null(self.result(), self.pos)
+        code.putln("%s = __Pyx_Generator_Next(%s); %s" % (
+            self.result(), self.gen.result(), null_check))
+        self.generate_gotref(code)
+
+
+class MergedSequenceNode(ExprNode):
+    """
+    Merge a sequence of iterables into a set/list/tuple.
+
+    The target collection is determined by self.type, which must be set externally.
+
+    args    [ExprNode]
+    """
+    subexprs = ['args']
+    is_temp = True
+    gil_message = "Constructing Python collection"
+
+    def __init__(self, pos, args, type):
+        """
+        Create the node for the given target 'type' (set, list or tuple type).
+
+        Note that 'args' may be modified in place: for list/tuple targets, a leading
+        sequence constructor that is not already a list is replaced by a temp ListNode.
+        """
+        if type in (list_type, tuple_type) and args and args[0].is_sequence_constructor:
+            # construct a list directly from the first argument that we can then extend
+            if args[0].type is not list_type:
+                args[0] = ListNode(args[0].pos, args=args[0].args, is_temp=True, mult_factor=args[0].mult_factor)
+        ExprNode.__init__(self, pos, args=args, type=type)
+
+    def calculate_constant_result(self):
+        """Compute the merged constant as a list first, then convert it to the target type."""
+        result = []
+        for item in self.args:
+            if item.is_sequence_constructor and item.mult_factor:
+                # a non-positive multiplier contributes no items at all
+                if item.mult_factor.constant_result <= 0:
+                    continue
+                # otherwise, adding each item once should be enough
+            if item.is_set_literal or item.is_sequence_constructor:
+                # process items in order
+                items = (arg.constant_result for arg in item.args)
+            else:
+                items = item.constant_result
+            result.extend(items)
+        if self.type is set_type:
+            result = set(result)
+        elif self.type is tuple_type:
+            result = tuple(result)
+        else:
+            assert self.type is list_type
+        self.constant_result = result
+
+    def compile_time_value(self, denv):
+        """
+        Compile-time counterpart of calculate_constant_result().
+
+        Failures while building a set are passed to compile_time_value_error().
+        """
+        result = []
+        for item in self.args:
+            if item.is_sequence_constructor and item.mult_factor:
+                # a non-positive multiplier contributes no items at all
+                if item.mult_factor.compile_time_value(denv) <= 0:
+                    continue
+            if item.is_set_literal or item.is_sequence_constructor:
+                # process items in order
+                items = (arg.compile_time_value(denv) for arg in item.args)
+            else:
+                items = item.compile_time_value(denv)
+            result.extend(items)
+        if self.type is set_type:
+            try:
+                result = set(result)
+            except Exception as e:
+                self.compile_time_value_error(e)
+        elif self.type is tuple_type:
+            result = tuple(result)
+        else:
+            assert self.type is list_type
+        return result
+
+    def type_dependencies(self, env):
+        # The type is fixed externally, so there is nothing to depend on.
+        return ()
+
+    def infer_type(self, env):
+        return self.type
+
+    def analyse_types(self, env):
+        """
+        Analyse all arguments, coerce them to Python objects and guard them against None.
+
+        Returns the single argument itself when it already has the target type,
+        otherwise this node.
+        """
+        args = [
+            arg.analyse_types(env).coerce_to_pyobject(env).as_none_safe_node(
+                # FIXME: CPython's error message starts with the runtime function name
+                'argument after * must be an iterable, not NoneType')
+            for arg in self.args
+        ]
+
+        if len(args) == 1 and args[0].type is self.type:
+            # strip this intermediate node and use the bare collection
+            return args[0]
+
+        assert self.type in (set_type, list_type, tuple_type)
+
+        self.args = args
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def generate_evaluation_code(self, code):
+        """
+        Emit C code that builds the merged collection in this node's temp result.
+
+        The first argument seeds the result, the remaining ones are added or merged in
+        order.  Tuples are built as a list and converted at the end.
+        """
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        is_set = self.type is set_type
+
+        args = iter(self.args)
+        item = next(args)
+        item.generate_evaluation_code(code)
+        if (is_set and item.is_set_literal or
+                not is_set and item.is_sequence_constructor and item.type is list_type):
+            # the first item is a fresh set/list literal: take it over as the result
+            code.putln("%s = %s;" % (self.result(), item.py_result()))
+            item.generate_post_assignment_code(code)
+        else:
+            # otherwise create a new set/list from the first item
+            code.putln("%s = %s(%s); %s" % (
+                self.result(),
+                'PySet_New' if is_set
+                    else "__Pyx_PySequence_ListKeepNew" if item.is_temp and item.type in (py_object_type, list_type)
+                    else "PySequence_List",
+                item.py_result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+            item.generate_disposal_code(code)
+        item.free_temps(code)
+
+        # (name, file) pairs of the utility code that the emitted calls require
+        helpers = set()
+        if is_set:
+            add_func = "PySet_Add"
+            extend_func = "__Pyx_PySet_Update"
+        else:
+            add_func = "__Pyx_ListComp_Append"
+            extend_func = "__Pyx_PyList_Extend"
+
+        for item in args:
+            # Literal collections are unpacked here: their elements are evaluated and
+            # added one by one instead of building an intermediate collection.
+            # NOTE(review): for set targets, sequence constructors with a mult_factor also
+            # take this branch and the mult_factor is not evaluated here, whereas
+            # calculate_constant_result() skips items with a non-positive factor - confirm
+            # that this is intended.
+            if (is_set and (item.is_set_literal or item.is_sequence_constructor) or
+                    (item.is_sequence_constructor and not item.mult_factor)):
+                if not is_set and item.args:
+                    helpers.add(("ListCompAppend", "Optimize.c"))
+                for arg in item.args:
+                    arg.generate_evaluation_code(code)
+                    code.put_error_if_neg(arg.pos, "%s(%s, %s)" % (
+                        add_func,
+                        self.result(),
+                        arg.py_result()))
+                    arg.generate_disposal_code(code)
+                    arg.free_temps(code)
+                continue
+
+            # everything else is evaluated as a whole and merged with the update/extend helper
+            if is_set:
+                helpers.add(("PySet_Update", "Builtins.c"))
+            else:
+                helpers.add(("ListExtend", "Optimize.c"))
+
+            item.generate_evaluation_code(code)
+            code.put_error_if_neg(item.pos, "%s(%s, %s)" % (
+                extend_func,
+                self.result(),
+                item.py_result()))
+            item.generate_disposal_code(code)
+            item.free_temps(code)
+
+        if self.type is tuple_type:
+            # convert the intermediate list into the final tuple, releasing the list
+            # before the conversion result is checked for NULL
+            code.putln("{")
+            code.putln("PyObject *%s = PyList_AsTuple(%s);" % (
+                Naming.quick_temp_cname,
+                self.result()))
+            code.put_decref(self.result(), py_object_type)
+            code.putln("%s = %s; %s" % (
+                self.result(),
+                Naming.quick_temp_cname,
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+            code.putln("}")
+
+        # sorted() keeps the order in which the utility code gets registered deterministic
+        for helper in sorted(helpers):
+            code.globalstate.use_utility_code(UtilityCode.load_cached(*helper))
+
+    def annotate(self, code):
+        # Only the arguments are annotated here.
+        for item in self.args:
+            item.annotate(code)
+
+
+class SetNode(ExprNode):
+    """
+    Set constructor.
+    """
+    subexprs = ['args']
+    type = set_type
+    is_set_literal = True
+    gil_message = "Constructing Python set"
+
+    def analyse_types(self, env):
+        for i in range(len(self.args)):
+            arg = self.args[i]
+            arg = arg.analyse_types(env)
+            self.args[i] = arg.coerce_to_pyobject(env)
+        self.type = set_type
+        self.is_temp = 1
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def calculate_constant_result(self):
+        self.constant_result = {arg.constant_result for arg in self.args}
+
+    def compile_time_value(self, denv):
+        values = [arg.compile_time_value(denv) for arg in self.args]
+        try:
+            return set(values)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def generate_evaluation_code(self, code):
+        for arg in self.args:
+            arg.generate_evaluation_code(code)
+        self.allocate_temp_result(code)
+        code.putln(
+            "%s = PySet_New(0); %s" % (
+                self.result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+        for arg in self.args:
+            code.put_error_if_neg(
+                self.pos,
+                "PySet_Add(%s, %s)" % (self.result(), arg.py_result()))
+            arg.generate_disposal_code(code)
+            arg.free_temps(code)
+
+
+class DictNode(ExprNode):
+    #  Dictionary constructor.
+    #
+    #  Also used for struct/union initialisers: coerce_to() switches self.type to the
+    #  struct/union type, and code generation then assigns fields instead of dict items.
+    #
+    #  key_value_pairs     [DictItemNode]
+    #  exclude_null_values [boolean]          Do not add NULL values to dict
+    #  reject_duplicates   [boolean]          Raise a "double keywords" error on repeated keys
+    #
+    # obj_conversion_errors    [PyrexError]   used internally
+
+    subexprs = ['key_value_pairs']
+    is_temp = 1
+    exclude_null_values = False
+    type = dict_type
+    is_dict_literal = True
+    reject_duplicates = False
+
+    # class level default; instances rebind this attribute rather than mutating the list
+    obj_conversion_errors = []
+
+    @classmethod
+    def from_pairs(cls, pos, pairs):
+        """Build a node from an iterable of (key node, value node) pairs, all positioned at 'pos'."""
+        return cls(pos, key_value_pairs=[
+                DictItemNode(pos, key=k, value=v) for k, v in pairs])
+
+    def calculate_constant_result(self):
+        # each item's constant result is a (key, value) tuple
+        self.constant_result = dict([
+                item.constant_result for item in self.key_value_pairs])
+
+    def compile_time_value(self, denv):
+        """Return the dict of compile-time keys and values; dict() failures go to compile_time_value_error()."""
+        pairs = [(item.key.compile_time_value(denv), item.value.compile_time_value(denv))
+            for item in self.key_value_pairs]
+        try:
+            return dict(pairs)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def type_dependencies(self, env):
+        return ()
+
+    def infer_type(self, env):
+        # TODO: Infer struct constructors.
+        return dict_type
+
+    def analyse_types(self, env):
+        """Analyse all items and keep what local_errors() collected for release_errors()."""
+        with local_errors(ignore=True) as errors:
+            self.key_value_pairs = [
+                item.analyse_types(env)
+                for item in self.key_value_pairs
+            ]
+        # reported later by release_errors(), i.e. only if the node is used as a Python dict
+        self.obj_conversion_errors = errors
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def coerce_to(self, dst_type, env):
+        """
+        Coerce the node to 'dst_type'.
+
+        Python object targets keep (or rebuild) a dict, struct/union targets turn the
+        node into a field initialiser in place, anything else is left to the base class.
+        """
+        if dst_type.is_pyobject:
+            self.release_errors()
+            if self.type.is_struct_or_union:
+                # the node was previously coerced to a struct: build a fresh dict node
+                # with all keys and values converted to Python objects
+                if not dict_type.subtype_of(dst_type):
+                    error(self.pos, "Cannot interpret struct as non-dict type '%s'" % dst_type)
+                return DictNode(self.pos, key_value_pairs=[
+                    DictItemNode(item.pos, key=item.key.coerce_to_pyobject(env),
+                                 value=item.value.coerce_to_pyobject(env))
+                    for item in self.key_value_pairs])
+            if not self.type.subtype_of(dst_type):
+                error(self.pos, "Cannot interpret dict as type '%s'" % dst_type)
+        elif dst_type.is_struct_or_union:
+            self.type = dst_type
+            if not dst_type.is_struct and len(self.key_value_pairs) != 1:
+                error(self.pos, "Exactly one field must be specified to convert to union '%s'" % dst_type)
+            elif dst_type.is_struct and len(self.key_value_pairs) < len(dst_type.scope.var_entries):
+                warning(self.pos, "Not all members given for struct '%s'" % dst_type, 1)
+            for item in self.key_value_pairs:
+                # undo the object coercion that DictItemNode.analyse_types() applied
+                if isinstance(item.key, CoerceToPyTypeNode):
+                    item.key = item.key.arg
+                if not item.key.is_string_literal:
+                    error(item.key.pos, "Invalid struct field identifier")
+                    # placeholder key used after the error was reported
+                    item.key = StringNode(item.key.pos, value="<error>")
+                else:
+                    key = str(item.key.value)  # converts string literals to unicode in Py3
+                    member = dst_type.scope.lookup_here(key)
+                    if not member:
+                        error(item.key.pos, "struct '%s' has no field '%s'" % (dst_type, key))
+                    else:
+                        value = item.value
+                        if isinstance(value, CoerceToPyTypeNode):
+                            value = value.arg
+                        # coerce the raw value to the declared type of the field
+                        item.value = value.coerce_to(member.type, env)
+        else:
+            return super(DictNode, self).coerce_to(dst_type, env)
+        return self
+
+    def release_errors(self):
+        """Report the errors stored by analyse_types() and clear them."""
+        for err in self.obj_conversion_errors:
+            report_error(err)
+        self.obj_conversion_errors = []
+
+    gil_message = "Constructing Python dict"
+
+    def generate_evaluation_code(self, code):
+        #  Custom method used here because key-value
+        #  pairs are evaluated and used one at a time.
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+
+        # a non-object type means that the node was coerced to a struct/union
+        is_dict = self.type.is_pyobject
+        if is_dict:
+            self.release_errors()
+            code.putln(
+                "%s = __Pyx_PyDict_NewPresized(%d); %s" % (
+                    self.result(),
+                    len(self.key_value_pairs),
+                    code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+
+        # State for the compile time duplicate check: the literal key values seen so far.
+        # It is set to None as soon as the check has to be done at runtime instead.
+        keys_seen = set()
+        key_type = None
+        needs_error_helper = False
+
+        for item in self.key_value_pairs:
+            item.generate_evaluation_code(code)
+            if is_dict:
+                if self.exclude_null_values:
+                    code.putln('if (%s) {' % item.value.py_result())
+                key = item.key
+                if self.reject_duplicates:
+                    if keys_seen is not None:
+                        # avoid runtime 'in' checks for literals that we can do at compile time
+                        if not key.is_string_literal:
+                            keys_seen = None
+                        elif key.value in keys_seen:
+                            # FIXME: this could be a compile time error, at least in Cython code
+                            keys_seen = None
+                        elif key_type is not type(key.value):
+                            if key_type is None:
+                                # the first literal key fixes the value type to compare with
+                                key_type = type(key.value)
+                                keys_seen.add(key.value)
+                            else:
+                                # different types => may not be able to compare at compile time
+                                keys_seen = None
+                        else:
+                            keys_seen.add(key.value)
+
+                    if keys_seen is None:
+                        # NOTE(review): PyDict_Contains() can also return -1 on error, which
+                        # this check treats like a duplicate key - confirm that this is ok.
+                        code.putln('if (unlikely(PyDict_Contains(%s, %s))) {' % (
+                            self.result(), key.py_result()))
+                        # currently only used in function calls
+                        needs_error_helper = True
+                        code.putln('__Pyx_RaiseDoubleKeywordsError("function", %s); %s' % (
+                            key.py_result(),
+                            code.error_goto(item.pos)))
+                        code.putln("} else {")
+
+                code.put_error_if_neg(self.pos, "PyDict_SetItem(%s, %s, %s)" % (
+                    self.result(),
+                    item.key.py_result(),
+                    item.value.py_result()))
+                # close the blocks opened above, innermost first
+                if self.reject_duplicates and keys_seen is None:
+                    code.putln('}')
+                if self.exclude_null_values:
+                    code.putln('}')
+            else:
+                # struct/union initialisation: assign each value to the field named by its key
+                if item.value.type.is_array:
+                    # array values are copied with memcpy(), sized by sizeof() of the value
+                    code.putln("memcpy(%s.%s, %s, sizeof(%s));" % (
+                            self.result(),
+                            item.key.value,
+                            item.value.result(),
+                            item.value.result()))
+                else:
+                    code.putln("%s.%s = %s;" % (
+                            self.result(),
+                            item.key.value,
+                            item.value.result()))
+            item.generate_disposal_code(code)
+            item.free_temps(code)
+
+        # only load the helper if a runtime duplicate check was actually emitted
+        if needs_error_helper:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("RaiseDoubleKeywords", "FunctionArguments.c"))
+
+    def annotate(self, code):
+        for item in self.key_value_pairs:
+            item.annotate(code)
+
+    def as_python_dict(self):
+        # returns a dict with constant keys and Node values
+        # (only works on DictNodes where the keys are ConstNodes or PyConstNode)
+        # Unpacking each item relies on DictItemNode.__iter__() yielding key and value.
+        return dict([(key.value, value) for key, value in self.key_value_pairs])
+
+
+class DictItemNode(ExprNode):
+    # Represents a single item in a DictNode
+    #
+    # key          ExprNode
+    # value        ExprNode
+    subexprs = ['key', 'value']
+
+    nogil_check = None  # Parent DictNode takes care of it
+
+    def calculate_constant_result(self):
+        self.constant_result = (
+            self.key.constant_result, self.value.constant_result)
+
+    def analyse_types(self, env):
+        self.key = self.key.analyse_types(env)
+        self.value = self.value.analyse_types(env)
+        self.key = self.key.coerce_to_pyobject(env)
+        self.value = self.value.coerce_to_pyobject(env)
+        return self
+
+    def generate_evaluation_code(self, code):
+        self.key.generate_evaluation_code(code)
+        self.value.generate_evaluation_code(code)
+
+    def generate_disposal_code(self, code):
+        self.key.generate_disposal_code(code)
+        self.value.generate_disposal_code(code)
+
+    def free_temps(self, code):
+        self.key.free_temps(code)
+        self.value.free_temps(code)
+
+    def __iter__(self):
+        return iter([self.key, self.value])
+
+
+class SortedDictKeysNode(ExprNode):
+    # build sorted list of dict keys, e.g. for dir()
+    #
+    # arg          ExprNode     the dict or object with a keys() method
+    subexprs = ['arg']
+
+    is_temp = True
+
+    def __init__(self, arg):
+        # The node takes its position from the argument and always has list type.
+        ExprNode.__init__(self, arg.pos, arg=arg)
+        self.type = Builtin.list_type
+
+    def analyse_types(self, env):
+        """Analyse the argument; arguments typed as dict additionally get a None check."""
+        arg = self.arg.analyse_types(env)
+        if arg.type is Builtin.dict_type:
+            arg = arg.as_none_safe_node(
+                "'NoneType' object is not iterable")
+        self.arg = arg
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def generate_result_code(self, code):
+        """Emit code that puts the keys into a list and sorts that list with PyList_Sort()."""
+        dict_result = self.arg.py_result()
+        if self.arg.type is Builtin.dict_type:
+            # known dict type: PyDict_Keys() can be called directly
+            code.putln('%s = PyDict_Keys(%s); %s' % (
+                self.result(), dict_result,
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+        else:
+            # originally used PyMapping_Keys() here, but that may return a tuple
+            code.globalstate.use_utility_code(UtilityCode.load_cached(
+                'PyObjectCallMethod0', 'ObjectHandling.c'))
+            keys_cname = code.intern_identifier(StringEncoding.EncodedString("keys"))
+            code.putln('%s = __Pyx_PyObject_CallMethod0(%s, %s); %s' % (
+                self.result(), dict_result, keys_cname,
+                code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+            # whatever keys() returned is replaced by a list unless it already is one
+            code.putln("if (unlikely(!PyList_Check(%s))) {" % self.result())
+            self.generate_decref_set(code, "PySequence_List(%s)" % self.result())
+            code.putln(code.error_goto_if_null(self.result(), self.pos))
+            self.generate_gotref(code)
+            code.putln("}")
+        # both branches end with a list in the result, which is then sorted
+        code.put_error_if_neg(
+            self.pos, 'PyList_Sort(%s)' % self.py_result())
+
+
+class ModuleNameMixin(object):
+    def get_py_mod_name(self, code):
+        return code.get_py_string_const(
+            self.module_name, identifier=True)
+
+    def get_py_qualified_name(self, code):
+        return code.get_py_string_const(
+            self.qualname, identifier=True)
+
+
+class ClassNode(ExprNode, ModuleNameMixin):
+    #  Helper class used in the implementation of Python
+    #  class definitions. Constructs a class object given
+    #  a name, tuple of bases and class dictionary.
+    #
+    #  name         EncodedString      Name of the class
+    #  class_def_node  PyClassDefNode  PyClassDefNode defining this class
+    #  doc          ExprNode or None   Doc string
+    #  module_name  EncodedString      Name of defining module
+
+    subexprs = ['doc']
+    type = py_object_type
+    is_temp = True
+
+    def analyse_annotations(self, env):
+        # Nothing is done with annotations for this kind of class node.
+        pass
+
+    def infer_type(self, env):
+        # TODO: could return 'type' in some cases
+        return py_object_type
+
+    def analyse_types(self, env):
+        """Analyse the docstring node (if any) and request the "CreateClass" utility code."""
+        if self.doc:
+            self.doc = self.doc.analyse_types(env)
+            self.doc = self.doc.coerce_to_pyobject(env)
+        env.use_utility_code(UtilityCode.load_cached("CreateClass", "ObjectHandling.c"))
+        return self
+
+    def may_be_none(self):
+        return True
+
+    gil_message = "Constructing Python class"
+
+    def generate_result_code(self, code):
+        """Emit the __Pyx_CreateClass() call, storing the docstring in the class dict first."""
+        class_def_node = self.class_def_node
+        cname = code.intern_identifier(self.name)
+
+        if self.doc:
+            # the docstring goes into the class dict under "__doc__" before the class is created
+            code.put_error_if_neg(self.pos,
+                'PyDict_SetItem(%s, %s, %s)' % (
+                    class_def_node.dict.py_result(),
+                    code.intern_identifier(
+                        StringEncoding.EncodedString("__doc__")),
+                    self.doc.py_result()))
+        py_mod_name = self.get_py_mod_name(code)
+        qualname = self.get_py_qualified_name(code)
+        # arguments: bases, class dict, name, qualified name, module name
+        code.putln(
+            '%s = __Pyx_CreateClass(%s, %s, %s, %s, %s); %s' % (
+                self.result(),
+                class_def_node.bases.py_result(),
+                class_def_node.dict.py_result(),
+                cname,
+                qualname,
+                py_mod_name,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class Py3ClassNode(ExprNode):
+    #  Helper class used in the implementation of Python3+
+    #  class definitions. Constructs a class object given
+    #  a name, tuple of bases and class dictionary.
+    #
+    #  name         EncodedString      Name of the class
+    #  module_name  EncodedString      Name of defining module
+    #  class_def_node  PyClassDefNode  PyClassDefNode defining this class
+    #  calculate_metaclass  bool       should call CalculateMetaclass()
+    #  allow_py2_metaclass  bool       should look for Py2 metaclass
+    #  force_type           bool       always create a "new style" class, even with no bases
+
+    subexprs = []
+    type = py_object_type
+    force_type = False
+    is_temp = True
+
+    def infer_type(self, env):
+        # TODO: could return 'type' in some cases
+        return py_object_type
+
+    def analyse_types(self, env):
+        # There are no sub-expressions to analyse.
+        return self
+
+    def may_be_none(self):
+        return True
+
+    gil_message = "Constructing Python class"
+
+    def analyse_annotations(self, env):
+        """
+        Insert an "__annotations__ = {...}" assignment at the start of the class body.
+
+        The dict maps the name of each annotated entry in 'env' to its annotation string.
+        """
+        from .AutoDocTransforms import AnnotationWriter
+        position = self.class_def_node.pos
+        dict_items = [
+            DictItemNode(
+                entry.pos,
+                key=IdentifierStringNode(entry.pos, value=entry.name),
+                value=entry.annotation.string
+            )
+            for entry in env.entries.values() if entry.annotation
+        ]
+        # Annotations dict shouldn't exist for classes which don't declare any.
+        if dict_items:
+            annotations_dict = DictNode(position, key_value_pairs=dict_items)
+            lhs = NameNode(position, name=StringEncoding.EncodedString(u"__annotations__"))
+            # reuse an existing entry of that name in this scope, otherwise declare it as a dict
+            lhs.entry = env.lookup_here(lhs.name) or env.declare_var(lhs.name, dict_type, position)
+            node = SingleAssignmentNode(position, lhs=lhs, rhs=annotations_dict)
+            node.analyse_declarations(env)
+            self.class_def_node.body.stats.insert(0, node)
+
+    def generate_result_code(self, code):
+        """Emit the __Pyx_Py3ClassCreate() call for this class."""
+        code.globalstate.use_utility_code(UtilityCode.load_cached("Py3ClassCreate", "ObjectHandling.c"))
+        cname = code.intern_identifier(self.name)
+        class_def_node = self.class_def_node
+        mkw = class_def_node.mkw.py_result() if class_def_node.mkw else 'NULL'
+        # metaclass: the explicit one if available, else 'type' when forced, else the default class type
+        if class_def_node.metaclass:
+            metaclass = class_def_node.metaclass.py_result()
+        elif self.force_type:
+            metaclass = "((PyObject*)&PyType_Type)"
+        else:
+            metaclass = "((PyObject*)&__Pyx_DefaultClassType)"
+        # arguments: metaclass, name, bases, class dict, metaclass keywords, and the two flags
+        code.putln(
+            '%s = __Pyx_Py3ClassCreate(%s, %s, %s, %s, %s, %d, %d); %s' % (
+                self.result(),
+                metaclass,
+                cname,
+                class_def_node.bases.py_result(),
+                class_def_node.dict.py_result(),
+                mkw,
+                self.calculate_metaclass,
+                self.allow_py2_metaclass,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class PyClassMetaclassNode(ExprNode):
+    # Helper class holds Python3 metaclass object
+    #
+    #  class_def_node  PyClassDefNode  PyClassDefNode defining this class
+
+    subexprs = []
+
+    def analyse_types(self, env):
+        self.type = py_object_type
+        self.is_temp = True
+        return self
+
+    def may_be_none(self):
+        return True
+
+    def generate_result_code(self, code):
+        bases = self.class_def_node.bases
+        mkw = self.class_def_node.mkw
+        if mkw:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("Py3MetaclassGet", "ObjectHandling.c"))
+            call = "__Pyx_Py3MetaclassGet(%s, %s)" % (
+                bases.result(),
+                mkw.result())
+        else:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("CalculateMetaclass", "ObjectHandling.c"))
+            call = "__Pyx_CalculateMetaclass(NULL, %s)" % (
+                bases.result())
+        code.putln(
+            "%s = %s; %s" % (
+                self.result(), call,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class PyClassNamespaceNode(ExprNode, ModuleNameMixin):
+    # Helper class holds Python3 namespace object
+    #
+    # All this are not owned by this node
+    #  class_def_node  PyClassDefNode  PyClassDefNode defining this class
+    #  doc          ExprNode or None   Doc string (owned)
+
+    subexprs = ['doc']
+
+    def analyse_types(self, env):
+        """Analyse the docstring node (if any) and mark this node as a temp Python object."""
+        if self.doc:
+            self.doc = self.doc.analyse_types(env).coerce_to_pyobject(env)
+        self.type = py_object_type
+        self.is_temp = 1
+        return self
+
+    def may_be_none(self):
+        return True
+
+    def generate_result_code(self, code):
+        """Emit the __Pyx_Py3MetaclassPrepare() call that creates the namespace object."""
+        cname = code.intern_identifier(self.name)
+        py_mod_name = self.get_py_mod_name(code)
+        qualname = self.get_py_qualified_name(code)
+        class_def_node = self.class_def_node
+        # a missing docstring, keyword dict or metaclass is passed as a typed NULL pointer
+        null = "(PyObject *) NULL"
+        doc_code = self.doc.result() if self.doc else null
+        mkw = class_def_node.mkw.py_result() if class_def_node.mkw else null
+        metaclass = class_def_node.metaclass.py_result() if class_def_node.metaclass else null
+        # arguments: metaclass, bases, name, qualified name, metaclass keywords, module name, docstring
+        code.putln(
+            "%s = __Pyx_Py3MetaclassPrepare(%s, %s, %s, %s, %s, %s, %s); %s" % (
+                self.result(),
+                metaclass,
+                class_def_node.bases.result(),
+                cname,
+                qualname,
+                mkw,
+                py_mod_name,
+                doc_code,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class ClassCellInjectorNode(ExprNode):
+    # Initialize CyFunction.func_classobj
+    is_temp = True
+    type = py_object_type
+    subexprs = []
+    is_active = False
+
+    def analyse_expressions(self, env):
+        return self
+
+    def generate_result_code(self, code):
+        assert self.is_active
+        code.putln(
+            '%s = PyList_New(0); %s' % (
+                self.result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+    def generate_injection_code(self, code, classobj_cname):
+        assert self.is_active
+        code.globalstate.use_utility_code(
+            UtilityCode.load_cached("CyFunctionClassCell", "CythonFunction.c"))
+        code.put_error_if_neg(self.pos, '__Pyx_CyFunction_InitClassCell(%s, %s)' % (
+            self.result(), classobj_cname))
+
+
+class ClassCellNode(ExprNode):
+    # Class Cell for noargs super()
+    subexprs = []
+    is_temp = True
+    is_generator = False
+    type = py_object_type
+
+    def analyse_types(self, env):
+        return self
+
+    def generate_result_code(self, code):
+        if not self.is_generator:
+            code.putln('%s = __Pyx_CyFunction_GetClassObj(%s);' % (
+                self.result(),
+                Naming.self_cname))
+        else:
+            code.putln('%s =  %s->classobj;' % (
+                self.result(), Naming.generator_cname))
+        code.putln(
+            'if (!%s) { PyErr_SetString(PyExc_SystemError, '
+            '"super(): empty __class__ cell"); %s }' % (
+                self.result(),
+                code.error_goto(self.pos)))
+        code.put_incref(self.result(), py_object_type)
+
+
+class PyCFunctionNode(ExprNode, ModuleNameMixin):
+    #  Helper class used in the implementation of Python
+    #  functions.  Constructs a PyCFunction object
+    #  from a PyMethodDef struct.
+    #
+    #  pymethdef_cname   string             PyMethodDef structure
+    #  binding           bool
+    #  def_node          DefNode            the Python function node
+    #  module_name       EncodedString      Name of defining module
+    #  code_object       CodeObjectNode     the PyCodeObject creator node
+
+    subexprs = ['code_object', 'defaults_tuple', 'defaults_kwdict',
+                'annotations_dict']
+
+    code_object = None
+    binding = False
+    def_node = None
+    defaults = None
+    defaults_struct = None
+    defaults_pyobjects = 0
+    defaults_tuple = None
+    defaults_kwdict = None
+    annotations_dict = None
+
+    type = py_object_type
+    is_temp = 1
+
+    specialized_cpdefs = None
+    is_specialization = False
+
+    @classmethod
+    def from_defnode(cls, node, binding):
+        return cls(node.pos,
+                   def_node=node,
+                   pymethdef_cname=node.entry.pymethdef_cname,
+                   binding=binding or node.specialized_cpdefs,
+                   specialized_cpdefs=node.specialized_cpdefs,
+                   code_object=CodeObjectNode(node))
+
+    def analyse_types(self, env):
+        if self.binding:
+            self.analyse_default_args(env)
+        return self
+
+    def analyse_default_args(self, env):
+        """
+        Handle non-literal function's default arguments.
+        """
+        nonliteral_objects = []
+        nonliteral_other = []
+        default_args = []
+        default_kwargs = []
+        annotations = []
+
+        # For global cpdef functions and def/cpdef methods in cdef classes, we must use global constants
+        # for default arguments to avoid the dependency on the CyFunction object as 'self' argument
+        # in the underlying C function.  Basically, cpdef functions/methods are static C functions,
+        # so their optional arguments must be static, too.
+        # TODO: change CyFunction implementation to pass both function object and owning object for method calls
+        must_use_constants = env.is_c_class_scope or (self.def_node.is_wrapper and env.is_module_scope)
+
+        for arg in self.def_node.args:
+            if arg.default:
+                if not must_use_constants:
+                    if arg.default.is_literal:
+                        arg.default = DefaultLiteralArgNode(arg.pos, arg.default)
+                        if arg.default.type:
+                            arg.default = arg.default.coerce_to(arg.type, env)
+                    else:
+                        arg.is_dynamic = True
+                        if arg.type.is_pyobject:
+                            nonliteral_objects.append(arg)
+                        else:
+                            nonliteral_other.append(arg)
+                if arg.default.type and arg.default.type.can_coerce_to_pyobject(env):
+                    if arg.kw_only:
+                        default_kwargs.append(arg)
+                    else:
+                        default_args.append(arg)
+            if arg.annotation:
+                arg.annotation = arg.annotation.analyse_types(env)
+                annotations.append((arg.pos, arg.name, arg.annotation.string))
+
+        for arg in (self.def_node.star_arg, self.def_node.starstar_arg):
+            if arg and arg.annotation:
+                arg.annotation = arg.annotation.analyse_types(env)
+                annotations.append((arg.pos, arg.name, arg.annotation.string))
+
+        annotation = self.def_node.return_type_annotation
+        if annotation:
+            self.def_node.return_type_annotation = annotation.analyse_types(env)
+            annotations.append((annotation.pos, StringEncoding.EncodedString("return"),
+                                annotation.string))
+
+        if nonliteral_objects or nonliteral_other:
+            module_scope = env.global_scope()
+            cname = module_scope.next_id(Naming.defaults_struct_prefix)
+            scope = Symtab.StructOrUnionScope(cname)
+            self.defaults = []
+            for arg in nonliteral_objects:
+                type_ = arg.type
+                if type_.is_buffer:
+                    type_ = type_.base
+                entry = scope.declare_var(arg.name, type_, None,
+                                          Naming.arg_prefix + arg.name,
+                                          allow_pyobject=True)
+                self.defaults.append((arg, entry))
+            for arg in nonliteral_other:
+                entry = scope.declare_var(arg.name, arg.type, None,
+                                          Naming.arg_prefix + arg.name,
+                                          allow_pyobject=False, allow_memoryview=True)
+                self.defaults.append((arg, entry))
+            entry = module_scope.declare_struct_or_union(
+                None, 'struct', scope, 1, None, cname=cname)
+            self.defaults_struct = scope
+            self.defaults_pyobjects = len(nonliteral_objects)
+            for arg, entry in self.defaults:
+                arg.default_value = '%s->%s' % (
+                    Naming.dynamic_args_cname, entry.cname)
+            self.def_node.defaults_struct = self.defaults_struct.name
+
+        if default_args or default_kwargs:
+            if self.defaults_struct is None:
+                if default_args:
+                    defaults_tuple = TupleNode(self.pos, args=[
+                        arg.default for arg in default_args])
+                    self.defaults_tuple = defaults_tuple.analyse_types(env).coerce_to_pyobject(env)
+                if default_kwargs:
+                    defaults_kwdict = DictNode(self.pos, key_value_pairs=[
+                        DictItemNode(
+                            arg.pos,
+                            key=IdentifierStringNode(arg.pos, value=arg.name),
+                            value=arg.default)
+                        for arg in default_kwargs])
+                    self.defaults_kwdict = defaults_kwdict.analyse_types(env)
+            elif not self.specialized_cpdefs:
+                # Fused dispatch functions do not support (dynamic) default arguments, only the specialisations do.
+                if default_args:
+                    defaults_tuple = DefaultsTupleNode(
+                        self.pos, default_args, self.defaults_struct)
+                else:
+                    defaults_tuple = NoneNode(self.pos)
+                if default_kwargs:
+                    defaults_kwdict = DefaultsKwDictNode(
+                        self.pos, default_kwargs, self.defaults_struct)
+                else:
+                    defaults_kwdict = NoneNode(self.pos)
+
+                defaults_getter = Nodes.DefNode(
+                    self.pos, args=[], star_arg=None, starstar_arg=None,
+                    body=Nodes.ReturnStatNode(
+                        self.pos, return_type=py_object_type,
+                        value=TupleNode(
+                            self.pos, args=[defaults_tuple, defaults_kwdict])),
+                    decorators=None,
+                    name=StringEncoding.EncodedString("__defaults__"))
+                # defaults getter must never live in class scopes, it's always a module function
+                module_scope = env.global_scope()
+                defaults_getter.analyse_declarations(module_scope)
+                defaults_getter = defaults_getter.analyse_expressions(module_scope)
+                defaults_getter.body = defaults_getter.body.analyse_expressions(
+                    defaults_getter.local_scope)
+                defaults_getter.py_wrapper_required = False
+                defaults_getter.pymethdef_required = False
+                self.def_node.defaults_getter = defaults_getter
+        if annotations:
+            annotations_dict = DictNode(self.pos, key_value_pairs=[
+                DictItemNode(
+                    pos, key=IdentifierStringNode(pos, value=name),
+                    value=value)
+                for pos, name, value in annotations])
+            self.annotations_dict = annotations_dict.analyse_types(env)
+
+    def may_be_none(self):  # reports that this node's result is never None
+        return False
+
+    gil_message = "Constructing Python function"
+
+    def closure_result_code(self):  # C expression used as the closure argument of the function constructor call
+        return "NULL"  # base implementation passes no closure object (InnerFunctionNode overrides this)
+
+    def generate_result_code(self, code):  # choose the function-object constructor based on self.binding
+        if self.binding:
+            self.generate_cyfunction_code(code)  # emits a __Pyx_CyFunction_New / __pyx_FusedFunction_New call
+        else:
+            self.generate_pycfunction_code(code)  # emits a PyCFunction_NewEx call
+
+    def generate_pycfunction_code(self, code):  # emit C code that creates the function via PyCFunction_NewEx()
+        py_mod_name = self.get_py_mod_name(code)
+        code.putln(
+            '%s = PyCFunction_NewEx(&%s, %s, %s); %s' % (
+                self.result(),
+                self.pymethdef_cname,  # the PyMethodDef, passed by address
+                self.closure_result_code(),  # second argument: "NULL" unless a subclass supplies a closure
+                py_mod_name,  # third argument: the module name object
+                code.error_goto_if_null(self.result(), self.pos)))  # error branch taken when the result is NULL
+
+        self.generate_gotref(code)
+
+    def generate_cyfunction_code(self, code):  # emit C code that creates a CyFunction/FusedFunction and attaches its metadata
+        if self.specialized_cpdefs:
+            def_node = self.specialized_cpdefs[0]  # fused dispatcher: read function properties from the first specialisation
+        else:
+            def_node = self.def_node
+
+        if self.specialized_cpdefs or self.is_specialization:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("FusedFunction", "CythonFunction.c"))
+            constructor = "__pyx_FusedFunction_New"
+        else:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("CythonFunction", "CythonFunction.c"))
+            constructor = "__Pyx_CyFunction_New"
+
+        if self.code_object:
+            code_object_result = self.code_object.py_result()
+        else:
+            code_object_result = 'NULL'  # no code object node attached: pass NULL to the constructor
+
+        flags = []  # names of the __Pyx_CYFUNCTION_* flags, joined with ' | ' below
+        if def_node.is_staticmethod:
+            flags.append('__Pyx_CYFUNCTION_STATICMETHOD')
+        elif def_node.is_classmethod:
+            flags.append('__Pyx_CYFUNCTION_CLASSMETHOD')
+
+        if def_node.local_scope.parent_scope.is_c_class_scope and not def_node.entry.is_anonymous:
+            flags.append('__Pyx_CYFUNCTION_CCLASS')
+
+        if def_node.is_coroutine:
+            flags.append('__Pyx_CYFUNCTION_COROUTINE')
+
+        if flags:
+            flags = ' | '.join(flags)
+        else:
+            flags = '0'  # no flags set: emit a literal 0
+
+        code.putln(
+            '%s = %s(&%s, %s, %s, %s, %s, %s, %s); %s' % (
+                self.result(),
+                constructor,
+                self.pymethdef_cname,
+                flags,
+                self.get_py_qualified_name(code),
+                self.closure_result_code(),
+                self.get_py_mod_name(code),
+                Naming.moddict_cname,
+                code_object_result,
+                code.error_goto_if_null(self.result(), self.pos)))  # error branch taken when the result is NULL
+
+        self.generate_gotref(code)
+
+        if def_node.requires_classobj:  # append the new function to the class_cell list of the innermost class on pyclass_stack
+            assert code.pyclass_stack, "pyclass_stack is empty"
+            class_node = code.pyclass_stack[-1]
+            code.put_incref(self.py_result(), py_object_type)
+            code.putln(
+                'PyList_Append(%s, %s);' % (
+                    class_node.class_cell.result(),
+                    self.result()))
+            self.generate_giveref(code)
+
+        if self.defaults:  # initialise the defaults struct on the function, then assign each (arg, entry) default into it
+            code.putln(
+                'if (!__Pyx_CyFunction_InitDefaults(%s, sizeof(%s), %d)) %s' % (
+                    self.result(), self.defaults_struct.name,
+                    self.defaults_pyobjects, code.error_goto(self.pos)))
+            defaults = '__Pyx_CyFunction_Defaults(%s, %s)' % (
+                self.defaults_struct.name, self.result())
+            for arg, entry in self.defaults:
+                arg.generate_assignment_code(code, target='%s->%s' % (
+                    defaults, entry.cname))
+
+        if self.defaults_tuple:
+            code.putln('__Pyx_CyFunction_SetDefaultsTuple(%s, %s);' % (
+                self.result(), self.defaults_tuple.py_result()))
+        if not self.specialized_cpdefs:
+            # disable introspection functions for fused dispatcher function since the user never sees it
+            # TODO: this is mostly disabled because the attributes end up pointing to ones belonging
+            #  to the specializations - ideally this would be fixed instead
+            if self.defaults_kwdict:
+                code.putln('__Pyx_CyFunction_SetDefaultsKwDict(%s, %s);' % (
+                    self.result(), self.defaults_kwdict.py_result()))
+            if def_node.defaults_getter:
+                code.putln('__Pyx_CyFunction_SetDefaultsGetter(%s, %s);' % (
+                    self.result(), def_node.defaults_getter.entry.pyfunc_cname))
+            if self.annotations_dict:
+                code.putln('__Pyx_CyFunction_SetAnnotationsDict(%s, %s);' % (
+                    self.result(), self.annotations_dict.py_result()))
+
+
+class InnerFunctionNode(PyCFunctionNode):
+    # Special PyCFunctionNode that depends on a closure class
+
+    binding = True
+    needs_closure_code = True  # when False, closure_result_code() returns "NULL" instead of the scope object
+
+    def closure_result_code(self):  # C expression for the closure argument of the function constructor call
+        if self.needs_closure_code:
+            return "((PyObject*)%s)" % Naming.cur_scope_cname  # the current scope variable, cast to PyObject*
+        return "NULL"
+
+
+class CodeObjectNode(ExprNode):
+    # Create a PyCodeObject for a CyFunction instance.
+    #
+    # def_node   DefNode    the Python function node
+    # varnames   TupleNode  a tuple with all local variable names
+
+    subexprs = ['varnames']
+    is_temp = False
+    result_code = None  # cname of the 'codeobj' Python constant; allocated on first use
+
+    def __init__(self, def_node):
+        ExprNode.__init__(self, def_node.pos, def_node=def_node)
+        args = list(def_node.args)
+        # if we have args/kwargs, then the first two in var_entries are those
+        local_vars = [arg for arg in def_node.local_scope.var_entries if arg.name]
+        self.varnames = TupleNode(
+            def_node.pos,
+            args=[IdentifierStringNode(arg.pos, value=arg.name)
+                  for arg in args + local_vars],  # declared arguments first, then the named local scope entries
+            is_temp=0,
+            is_literal=1)
+
+    def may_be_none(self):
+        return False
+
+    def calculate_result_code(self, code=None):  # NOTE(review): 'code' is dereferenced if result_code is still None, so it must be passed in that case
+        if self.result_code is None:
+            self.result_code = code.get_py_const(py_object_type, 'codeobj', cleanup_level=2)
+        return self.result_code
+
+    def generate_result_code(self, code):  # emit the __Pyx_PyCode_New() call that initialises the cached constant
+        if self.result_code is None:
+            self.result_code = code.get_py_const(py_object_type, 'codeobj', cleanup_level=2)
+
+        code = code.get_cached_constants_writer(self.result_code)  # from here on 'code' is the cached-constants writer
+        if code is None:
+            return  # already initialised
+        code.mark_pos(self.pos)
+        func = self.def_node
+        func_name = code.get_py_string_const(
+            func.name, identifier=True, is_str=False, unicode_value=func.name)
+        # FIXME: better way to get the module file path at module init time? Encoding to use?
+        file_path = StringEncoding.bytes_literal(func.pos[0].get_filenametable_entry().encode('utf8'), 'utf8')
+        file_path_const = code.get_py_string_const(file_path, identifier=False, is_str=True)
+
+        # This combination makes CPython create a new dict for "frame.f_locals" (see GH #1836).
+        flags = ['CO_OPTIMIZED', 'CO_NEWLOCALS']
+
+        if self.def_node.star_arg:
+            flags.append('CO_VARARGS')
+        if self.def_node.starstar_arg:
+            flags.append('CO_VARKEYWORDS')
+        if self.def_node.is_asyncgen:  # the three function-kind flags below are mutually exclusive (if/elif chain)
+            flags.append('CO_ASYNC_GENERATOR')
+        elif self.def_node.is_coroutine:
+            flags.append('CO_COROUTINE')
+        elif self.def_node.is_generator:
+            flags.append('CO_GENERATOR')
+
+        code.putln("%s = (PyObject*)__Pyx_PyCode_New(%d, %d, %d, %d, 0, %s, %s, %s, %s, %s, %s, %s, %s, %s, %d, %s); %s" % (
+            self.result_code,
+            len(func.args) - func.num_kwonly_args,  # argcount
+            func.num_posonly_args,     # posonlyargcount (Py3.8+ only)
+            func.num_kwonly_args,      # kwonlyargcount (Py3 only)
+            len(self.varnames.args),   # nlocals
+            '|'.join(flags) or '0',    # flags
+            Naming.empty_bytes,        # code
+            Naming.empty_tuple,        # consts
+            Naming.empty_tuple,        # names (FIXME)
+            self.varnames.result(),    # varnames
+            Naming.empty_tuple,        # freevars (FIXME)
+            Naming.empty_tuple,        # cellvars (FIXME)
+            file_path_const,           # filename
+            func_name,                 # name
+            self.pos[1],               # firstlineno
+            Naming.empty_bytes,        # lnotab
+            code.error_goto_if_null(self.result_code, self.pos),
+            ))
+
+
+class DefaultLiteralArgNode(ExprNode):
+    # CyFunction's literal argument default value
+    #
+    # Evaluate literal only once.
+
+    subexprs = []
+    is_literal = True
+    is_temp = False
+
+    def __init__(self, pos, arg):  # 'arg' is the wrapped literal node; its type and constant_result are copied onto this node
+        super(DefaultLiteralArgNode, self).__init__(pos)
+        self.arg = arg
+        self.constant_result = arg.constant_result
+        self.type = self.arg.type
+        self.evaluated = False  # set to True once the wrapped node's evaluation code has been generated
+
+    def analyse_types(self, env):  # nothing to analyse: the type was already taken from the wrapped node
+        return self
+
+    def generate_result_code(self, code):  # intentionally empty; code generation happens in generate_evaluation_code()
+        pass
+
+    def generate_evaluation_code(self, code):  # generate the wrapped node's evaluation code on the first call only
+        if not self.evaluated:
+            self.arg.generate_evaluation_code(code)
+            self.evaluated = True
+
+    def result(self):  # the wrapped node's result, cast to this node's type
+        return self.type.cast_code(self.arg.result())
+
+
+class DefaultNonLiteralArgNode(ExprNode):
+    # CyFunction's non-literal argument default value
+
+    subexprs = []
+
+    def __init__(self, pos, arg, defaults_struct):  # 'defaults_struct' is the scope in which the argument's field is looked up by name
+        super(DefaultNonLiteralArgNode, self).__init__(pos)
+        self.arg = arg
+        self.defaults_struct = defaults_struct
+
+    def analyse_types(self, env):  # takes the argument's declared type; the result is not a temp
+        self.type = self.arg.type
+        self.is_temp = False
+        return self
+
+    def generate_result_code(self, code):  # intentionally empty; result() is a plain struct field access
+        pass
+
+    def result(self):  # C expression reading this argument's field of the defaults struct obtained from Naming.self_cname
+        return '__Pyx_CyFunction_Defaults(%s, %s)->%s' % (
+            self.defaults_struct.name, Naming.self_cname,
+            self.defaults_struct.lookup(self.arg.name).cname)
+
+
+class DefaultsTupleNode(TupleNode):
+    # CyFunction's __defaults__ tuple
+
+    def __init__(self, pos, defaults, defaults_struct):  # 'defaults' is an iterable of argument nodes that carry a .default
+        args = []
+        for arg in defaults:
+            if not arg.default.is_literal:
+                arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)  # non-literal default: read it from the defaults struct
+            else:
+                arg = arg.default  # literal default: use the default expression node directly
+            args.append(arg)
+        super(DefaultsTupleNode, self).__init__(pos, args=args)
+
+    def analyse_types(self, env, skip_children=False):  # same as TupleNode.analyse_types(), then coerced to a Python object
+        return super(DefaultsTupleNode, self).analyse_types(env, skip_children).coerce_to_pyobject(env)
+
+
+class DefaultsKwDictNode(DictNode):
+    # CyFunction's __kwdefaults__ dict
+
+    def __init__(self, pos, defaults, defaults_struct):  # builds one dict item per argument: name -> default value node
+        items = []
+        for arg in defaults:
+            name = IdentifierStringNode(arg.pos, value=arg.name)  # key is created before 'arg' is rebound below
+            if not arg.default.is_literal:
+                arg = DefaultNonLiteralArgNode(pos, arg, defaults_struct)  # non-literal default: read it from the defaults struct
+            else:
+                arg = arg.default  # literal default: use the default expression node directly
+            items.append(DictItemNode(arg.pos, key=name, value=arg))  # note: 'arg' is the value node here, not the argument
+        super(DefaultsKwDictNode, self).__init__(pos, key_value_pairs=items)
+
+
+class LambdaNode(InnerFunctionNode):
+    # Lambda expression node (only used as a function reference)
+    #
+    # args          [CArgDeclNode]         formal arguments
+    # star_arg      PyArgDeclNode or None  * argument
+    # starstar_arg  PyArgDeclNode or None  ** argument
+    # lambda_name   string                 a module-globally unique lambda name
+    # result_expr   ExprNode
+    # def_node      DefNode                the underlying function 'def' node
+
+    child_attrs = ['def_node']
+
+    name = StringEncoding.EncodedString('<lambda>')
+
+    def analyse_declarations(self, env):  # names the lambda, configures its DefNode and registers it with 'env'
+        if hasattr(self, "lambda_name"):
+            # this if-statement makes it safe to run twice
+            return
+        self.lambda_name = self.def_node.lambda_name = env.next_id('lambda')
+        self.def_node.no_assignment_synthesis = True
+        self.def_node.pymethdef_required = True
+        self.def_node.is_cyfunction = True
+        self.def_node.analyse_declarations(env)
+        self.pymethdef_cname = self.def_node.entry.pymethdef_cname  # only read after the DefNode's declarations were analysed
+        env.add_lambda_def(self.def_node)
+
+    def analyse_types(self, env):  # analyse the underlying DefNode first, then this node via the superclass
+        self.def_node = self.def_node.analyse_expressions(env)
+        return super(LambdaNode, self).analyse_types(env)
+
+    def generate_result_code(self, code):  # generate the DefNode's execution code, then the function object creation
+        self.def_node.generate_execution_code(code)
+        super(LambdaNode, self).generate_result_code(code)
+
+
+class GeneratorExpressionNode(LambdaNode):
+    # A generator expression, e.g.  (i for i in range(10))
+    #
+    # Result is a generator.
+    #
+    # loop      ForStatNode   the for-loop, containing a YieldExprNode
+    # def_node  DefNode       the underlying generator 'def' node
+    # call_parameters [ExprNode]   (Internal) parameters passed to the DefNode call
+
+    name = StringEncoding.EncodedString('genexpr')
+    binding = False
+
+    child_attrs = LambdaNode.child_attrs + ["call_parameters"]
+    subexprs = LambdaNode.subexprs + ["call_parameters"]
+
+    def __init__(self, pos, *args, **kwds):
+        super(GeneratorExpressionNode, self).__init__(pos, *args, **kwds)
+        self.call_parameters = []  # starts empty; presumably filled in elsewhere - not shown in this block
+
+    def analyse_declarations(self, env):  # runs the LambdaNode setup, then reverts the settings a genexpr does not need
+        if hasattr(self, "genexpr_name"):
+            # this if-statement makes it safe to run twice
+            return
+        self.genexpr_name = env.next_id('genexpr')
+        super(GeneratorExpressionNode, self).analyse_declarations(env)
+        # No pymethdef required
+        self.def_node.pymethdef_required = False
+        self.def_node.py_wrapper_required = False
+        self.def_node.is_cyfunction = False
+        # Force genexpr signature
+        self.def_node.entry.signature = TypeSlots.pyfunction_noargs
+        # setup loop scope
+        if isinstance(self.loop, Nodes._ForInStatNode):
+            assert isinstance(self.loop.iterator, ScopedExprNode)
+            self.loop.iterator.init_scope(None, env)
+        else:
+            assert isinstance(self.loop, Nodes.ForFromStatNode)  # the only other loop node type accepted here
+
+    def generate_result_code(self, code):  # emit a direct C call of the DefNode's function; LambdaNode's version is not invoked
+        args_to_call = ([self.closure_result_code()] +
+                        [ cp.result() for cp in self.call_parameters ])  # closure first, then the call parameters
+        args_to_call = ", ".join(args_to_call)
+        code.putln(
+            '%s = %s(%s); %s' % (
+                self.result(),
+                self.def_node.entry.pyfunc_cname,
+                args_to_call,
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class YieldExprNode(ExprNode):
+    # Yield expression node
+    #
+    # arg         ExprNode   the value to return from the generator
+    # label_num   integer    yield label number
+    # is_yield_from  boolean is a YieldFromExprNode to delegate to another generator
+
+    subexprs = ['arg']
+    type = py_object_type
+    label_num = 0  # a falsy label_num makes analyse_types() report "not supported here"
+    is_yield_from = False
+    is_await = False
+    in_async_gen = False
+    expr_keyword = 'yield'  # used in error messages and (with spaces replaced by '_') in the yield label name
+
+    def analyse_types(self, env):  # validates the context, analyses the argument and coerces non-Python arguments
+        if not self.label_num or (self.is_yield_from and self.in_async_gen):
+            error(self.pos, "'%s' not supported here" % self.expr_keyword)
+        self.is_temp = 1
+        if self.arg is not None:
+            self.arg = self.arg.analyse_types(env)
+            if not self.arg.type.is_pyobject:
+                self.coerce_yield_argument(env)
+        return self
+
+    def coerce_yield_argument(self, env):  # hook overridden by subclasses; only called for non-Python argument types
+        self.arg = self.arg.coerce_to_pyobject(env)
+
+    def generate_evaluation_code(self, code):  # store the yielded value (or None) in the return value variable, then yield
+        if self.arg:
+            self.arg.generate_evaluation_code(code)
+            self.arg.make_owned_reference(code)
+            code.putln(
+                "%s = %s;" % (
+                    Naming.retval_cname,
+                    self.arg.result_as(py_object_type)))
+            self.arg.generate_post_assignment_code(code)
+            self.arg.free_temps(code)
+        else:
+            code.put_init_to_py_none(Naming.retval_cname, py_object_type)  # bare 'yield': the yielded value is None
+        self.generate_yield_code(code)
+
+    def generate_yield_code(self, code):
+        """
+        Generate the code to return the argument in 'Naming.retval_cname'
+        and to continue at the yield label.
+        """
+        label_num, label_name = code.new_yield_label(
+            self.expr_keyword.replace(' ', '_'))
+        code.use_label(label_name)
+
+        saved = []  # (temp cname, closure field name, type) for each temp copied into the scope object
+        code.funcstate.closure_temps.reset()
+        for cname, type, manage_ref in code.funcstate.temps_in_use():
+            save_cname = code.funcstate.closure_temps.allocate_temp(type)
+            saved.append((cname, save_cname, type))
+            if type.is_cpp_class:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("MoveIfSupported", "CppSupport.cpp"))
+                cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % cname
+            else:
+                code.put_xgiveref(cname, type)
+            code.putln('%s->%s = %s;' % (Naming.cur_scope_cname, save_cname, cname))
+
+        code.put_xgiveref(Naming.retval_cname, py_object_type)
+        profile = code.globalstate.directives['profile']
+        linetrace = code.globalstate.directives['linetrace']
+        if profile or linetrace:
+            code.put_trace_return(Naming.retval_cname,
+                                  nogil=not code.funcstate.gil_owned)
+        code.put_finish_refcount_context()
+
+        if code.funcstate.current_except is not None:
+            # inside of an except block => save away currently handled exception
+            code.putln("__Pyx_Coroutine_SwapException(%s);" % Naming.generator_cname)
+        else:
+            # no exceptions being handled => restore exception state of caller
+            code.putln("__Pyx_Coroutine_ResetAndClearException(%s);" % Naming.generator_cname)
+
+        code.putln("/* return from %sgenerator, %sing value */" % (
+            'async ' if self.in_async_gen else '',
+            'await' if self.is_await else 'yield'))
+        code.putln("%s->resume_label = %d;" % (
+            Naming.generator_cname, label_num))
+        if self.in_async_gen and not self.is_await:
+            # __Pyx__PyAsyncGenValueWrapperNew() steals a reference to the return value
+            code.putln("return __Pyx__PyAsyncGenValueWrapperNew(%s);" % Naming.retval_cname)
+        else:
+            code.putln("return %s;" % Naming.retval_cname)
+
+        code.put_label(label_name)  # code emitted after this label copies the saved temps back
+        for cname, save_cname, type in saved:
+            save_cname = "%s->%s" % (Naming.cur_scope_cname, save_cname)
+            if type.is_cpp_class:
+                save_cname = "__PYX_STD_MOVE_IF_SUPPORTED(%s)" % save_cname
+            code.putln('%s = %s;' % (cname, save_cname))
+            if type.is_pyobject:
+                code.putln('%s = 0;' % save_cname)  # clear the saved slot once the value is back in the temp
+                code.put_xgotref(cname, type)
+            elif type.is_memoryviewslice:
+                code.putln('%s.memview = NULL; %s.data = NULL;' % (save_cname, save_cname))
+        self.generate_sent_value_handling_code(code, Naming.sent_value_cname)
+        if self.result_is_used:
+            self.allocate_temp_result(code)
+            code.put('%s = %s; ' % (self.result(), Naming.sent_value_cname))
+            code.put_incref(self.result(), py_object_type)
+
+    def generate_sent_value_handling_code(self, code, value_cname):  # default: a NULL sent value jumps to error handling
+        code.putln(code.error_goto_if_null(value_cname, self.pos))
+
+
+class _YieldDelegationExprNode(YieldExprNode):  # shared base of YieldFromExprNode and AwaitExprNode
+    def yield_from_func(self, code):  # subclasses return the name of the C helper that starts the delegation
+        raise NotImplementedError()
+
+    def generate_evaluation_code(self, code, source_cname=None, decref_source=False):  # source_cname: C name used instead of evaluating self.arg
+        if source_cname is None:
+            self.arg.generate_evaluation_code(code)
+        code.putln("%s = %s(%s, %s);" % (
+            Naming.retval_cname,
+            self.yield_from_func(code),
+            Naming.generator_cname,
+            self.arg.py_result() if source_cname is None else source_cname))
+        if source_cname is None:
+            self.arg.generate_disposal_code(code)
+            self.arg.free_temps(code)
+        elif decref_source:  # only applies when a source_cname was supplied
+            code.put_decref_clear(source_cname, py_object_type)
+        code.put_xgotref(Naming.retval_cname, py_object_type)
+
+        code.putln("if (likely(%s)) {" % Naming.retval_cname)  # non-NULL helper result: yield it
+        self.generate_yield_code(code)
+        code.putln("} else {")
+        # either error or sub-generator has normally terminated: return value => node result
+        if self.result_is_used:
+            self.fetch_iteration_result(code)
+        else:
+            self.handle_iteration_exception(code)
+        code.putln("}")
+
+    def fetch_iteration_result(self, code):  # fetch the StopIteration value into this node's result; negative return is an error
+        # YieldExprNode has allocated the result temp for us
+        code.putln("%s = NULL;" % self.result())
+        code.put_error_if_neg(self.pos, "__Pyx_PyGen_FetchStopIterationValue(&%s)" % self.result())
+        self.generate_gotref(code)
+
+    def handle_iteration_exception(self, code):  # result unused: clear a StopIteration, treat any other pending exception as an error
+        code.putln("PyObject* exc_type = __Pyx_PyErr_CurrentExceptionType();")
+        code.putln("if (exc_type) {")
+        code.putln("if (likely(exc_type == PyExc_StopIteration || (exc_type != PyExc_GeneratorExit &&"
+                   " __Pyx_PyErr_GivenExceptionMatches(exc_type, PyExc_StopIteration)))) PyErr_Clear();")
+        code.putln("else %s" % code.error_goto(self.pos))
+        code.putln("}")
+
+
+class YieldFromExprNode(_YieldDelegationExprNode):
+    # "yield from GEN" expression
+    is_yield_from = True
+    expr_keyword = 'yield from'
+
+    def coerce_yield_argument(self, env):  # reports an error unless the argument is a C string type, then coerces to a Python object
+        if not self.arg.type.is_string:
+            # FIXME: support C arrays and C++ iterators?
+            error(self.pos, "yielding from non-Python object not supported")
+        self.arg = self.arg.coerce_to_pyobject(env)
+
+    def yield_from_func(self, code):  # registers the "GeneratorYieldFrom" utility code and returns its C function name
+        code.globalstate.use_utility_code(UtilityCode.load_cached("GeneratorYieldFrom", "Coroutine.c"))
+        return "__Pyx_Generator_Yield_From"
+
+
+class AwaitExprNode(_YieldDelegationExprNode):
+    # 'await' expression node
+    #
+    # arg         ExprNode   the Awaitable value to await
+    # label_num   integer    yield label number
+
+    is_await = True
+    expr_keyword = 'await'
+
+    def coerce_yield_argument(self, env):  # coerces any non-Python argument to a Python object, without a type check
+        if self.arg is not None:
+            # FIXME: use same check as in YieldFromExprNode.coerce_yield_argument() ?
+            self.arg = self.arg.coerce_to_pyobject(env)
+
+    def yield_from_func(self, code):  # registers the "CoroutineYieldFrom" utility code and returns its C function name
+        code.globalstate.use_utility_code(UtilityCode.load_cached("CoroutineYieldFrom", "Coroutine.c"))
+        return "__Pyx_Coroutine_Yield_From"
+
+
+class AwaitIterNextExprNode(AwaitExprNode):
+    # 'await' expression node as part of 'async for' iteration
+    #
+    # Breaks out of loop on StopAsyncIteration exception.
+
+    def _generate_break(self, code):  # emit C code: if the pending exception is StopAsyncIteration, clear it and 'break'
+        code.globalstate.use_utility_code(UtilityCode.load_cached("StopAsyncIteration", "Coroutine.c"))
+        code.putln("PyObject* exc_type = __Pyx_PyErr_CurrentExceptionType();")
+        code.putln("if (unlikely(exc_type && (exc_type == __Pyx_PyExc_StopAsyncIteration || ("
+                   " exc_type != PyExc_StopIteration && exc_type != PyExc_GeneratorExit &&"
+                   " __Pyx_PyErr_GivenExceptionMatches(exc_type, __Pyx_PyExc_StopAsyncIteration))))) {")
+        code.putln("PyErr_Clear();")
+        code.putln("break;")
+        code.putln("}")
+
+    def fetch_iteration_result(self, code):  # emit the loop-break check before the inherited result fetching
+        assert code.break_label, "AwaitIterNextExprNode outside of 'async for' loop"
+        self._generate_break(code)
+        super(AwaitIterNextExprNode, self).fetch_iteration_result(code)
+
+    def generate_sent_value_handling_code(self, code, value_cname):  # NULL sent value: break on StopAsyncIteration, otherwise error
+        assert code.break_label, "AwaitIterNextExprNode outside of 'async for' loop"
+        code.putln("if (unlikely(!%s)) {" % value_cname)
+        self._generate_break(code)
+        # all non-break exceptions are errors, as in parent class
+        code.putln(code.error_goto(self.pos))
+        code.putln("}")
+
+
+class GlobalsExprNode(AtomicExprNode):  # expression node whose result is obtained from the __Pyx_Globals() helper
+    type = dict_type
+    is_temp = 1
+
+    def analyse_types(self, env):  # only registers the utility code needed by generate_result_code()
+        env.use_utility_code(Builtin.globals_utility_code)
+        return self
+
+    gil_message = "Constructing globals dict"
+
+    def may_be_none(self):
+        return False
+
+    def generate_result_code(self, code):  # emit the __Pyx_Globals() call with a NULL check on the result
+        code.putln('%s = __Pyx_Globals(); %s' % (
+            self.result(),
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+
+class LocalsDictItemNode(DictItemNode):  # dict item whose value may be dropped if it cannot become a Python object
+    def analyse_types(self, env):  # analyses key and value; both are coerced to Python objects where possible
+        self.key = self.key.analyse_types(env)
+        self.value = self.value.analyse_types(env)
+        self.key = self.key.coerce_to_pyobject(env)
+        if self.value.type.can_coerce_to_pyobject(env):
+            self.value = self.value.coerce_to_pyobject(env)
+        else:
+            self.value = None  # not convertible: FuncLocalsExprNode.analyse_types() filters out items with a None value
+        return self
+
+
+class FuncLocalsExprNode(DictNode):  # dict node built from the named entries of a scope
+    def __init__(self, pos, env):  # creates one LocalsDictItemNode per named entry in env.entries, sorted by name
+        local_vars = sorted([
+            entry.name for entry in env.entries.values() if entry.name])
+        items = [LocalsDictItemNode(
+            pos, key=IdentifierStringNode(pos, value=var),
+            value=NameNode(pos, name=var, allow_null=True))
+                 for var in local_vars]
+        DictNode.__init__(self, pos, key_value_pairs=items,
+                          exclude_null_values=True)
+
+    def analyse_types(self, env):  # after the normal analysis, drop the items whose value was set to None
+        node = super(FuncLocalsExprNode, self).analyse_types(env)
+        node.key_value_pairs = [ i for i in node.key_value_pairs
+                                 if i.value is not None ]
+        return node
+
+
+class PyClassLocalsExprNode(AtomicExprNode):  # expression node that forwards to an existing class dict node
+    def __init__(self, pos, pyclass_dict):  # 'pyclass_dict' is the node whose type and result this node reuses
+        AtomicExprNode.__init__(self, pos)
+        self.pyclass_dict = pyclass_dict
+
+    def analyse_types(self, env):  # adopts the type of the wrapped dict node; the result is not a temp
+        self.type = self.pyclass_dict.type
+        self.is_temp = False
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def result(self):  # same C result as the wrapped dict node
+        return self.pyclass_dict.result()
+
+    def generate_result_code(self, code):  # intentionally empty; result() refers to the wrapped node
+        pass
+
+
+def LocalsExprNode(pos, scope_node, env):  # factory: picks the node class according to the kind of scope 'env' is
+    if env.is_module_scope:
+        return GlobalsExprNode(pos)  # module scope: same node as used for the globals dict
+    if env.is_py_class_scope:
+        return PyClassLocalsExprNode(pos, scope_node.dict)  # Python class scope: reuse the class node's dict
+    return FuncLocalsExprNode(pos, env)  # any other scope: build a dict from the scope's entries
+
+
+#-------------------------------------------------------------------
+#
+#  Unary operator nodes
+#
+#-------------------------------------------------------------------
+
+compile_time_unary_operators = {  # maps a unary operator string to the 'operator' module function that evaluates it
+    'not': operator.not_,
+    '~': operator.inv,
+    '-': operator.neg,
+    '+': operator.pos,
+}
+
+class UnopNode(ExprNode):
+    #  operator     string
+    #  operand      ExprNode
+    #
+    #  Processing during analyse_expressions phase:
+    #
+    #    analyse_c_operation
+    #      Called when the operand is not a pyobject.
+    #      - Check operand type and coerce if needed.
+    #      - Determine result type and result code fragment.
+    #      - Allocate temporary for result if needed.
+    #
+    #  This base class calls analyse_c_operation() and
+    #  py_operation_function() but does not define them; subclasses
+    #  provide them.
+
+    subexprs = ['operand']
+    infix = True
+    is_inc_dec_op = False
+
+    def calculate_constant_result(self):
+        # Apply the matching Python operator to the operand's constant.
+        func = compile_time_unary_operators[self.operator]
+        self.constant_result = func(self.operand.constant_result)
+
+    def compile_time_value(self, denv):
+        # Evaluate the operator on the operand's compile-time value.
+        # Returns None after reporting an error.
+        func = compile_time_unary_operators.get(self.operator)
+        if not func:
+            error(self.pos,
+                "Unary '%s' not supported in compile-time expression"
+                    % self.operator)
+            # Stop here: calling the missing function would only produce a
+            # second, misleading "NoneType is not callable" error.
+            return None
+        operand = self.operand.compile_time_value(denv)
+        try:
+            return func(operand)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def infer_type(self, env):
+        # For C++ classes and pointers, first ask the operand type for the
+        # result type of the operator; otherwise use infer_unop_type().
+        operand_type = self.operand.infer_type(env)
+        if operand_type.is_cpp_class or operand_type.is_ptr:
+            cpp_type = operand_type.find_cpp_operation_type(self.operator)
+            if cpp_type is not None:
+                return cpp_type
+        return self.infer_unop_type(env, operand_type)
+
+    def infer_unop_type(self, env, operand_type):
+        # Default: Python operands give a Python object, anything else
+        # keeps the operand's type.
+        if operand_type.is_pyobject:
+            return py_object_type
+        else:
+            return operand_type
+
+    def may_be_none(self):
+        # Builtin-typed operands (other than 'type') are treated as never
+        # yielding None; everything else defers to ExprNode.
+        if self.operand.type and self.operand.type.is_builtin_type:
+            if self.operand.type is not type_type:
+                return False
+        return ExprNode.may_be_none(self)
+
+    def analyse_types(self, env):
+        # Dispatch on the analysed operand type:
+        # Pythran -> Python object -> C++ class -> plain C.
+        self.operand = self.operand.analyse_types(env)
+        if self.is_pythran_operation(env):
+            self.type = PythranExpr(pythran_unaryop_type(self.operator, self.operand.type))
+            self.is_temp = 1
+        elif self.is_py_operation():
+            self.coerce_operand_to_pyobject(env)
+            self.type = py_object_type
+            self.is_temp = 1
+        elif self.is_cpp_operation():
+            self.analyse_cpp_operation(env)
+        else:
+            self.analyse_c_operation(env)
+        return self
+
+    def check_const(self):
+        return self.operand.check_const()
+
+    def is_py_operation(self):
+        return self.operand.type.is_pyobject or self.operand.type.is_ctuple
+
+    def is_pythran_operation(self, env):
+        np_pythran = has_np_pythran(env)
+        op_type = self.operand.type
+        return np_pythran and (op_type.is_buffer or op_type.is_pythran_expr)
+
+    def nogil_check(self, env):
+        if self.is_py_operation():
+            self.gil_error()
+
+    def is_cpp_operation(self):
+        type = self.operand.type
+        return type.is_cpp_class
+
+    def coerce_operand_to_pyobject(self, env):
+        self.operand = self.operand.coerce_to_pyobject(env)
+
+    def generate_result_code(self, code):
+        # Emit the code that computes the result, depending on the kind of
+        # operation selected in analyse_types().
+        if self.type.is_pythran_expr:
+            code.putln("// Pythran unaryop")
+            code.putln("__Pyx_call_destructor(%s);" % self.result())
+            code.putln("new (&%s) decltype(%s){%s%s};" % (
+                self.result(),
+                self.result(),
+                self.operator,
+                self.operand.pythran_result()))
+        elif self.operand.type.is_pyobject:
+            self.generate_py_operation_code(code)
+        elif self.is_temp:
+            if self.is_cpp_operation() and self.exception_check == '+':
+                # C++ operator declared with exception translation ('+').
+                translate_cpp_exception(code, self.pos,
+                    "%s = %s %s;" % (self.result(), self.operator, self.operand.result()),
+                    self.result() if self.type.is_pyobject else None,
+                    self.exception_value, self.in_nogil_context)
+            else:
+                code.putln("%s = %s %s;" % (self.result(), self.operator, self.operand.result()))
+
+    def generate_py_operation_code(self, code):
+        # Call the C-API function named by py_operation_function() and
+        # jump to the error label if it returns NULL.
+        function = self.py_operation_function(code)
+        code.putln(
+            "%s = %s(%s); %s" % (
+                self.result(),
+                function,
+                self.operand.py_result(),
+                code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+    def type_error(self):
+        # Report an invalid operand type (unless the operand already has
+        # the error type) and mark this node's type as erroneous.
+        if not self.operand.type.is_error:
+            error(self.pos, "Invalid operand type for '%s' (%s)" %
+                (self.operator, self.operand.type))
+        self.type = PyrexTypes.error_type
+
+    def analyse_cpp_operation(self, env, overload_check=True):
+        # Resolve an overloaded C++ operator for the operand type.
+        #
+        # overload_check   when true, a missing overload or an unknown
+        #                  result type is reported as an error; when false
+        #                  the node is left for the caller to type.
+        operand_types = [self.operand.type]
+        if self.is_inc_dec_op and not self.is_prefix:
+            # Postfix ++/-- is looked up with an extra int argument.
+            operand_types.append(PyrexTypes.c_int_type)
+        entry = env.lookup_operator_for_types(self.pos, self.operator, operand_types)
+        if overload_check and not entry:
+            self.type_error()
+            return
+        if entry:
+            self.exception_check = entry.type.exception_check
+            self.exception_value = entry.type.exception_value
+            if self.exception_check == '+':
+                self.is_temp = True
+                if needs_cpp_exception_conversion(self):
+                    env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+        else:
+            self.exception_check = ''
+            self.exception_value = ''
+        if self.is_inc_dec_op and not self.is_prefix:
+            cpp_type = self.operand.type.find_cpp_operation_type(
+                self.operator, operand_type=PyrexTypes.c_int_type
+            )
+        else:
+            cpp_type = self.operand.type.find_cpp_operation_type(self.operator)
+        if overload_check and cpp_type is None:
+            # Name the operand's type in the message (not the builtin 'type').
+            error(self.pos, "'%s' operator not defined for %s" % (
+                self.operator, self.operand.type))
+            self.type_error()
+            return
+        self.type = cpp_type
+
+
+class NotNode(UnopNode):
+    #  Logical negation: the 'not' operator, emitted as C '!'.
+    #
+    #  operand   ExprNode
+
+    operator = '!'
+    type = PyrexTypes.c_bint_type
+
+    def calculate_constant_result(self):
+        self.constant_result = not self.operand.constant_result
+
+    def compile_time_value(self, denv):
+        value = self.operand.compile_time_value(denv)
+        try:
+            negated = not value
+        except Exception as exc:
+            # Truth-testing the value failed; report it.
+            self.compile_time_value_error(exc)
+        else:
+            return negated
+
+    def infer_unop_type(self, env, operand_type):
+        # The result is always a C boolean, whatever the operand type.
+        return PyrexTypes.c_bint_type
+
+    def analyse_types(self, env):
+        self.operand = self.operand.analyse_types(env)
+        if not self.operand.type.is_cpp_class:
+            self.operand = self.operand.coerce_to_boolean(env)
+        else:
+            # C++ classes go through operator lookup.
+            self.analyse_cpp_operation(env)
+        return self
+
+    def calculate_result_code(self):
+        return "(!%s)" % self.operand.result()
+
+
+class UnaryPlusNode(UnopNode):
+    #  The unary '+' operator.
+
+    operator = '+'
+
+    def analyse_c_operation(self, env):
+        # Result type: the wider of the operand type and C int.
+        operand_type = self.operand.type
+        self.type = PyrexTypes.widest_numeric_type(operand_type, PyrexTypes.c_int_type)
+
+    def py_operation_function(self, code):
+        return "PyNumber_Positive"
+
+    def calculate_result_code(self):
+        # Only a C++ operand keeps the explicit '+'; otherwise the operand's
+        # own result code is returned unchanged.
+        if not self.is_cpp_operation():
+            return self.operand.result()
+        return "(+%s)" % self.operand.result()
+
+
+class UnaryMinusNode(UnopNode):
+    #  The unary '-' operator.
+
+    operator = '-'
+
+    def analyse_c_operation(self, env):
+        # Numeric operands widen to at least C int, enums become C int,
+        # anything else is a type error.
+        operand_type = self.operand.type
+        if operand_type.is_numeric:
+            self.type = PyrexTypes.widest_numeric_type(operand_type, PyrexTypes.c_int_type)
+        elif operand_type.is_enum:
+            self.type = PyrexTypes.c_int_type
+        else:
+            self.type_error()
+        if self.type.is_complex:
+            # Complex results use the type's unary_op('-') call form instead
+            # of the infix '-' (see calculate_result_code).
+            self.infix = False
+
+    def py_operation_function(self, code):
+        return "PyNumber_Negative"
+
+    def calculate_result_code(self):
+        if not self.infix:
+            return "%s(%s)" % (self.operand.type.unary_op('-'), self.operand.result())
+        return "(-%s)" % self.operand.result()
+
+    def get_constant_c_result_code(self):
+        # Returns None when the operand has no constant C result code.
+        operand_code = self.operand.get_constant_c_result_code()
+        if not operand_code:
+            return None
+        return "(-%s)" % operand_code
+
+class TildeNode(UnopNode):
+    #  The unary '~' operator.
+
+    def analyse_c_operation(self, env):
+        # Integer operands widen to at least C int, enums become C int,
+        # anything else is a type error.
+        operand_type = self.operand.type
+        if operand_type.is_int:
+            self.type = PyrexTypes.widest_numeric_type(operand_type, PyrexTypes.c_int_type)
+        elif operand_type.is_enum:
+            self.type = PyrexTypes.c_int_type
+        else:
+            self.type_error()
+
+    def py_operation_function(self, code):
+        return "PyNumber_Invert"
+
+    def calculate_result_code(self):
+        operand_code = self.operand.result()
+        return "(~%s)" % operand_code
+
+
+class CUnopNode(UnopNode):
+    #  Base class for unary operators that are never treated as Python
+    #  operations, so UnopNode.analyse_types() skips its Python branch.
+
+    def is_py_operation(self):
+        return False
+
+class DereferenceNode(CUnopNode):
+    #  The unary '*' (pointer dereference) operator.
+
+    operator = '*'
+
+    def infer_unop_type(self, env, operand_type):
+        # Only pointers can be dereferenced; anything else infers as error.
+        return operand_type.base_type if operand_type.is_ptr else PyrexTypes.error_type
+
+    def analyse_c_operation(self, env):
+        operand_type = self.operand.type
+        if not operand_type.is_ptr:
+            self.type_error()
+            return
+        if env.is_cpp:
+            # When env.is_cpp is set the result is a reference to the pointee.
+            self.type = PyrexTypes.CReferenceType(operand_type.base_type)
+        else:
+            self.type = operand_type.base_type
+
+    def calculate_result_code(self):
+        operand_code = self.operand.result()
+        return "(*%s)" % operand_code
+
+
+class DecrementIncrementNode(CUnopNode):
+    #  The unary '++' / '--' operators, in prefix or postfix form.
+    #  (is_prefix and operator are supplied by the constructor call,
+    #  see inc_dec_constructor.)
+    is_inc_dec_op = True
+
+    def type_error(self):
+        # Report a missing operator with wording specific to prefix or
+        # postfix use, then mark the node's type as erroneous.
+        operand_type = self.operand.type
+        if not operand_type.is_error:
+            if self.is_prefix:
+                message = "No match for 'operator%s' (operand type is '%s')" % (
+                    self.operator, operand_type)
+            else:
+                message = "No 'operator%s(int)' declared for postfix '%s' (operand type is '%s')" % (
+                    self.operator, self.operator, operand_type)
+            error(self.pos, message)
+        self.type = PyrexTypes.error_type
+
+    def analyse_c_operation(self, env):
+        # Numeric operands widen to at least C int, pointers keep their
+        # type, anything else is a type error.
+        operand_type = self.operand.type
+        if operand_type.is_numeric:
+            self.type = PyrexTypes.widest_numeric_type(operand_type, PyrexTypes.c_int_type)
+        elif operand_type.is_ptr:
+            self.type = operand_type
+        else:
+            self.type_error()
+
+    def calculate_result_code(self):
+        if self.is_prefix:
+            pieces = (self.operator, self.operand.result())
+        else:
+            pieces = (self.operand.result(), self.operator)
+        return "(%s%s)" % pieces
+
+def inc_dec_constructor(is_prefix, operator):
+    # Returns a callable that builds a DecrementIncrementNode with the given
+    # is_prefix / operator settings already bound.
+    def make_node(pos, **kwds):
+        return DecrementIncrementNode(pos, is_prefix=is_prefix, operator=operator, **kwds)
+    return make_node
+
+
+class AmpersandNode(CUnopNode):
+    #  The C address-of operator '&'.
+    #
+    #  operand  ExprNode
+    operator = '&'
+
+    def infer_unop_type(self, env, operand_type):
+        return PyrexTypes.c_ptr_type(operand_type)
+
+    def analyse_types(self, env):
+        self.operand = self.operand.analyse_types(env)
+        argtype = self.operand.type
+        if argtype.is_cpp_class:
+            # A C++ class may overload '&'; a missing overload is not an error.
+            self.analyse_cpp_operation(env, overload_check=False)
+
+        can_take_address = (
+            argtype.is_cfunction or argtype.is_reference or self.operand.is_addressable())
+        if not can_take_address:
+            if argtype.is_memoryviewslice:
+                self.error("Cannot take address of memoryview slice")
+            else:
+                self.error("Taking address of non-lvalue (type %s)" % argtype)
+            return self
+
+        if argtype.is_pyobject:
+            # Describe the offending operand as precisely as possible.
+            if self.operand.is_name:
+                described = "variable '%s'" % self.operand.name
+            elif self.operand.is_attribute:
+                described = "object attribute '%s'" % self.operand.attribute
+            else:
+                described = "object"
+            self.error("Cannot take address of Python %s" % described)
+            return self
+
+        # Keep a type found by the C++ operator lookup; otherwise the result
+        # is a plain pointer to the operand type.
+        if not (argtype.is_cpp_class and self.type):
+            self.type = PyrexTypes.c_ptr_type(argtype)
+        return self
+
+    def check_const(self):
+        return self.operand.check_const_addr()
+
+    def error(self, mess):
+        # Report 'mess' and put the node into its error state.
+        error(self.pos, mess)
+        self.type = PyrexTypes.error_type
+        self.result_code = "<error>"
+
+    def calculate_result_code(self):
+        operand_code = self.operand.result()
+        return "(&%s)" % operand_code
+
+    def generate_result_code(self, code):
+        # Code is emitted only for a C++ operand whose operator was declared
+        # with exception translation ('+').
+        needs_translation = self.operand.type.is_cpp_class and self.exception_check == '+'
+        if needs_translation:
+            translate_cpp_exception(code, self.pos,
+                "%s = %s %s;" % (self.result(), self.operator, self.operand.result()),
+                self.result() if self.type.is_pyobject else None,
+                self.exception_value, self.in_nogil_context)
+
+
+# Maps a unary operator's spelling to the node class that unop_node()
+# instantiates for it.
+unop_node_classes = {
+    "+":  UnaryPlusNode,
+    "-":  UnaryMinusNode,
+    "~":  TildeNode,
+}
+
+def unop_node(pos, operator, operand):
+    # Construct a unop node of the class registered for the given operator.
+    # A '-' applied directly to an integer literal is folded into a new,
+    # negated IntNode instead of creating an operator node.
+    if operator == '-' and isinstance(operand, IntNode):
+        negated_value = str(-Utils.str_to_number(operand.value))
+        return IntNode(pos=operand.pos, value=negated_value,
+                       longness=operand.longness, unsigned=operand.unsigned)
+    if isinstance(operand, UnopNode) and operand.operator == operator and operator in '+-':
+        # The same '+' or '-' applied twice in a row, e.g. "++x" or "--x".
+        warning(pos, "Python has no increment/decrement operator: %s%sx == %s(%sx) == x" % ((operator,)*4), 5)
+    node_class = unop_node_classes[operator]
+    return node_class(pos, operator=operator, operand=operand)
+
+
+class TypecastNode(ExprNode):
+    #  C type cast
+    #
+    #  operand      ExprNode
+    #  base_type    CBaseTypeNode
+    #  declarator   CDeclaratorNode
+    #  typecheck    boolean
+    #
+    #  If used from a transform, one can if wanted specify the attribute
+    #  "type" directly and leave base_type and declarator to None
+
+    subexprs = ['operand']
+    base_type = declarator = type = None
+
+    def type_dependencies(self, env):
+        return ()
+
+    def infer_type(self, env):
+        # Resolve the target type from base_type/declarator on first use.
+        if self.type is None:
+            base_type = self.base_type.analyse(env)
+            _, self.type = self.declarator.analyse(base_type, env)
+        return self.type
+
+    def analyse_types(self, env):
+        # Resolve the target type, analyse the operand and insert whatever
+        # coercion the combination of source and target type needs.
+        # May return a different node (boolean coercion, int-to-bytes).
+        if self.type is None:
+            base_type = self.base_type.analyse(env)
+            _, self.type = self.declarator.analyse(base_type, env)
+        if self.operand.has_constant_result():
+            # Must be done after self.type is resolved.
+            self.calculate_constant_result()
+        if self.type.is_cfunction:
+            error(self.pos,
+                "Cannot cast to a function type")
+            self.type = PyrexTypes.error_type
+        self.operand = self.operand.analyse_types(env)
+        if self.type is PyrexTypes.c_bint_type:
+            # short circuit this to a coercion
+            return self.operand.coerce_to_boolean(env)
+        to_py = self.type.is_pyobject
+        from_py = self.operand.type.is_pyobject
+        if from_py and not to_py and self.operand.is_ephemeral():
+            if not self.type.is_numeric and not self.type.is_cpp_class:
+                error(self.pos, "Casting temporary Python object to non-numeric non-Python type")
+        if to_py and not from_py:
+            # C value -> Python object
+            if self.type is bytes_type and self.operand.type.is_int:
+                return CoerceIntToBytesNode(self.operand, env)
+            elif self.operand.type.can_coerce_to_pyobject(env):
+                self.result_ctype = py_object_type
+                self.operand = self.operand.coerce_to(self.type, env)
+            else:
+                if self.operand.type.is_ptr:
+                    if not (self.operand.type.base_type.is_void or self.operand.type.base_type.is_struct):
+                        error(self.pos, "Python objects cannot be cast from pointers of primitive types")
+                else:
+                    # Should this be an error?
+                    warning(self.pos, "No conversion from %s to %s, python object pointer used." % (
+                        self.operand.type, self.type))
+                self.operand = self.operand.coerce_to_simple(env)
+        elif from_py and not to_py:
+            # Python object -> C value
+            if self.type.create_from_py_utility_code(env):
+                self.operand = self.operand.coerce_to(self.type, env)
+            elif self.type.is_ptr:
+                if not (self.type.base_type.is_void or self.type.base_type.is_struct):
+                    error(self.pos, "Python objects cannot be cast to pointers of primitive types")
+            else:
+                # The cast goes from the operand's type to self.type, so the
+                # message names them in that order (as the branch above does).
+                warning(self.pos, "No conversion from %s to %s, python object pointer used." % (
+                    self.operand.type, self.type))
+        elif from_py and to_py:
+            if self.typecheck:
+                self.operand = PyTypeTestNode(self.operand, self.type, env, notnone=True)
+            elif isinstance(self.operand, SliceIndexNode):
+                # This cast can influence the created type of string slices.
+                self.operand = self.operand.coerce_to(self.type, env)
+        elif self.type.is_complex and self.operand.type.is_complex:
+            self.operand = self.operand.coerce_to_simple(env)
+        elif self.operand.type.is_fused:
+            self.operand = self.operand.coerce_to(self.type, env)
+            #self.type = self.operand.type
+        if self.type.is_ptr and self.type.base_type.is_cfunction and self.type.base_type.nogil:
+            # Warn when a function that is not nogil is cast to a nogil
+            # function pointer type.
+            op_type = self.operand.type
+            if op_type.is_ptr:
+                op_type = op_type.base_type
+            if op_type.is_cfunction and not op_type.nogil:
+                warning(self.pos,
+                        "Casting a GIL-requiring function into a nogil function circumvents GIL validation", 1)
+        return self
+
+    def is_simple(self):
+        # either temp or a C cast => no side effects other than the operand's
+        return self.operand.is_simple()
+
+    def is_ephemeral(self):
+        # the cast is exactly as ephemeral as its operand
+        return self.operand.is_ephemeral()
+
+    def nonlocally_immutable(self):
+        return self.is_temp or self.operand.nonlocally_immutable()
+
+    def nogil_check(self, env):
+        if self.type and self.type.is_pyobject and self.is_temp:
+            self.gil_error()
+
+    def check_const(self):
+        return self.operand.check_const()
+
+    def calculate_constant_result(self):
+        # Stores the cast code built around the operand's constant result.
+        self.constant_result = self.calculate_result_code(self.operand.constant_result)
+
+    def calculate_result_code(self, operand_result = None):
+        # Build the C expression for the cast.  'operand_result' defaults to
+        # the operand's result code.
+        if operand_result is None:
+            operand_result = self.operand.result()
+        if self.type.is_complex:
+            # NOTE(review): for complex targets a passed-in operand_result is
+            # overwritten by the operand's result code here - confirm that
+            # this is intended.
+            operand_result = self.operand.result()
+            if self.operand.type.is_complex:
+                real_part = self.type.real_type.cast_code(
+                    self.operand.type.real_code(operand_result))
+                imag_part = self.type.real_type.cast_code(
+                    self.operand.type.imag_code(operand_result))
+            else:
+                real_part = self.type.real_type.cast_code(operand_result)
+                imag_part = "0"
+            return "%s(%s, %s)" % (
+                    self.type.from_parts,
+                    real_part,
+                    imag_part)
+        else:
+            return self.type.cast_code(operand_result)
+
+    def get_constant_c_result_code(self):
+        # Returns None when the operand has no constant C result code.
+        operand_result = self.operand.get_constant_c_result_code()
+        if operand_result:
+            return self.type.cast_code(operand_result)
+
+    def result_as(self, type):
+        if self.type.is_pyobject and not self.is_temp:
+            #  Optimise away some unnecessary casting
+            return self.operand.result_as(type)
+        else:
+            return ExprNode.result_as(self, type)
+
+    def generate_result_code(self, code):
+        # Only a temp result needs code: store the operand as a PyObject*
+        # and take a reference to it.
+        if self.is_temp:
+            code.putln(
+                "%s = (PyObject *)%s;" % (
+                    self.result(),
+                    self.operand.result()))
+            code.put_incref(self.result(), self.ctype())
+
+
+# Error messages reported by CythonArrayNode.analyse_types().
+ERR_START = "Start may not be given"
+ERR_NOT_STOP = "Stop must be provided to indicate shape"
+ERR_STEPS = ("Strides may only be given to indicate contiguity. "
+             "Consider slicing it after conversion")
+ERR_NOT_POINTER = "Can only create cython.array from pointer or array"
+ERR_BASE_TYPE = "Pointer base type does not match cython.array base type"
+
+
+class CythonArrayNode(ExprNode):
+    """
+    Used when a pointer of base_type is cast to a memoryviewslice with that
+    base type. i.e.
+
+        <int[:M:1, :N]> p
+
+    creates a fortran-contiguous cython.array.
+
+    We leave the type set to object so coercions to object are more efficient
+    and less work. Acquiring a memoryviewslice from this will be just as
+    efficient. ExprNode.coerce_to() will do the additional typecheck on
+    self.compile_time_type
+
+    This also handles <int[:, :]> my_c_array
+
+
+    operand             ExprNode                 the thing we're casting
+    base_type_node      MemoryViewSliceTypeNode  the cast expression node
+    """
+
+    subexprs = ['operand', 'shapes']
+
+    shapes = None
+    is_temp = True
+    mode = "c"
+    array_dtype = None
+
+    shape_type = PyrexTypes.c_py_ssize_t_type
+
+    def analyse_types(self, env):
+        # Validate the operand and the slice axes, collect one shape node
+        # per axis in self.shapes and set self.type / self.coercion_type.
+        # On any error self.type stays error_type and self is returned early.
+        from . import MemoryView
+
+        self.operand = self.operand.analyse_types(env)
+        if self.array_dtype:
+            array_dtype = self.array_dtype
+        else:
+            array_dtype = self.base_type_node.base_type_node.analyse(env)
+        axes = self.base_type_node.axes
+
+        self.type = error_type
+        self.shapes = []
+        ndim = len(axes)
+
+        # Base type of the pointer or C array we are converting
+        base_type = self.operand.type
+
+        if not self.operand.type.is_ptr and not self.operand.type.is_array:
+            error(self.operand.pos, ERR_NOT_POINTER)
+            return self
+
+        # Dimension sizes of C array
+        array_dimension_sizes = []
+        if base_type.is_array:
+            while base_type.is_array:
+                array_dimension_sizes.append(base_type.size)
+                base_type = base_type.base_type
+        elif base_type.is_ptr:
+            base_type = base_type.base_type
+        else:
+            error(self.pos, "unexpected base type %s found" % base_type)
+            return self
+
+        if not (base_type.same_as(array_dtype) or base_type.is_void):
+            error(self.operand.pos, ERR_BASE_TYPE)
+            return self
+        elif self.operand.type.is_array and len(array_dimension_sizes) != ndim:
+            error(self.operand.pos,
+                  "Expected %d dimensions, array has %d dimensions" %
+                                            (ndim, len(array_dimension_sizes)))
+            return self
+
+        # Verify the start, stop and step values
+        # In case of a C array, use the size of C array in each dimension to
+        # get an automatic cast
+        for axis_no, axis in enumerate(axes):
+            if not axis.start.is_none:
+                error(axis.start.pos, ERR_START)
+                return self
+
+            if axis.stop.is_none:
+                if array_dimension_sizes:
+                    dimsize = array_dimension_sizes[axis_no]
+                    axis.stop = IntNode(self.pos, value=str(dimsize),
+                                        constant_result=dimsize,
+                                        type=PyrexTypes.c_int_type)
+                else:
+                    error(axis.pos, ERR_NOT_STOP)
+                    return self
+
+            axis.stop = axis.stop.analyse_types(env)
+            shape = axis.stop.coerce_to(self.shape_type, env)
+            if not shape.is_literal:
+                # coerce_to_temp() returns the node to use from now on (as
+                # for self.operand below), so its result must be kept.
+                shape = shape.coerce_to_temp(env)
+
+            self.shapes.append(shape)
+
+            first_or_last = axis_no in (0, ndim - 1)
+            if not axis.step.is_none and first_or_last:
+                # '1' in the first or last dimension denotes F or C contiguity
+                axis.step = axis.step.analyse_types(env)
+                if (not axis.step.type.is_int and axis.step.is_literal and not
+                        axis.step.type.is_error):
+                    error(axis.step.pos, "Expected an integer literal")
+                    return self
+
+                if axis.step.compile_time_value(env) != 1:
+                    error(axis.step.pos, ERR_STEPS)
+                    return self
+
+                if axis_no == 0:
+                    self.mode = "fortran"
+
+            elif not axis.step.is_none and not first_or_last:
+                # step provided in some other dimension
+                error(axis.step.pos, ERR_STEPS)
+                return self
+
+        if not self.operand.is_name:
+            self.operand = self.operand.coerce_to_temp(env)
+
+        # Every axis is 'follow' except the contiguous one: the first axis in
+        # fortran mode, the last axis otherwise.
+        axes = [('direct', 'follow')] * len(axes)
+        if self.mode == "fortran":
+            axes[0] = ('direct', 'contig')
+        else:
+            axes[-1] = ('direct', 'contig')
+
+        self.coercion_type = PyrexTypes.MemoryViewSliceType(array_dtype, axes)
+        self.coercion_type.validate_memslice_dtype(self.pos)
+        self.type = self.get_cython_array_type(env)
+        MemoryView.use_cython_array_utility_code(env)
+        env.use_utility_code(MemoryView.typeinfo_to_format_code)
+        return self
+
+    def allocate_temp_result(self, code):
+        # Allocate the result temp; raises RuntimeError if one already exists.
+        if self.temp_code:
+            raise RuntimeError("temp allocated multiple times")
+
+        self.temp_code = code.funcstate.allocate_temp(self.type, True)
+
+    def infer_type(self, env):
+        return self.get_cython_array_type(env)
+
+    def get_cython_array_type(self, env):
+        # Look up the type of "array" in the cython scope's viewscope.
+        cython_scope = env.global_scope().context.cython_scope
+        cython_scope.load_cythonscope()
+        return cython_scope.viewscope.lookup("array").type
+
+    def generate_result_code(self, code):
+        # Emit: NULL check for pointer operands, the format object, the
+        # shape tuple, the __pyx_array_new() call, then cleanup of the two
+        # helper temps.
+        from . import Buffer
+
+        shapes = [self.shape_type.cast_code(shape.result())
+                      for shape in self.shapes]
+        dtype = self.coercion_type.dtype
+
+        shapes_temp = code.funcstate.allocate_temp(py_object_type, True)
+        format_temp = code.funcstate.allocate_temp(py_object_type, True)
+
+        itemsize = "sizeof(%s)" % dtype.empty_declaration_code()
+        type_info = Buffer.get_type_information_cname(code, dtype)
+
+        if self.operand.type.is_ptr:
+            code.putln("if (!%s) {" % self.operand.result())
+            code.putln(    'PyErr_SetString(PyExc_ValueError,'
+                                '"Cannot create cython.array from NULL pointer");')
+            code.putln(code.error_goto(self.operand.pos))
+            code.putln("}")
+
+        code.putln("%s = __pyx_format_from_typeinfo(&%s); %s" % (
+            format_temp,
+            type_info,
+            code.error_goto_if_null(format_temp, self.pos),
+        ))
+        code.put_gotref(format_temp, py_object_type)
+
+        # One __PYX_BUILD_PY_SSIZE_T format item per dimension.
+        buildvalue_fmt = " __PYX_BUILD_PY_SSIZE_T " * len(shapes)
+        code.putln('%s = Py_BuildValue((char*) "(" %s ")", %s); %s' % (
+            shapes_temp,
+            buildvalue_fmt,
+            ", ".join(shapes),
+            code.error_goto_if_null(shapes_temp, self.pos),
+        ))
+        code.put_gotref(shapes_temp, py_object_type)
+
+        code.putln('%s = __pyx_array_new(%s, %s, PyBytes_AS_STRING(%s), (char *) "%s", (char *) %s); %s' % (
+            self.result(),
+            shapes_temp, itemsize, format_temp, self.mode, self.operand.result(),
+            code.error_goto_if_null(self.result(), self.pos),
+        ))
+        self.generate_gotref(code)
+
+        def dispose(temp):
+            # Drop the reference held in 'temp' and release the temp.
+            code.put_decref_clear(temp, py_object_type)
+            code.funcstate.release_temp(temp)
+
+        dispose(shapes_temp)
+        dispose(format_temp)
+
+    @classmethod
+    def from_carray(cls, src_node, env):
+        """
+        Given a C array type, return a CythonArrayNode
+        """
+        pos = src_node.pos
+        base_type = src_node.type
+
+        none_node = NoneNode(pos)
+        axes = []
+
+        # One "[:]" axis per array dimension; the last one gets step 1.
+        while base_type.is_array:
+            axes.append(SliceNode(pos, start=none_node, stop=none_node,
+                                       step=none_node))
+            base_type = base_type.base_type
+        axes[-1].step = IntNode(pos, value="1", is_c_literal=True)
+
+        memslicenode = Nodes.MemoryViewSliceTypeNode(pos, axes=axes,
+                                                     base_type_node=base_type)
+        result = CythonArrayNode(pos, base_type_node=memslicenode,
+                                 operand=src_node, array_dtype=base_type)
+        result = result.analyse_types(env)
+        return result
+
+class SizeofNode(ExprNode):
+    #  Abstract base class for sizeof(x) expression nodes.
+    #  The result type is always size_t.
+
+    type = PyrexTypes.c_size_t_type
+
+    def check_const(self):
+        # Always reports the expression as constant.
+        return True
+
+    def generate_result_code(self, code):
+        # No statements are emitted for this node.
+        pass
+
+
+class SizeofTypeNode(SizeofNode):
+    #  C sizeof function applied to a type
+    #
+    #  base_type   CBaseTypeNode
+    #  declarator  CDeclaratorNode
+    #  arg_type    the resolved type, or None until analysed
+
+    subexprs = []
+    arg_type = None
+
+    def analyse_types(self, env):
+        # we may have incorrectly interpreted a dotted name as a type rather than an attribute
+        # this could be better handled by more uniformly treating types as runtime-available objects
+        # (the branch below is switched off by its leading "0 and")
+        if 0 and self.base_type.module_path:
+            path = self.base_type.module_path
+            obj = env.lookup(path[0])
+            if obj.as_module is None:
+                operand = NameNode(pos=self.pos, name=path[0])
+                for attr in path[1:]:
+                    operand = AttributeNode(pos=self.pos, obj=operand, attribute=attr)
+                operand = AttributeNode(pos=self.pos, obj=operand, attribute=self.base_type.name)
+                node = SizeofVarNode(self.pos, operand=operand).analyse_types(env)
+                return node
+        if self.arg_type is None:
+            # Resolve the type from base_type/declarator on first analysis.
+            resolved_base = self.base_type.analyse(env)
+            _, self.arg_type = self.declarator.analyse(resolved_base, env)
+        self.check_type()
+        return self
+
+    def check_type(self):
+        # Report types that sizeof cannot be applied to.
+        arg_type = self.arg_type
+        if not arg_type:
+            return
+        problem = None
+        if arg_type.is_pyobject and not arg_type.is_extension_type:
+            problem = "Cannot take sizeof Python object"
+        elif arg_type.is_void:
+            problem = "Cannot take sizeof void"
+        elif not arg_type.is_complete():
+            problem = "Cannot take sizeof incomplete type '%s'" % arg_type
+        if problem is not None:
+            error(self.pos, problem)
+
+    def calculate_result_code(self):
+        if not self.arg_type.is_extension_type:
+            return "(sizeof(%s))" % self.arg_type.empty_declaration_code()
+        # the size of the pointer is boring
+        # we want the size of the actual struct
+        return "(sizeof(%s))" % self.arg_type.declaration_code("", deref=1)
+
+
+class SizeofVarNode(SizeofNode):
+    #  C sizeof function applied to a variable
+    #
+    #  operand   ExprNode
+
+    subexprs = ['operand']
+
+    def analyse_types(self, env):
+        # The operand may turn out to name a type rather than a variable.
+        # Try the type interpretation first; ordinary expression analysis
+        # is the fallback.
+        operand_as_type = self.operand.analyse_as_type(env)
+        if not operand_as_type:
+            self.operand = self.operand.analyse_types(env)
+            return self
+
+        self.arg_type = operand_as_type
+        if self.arg_type.is_fused:
+            try:
+                self.arg_type = self.arg_type.specialize(env.fused_to_specific)
+            except CannotSpecialize:
+                error(self.operand.pos,
+                      "Type cannot be specialized since it is not a fused argument to this function")
+        # From here on this node behaves as a SizeofTypeNode.
+        self.__class__ = SizeofTypeNode
+        self.check_type()
+        return self
+
+    def calculate_result_code(self):
+        operand_code = self.operand.result()
+        return "(sizeof(%s))" % operand_code
+
+    def generate_result_code(self, code):
+        # No statements are emitted for this node.
+        pass
+
+
+class TypeidNode(ExprNode):
+    #  C++ typeid operator applied to a type or variable
+    #
+    #  operand       ExprNode   reset to None once analyse_types() has consumed it
+    #  arg_type      the type named by the operand, or else the analysed operand node
+    #  is_type       boolean    set by analyse_types(): True if 'arg_type' is a type
+    #  is_variable   declared below, but not referenced by the methods of this class
+
+    subexprs = ['operand']
+
+    arg_type = None
+    is_variable = None
+    is_temp = 1
+
+    def get_type_info_type(self, env):
+        # Walk outwards to the module scope, look up 'type_info' in the
+        # 'libcpp.typeinfo' module and return it wrapped as a const
+        # (fake) reference type.
+        env_module = env
+        while not env_module.is_module_scope:
+            env_module = env_module.outer_scope
+        typeinfo_module = env_module.find_module('libcpp.typeinfo', self.pos)
+        typeinfo_entry = typeinfo_module.lookup('type_info')
+        return PyrexTypes.CFakeReferenceType(PyrexTypes.c_const_type(typeinfo_entry.type))
+
+    # Passed on to cpp_check() via the instance; names the feature being checked.
+    cpp_message = 'typeid operator'
+
+    def analyse_types(self, env):
+        # Resolves the operand either as a type or as an expression and sets
+        # 'arg_type' / 'is_type' accordingly.  Always returns self; on failure
+        # the node's type is PyrexTypes.error_type.
+        if not self.type:
+            self.type = PyrexTypes.error_type  # default value if it isn't analysed successfully
+        self.cpp_check(env)
+        type_info = self.get_type_info_type(env)
+        if not type_info:
+            self.error("The 'libcpp.typeinfo' module must be cimported to use the typeid() operator")
+            return self
+        if self.operand is None:
+            return self  # already analysed, no need to repeat
+        self.type = type_info
+        as_type = self.operand.analyse_as_specialized_type(env)
+        if as_type:
+            # typeid(<type>)
+            self.arg_type = as_type
+            self.is_type = True
+            self.operand = None  # nothing further uses self.operand - will only cause problems if its used in code generation
+        else:
+            # typeid(<expression>): keep the analysed node and validate its type.
+            self.arg_type = self.operand.analyse_types(env)
+            self.is_type = False
+            self.operand = None  # nothing further uses self.operand - will only cause problems if its used in code generation
+            if self.arg_type.type.is_pyobject:
+                self.error("Cannot use typeid on a Python object")
+                return self
+            elif self.arg_type.type.is_void:
+                self.error("Cannot use typeid on void")
+                return self
+            elif not self.arg_type.type.is_complete():
+                self.error("Cannot use typeid on incomplete type '%s'" % self.arg_type.type)
+                return self
+        # generate_result_code() goes through translate_cpp_exception().
+        env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+        return self
+
+    def error(self, mess):
+        # Report 'mess' at this node's position and mark the node as erroneous.
+        error(self.pos, mess)
+        self.type = PyrexTypes.error_type
+        self.result_code = "<error>"
+
+    def check_const(self):
+        return True
+
+    def calculate_result_code(self):
+        # The result lives in the temp assigned by generate_result_code().
+        return self.temp_code
+
+    def generate_result_code(self, code):
+        # Emit "<temp> = typeid(<type or expression>);" wrapped in C++
+        # exception translation.
+        if self.is_type:
+            arg_code = self.arg_type.empty_declaration_code()
+        else:
+            arg_code = self.arg_type.result()
+        translate_cpp_exception(code, self.pos,
+            "%s = typeid(%s);" % (self.temp_code, arg_code),
+            None, None, self.in_nogil_context)
+
+class TypeofNode(ExprNode):
+    #  Compile-time type of an expression, rendered as a string object.
+    #
+    #  operand   ExprNode
+    #  literal   StringNode # internal, created during type analysis
+
+    literal = None
+    type = py_object_type
+
+    subexprs = ['literal']  # 'operand' will be ignored after type analysis!
+
+    def analyse_types(self, env):
+        # Analyse the operand, then replace it by a string literal that
+        # spells out the operand's type.
+        self.operand = self.operand.analyse_types(env)
+        type_name = StringEncoding.EncodedString(str(self.operand.type))
+        name_node = StringNode(self.pos, value=type_name).analyse_types(env)
+        self.literal = name_node.coerce_to_pyobject(env)
+        return self
+
+    def analyse_as_type(self, env):
+        # Used as a type, the node stands for the type of its operand.
+        analysed_operand = self.operand.analyse_types(env)
+        self.operand = analysed_operand
+        return analysed_operand.type
+
+    def may_be_none(self):
+        return False
+
+    def generate_evaluation_code(self, code):
+        # All evaluation is delegated to the generated string literal.
+        self.literal.generate_evaluation_code(code)
+
+    def calculate_result_code(self):
+        return self.literal.calculate_result_code()
+
+#-------------------------------------------------------------------
+#
+#  Binary operator nodes
+#
+#-------------------------------------------------------------------
+
+# Function used to evaluate the '@' operator at compile time.
+# Prefer operator.matmul; where the 'operator' module does not provide it,
+# fall back to calling the special methods directly.
+try:
+    matmul_operator = operator.matmul
+except AttributeError:
+    def matmul_operator(a, b):
+        # 'a.__matmul__' and 'b.__rmatmul__' are bound methods, so each
+        # takes only the *other* operand as its argument.
+        try:
+            func = a.__matmul__
+        except AttributeError:
+            # Reflected operation: the right operand handles 'a @ b'.
+            return b.__rmatmul__(a)
+        return func(b)
+
+# Maps the operator string of a binary expression node to the Python callable
+# that evaluates it at compile time.  Looked up by get_compile_time_binop()
+# and by the calculate_constant_result() methods of the binop nodes.
+# Two-word operators are keyed with an underscore ('is_not', 'not_in').
+compile_time_binary_operators = {
+    '<': operator.lt,
+    '<=': operator.le,
+    '==': operator.eq,
+    '!=': operator.ne,
+    '>=': operator.ge,
+    '>': operator.gt,
+    'is': operator.is_,
+    'is_not': operator.is_not,
+    '+': operator.add,
+    '&': operator.and_,
+    '/': operator.truediv,
+    '//': operator.floordiv,
+    '<<': operator.lshift,
+    '%': operator.mod,
+    '*': operator.mul,
+    '|': operator.or_,
+    '**': operator.pow,
+    '>>': operator.rshift,
+    '-': operator.sub,
+    '^': operator.xor,
+    '@': matmul_operator,
+    # Containment tests take (item, container), i.e. the operands in source order.
+    'in': lambda x, seq: x in seq,
+    'not_in': lambda x, seq: x not in seq,
+}
+
+def get_compile_time_binop(node):
+    # Return the compile-time evaluation function for node.operator.
+    # If there is none, an error is reported at the node's position and
+    # the (false) lookup result is returned.
+    op_name = node.operator
+    evaluator = compile_time_binary_operators.get(op_name)
+    if not evaluator:
+        message = "Binary '%s' not supported in compile-time expression" % op_name
+        error(node.pos, message)
+    return evaluator
+
+
+class BinopNode(ExprNode):
+    #  Base class of the binary operator expression nodes.
+    #
+    #  operator     string
+    #  operand1     ExprNode
+    #  operand2     ExprNode
+    #  inplace      boolean    class default is False
+    #
+    #  Processing during analyse_expressions phase:
+    #
+    #    analyse_c_operation
+    #      Called when neither operand is a pyobject.
+    #      - Check operand types and coerce if needed.
+    #      - Determine result type and result code fragment.
+    #      - Allocate temporary for result if needed.
+
+    subexprs = ['operand1', 'operand2']
+    inplace = False
+
+    def calculate_constant_result(self):
+        # Fold the operation using the operands' constant results.
+        # An operator missing from the table raises KeyError here.
+        func = compile_time_binary_operators[self.operator]
+        self.constant_result = func(
+            self.operand1.constant_result,
+            self.operand2.constant_result)
+
+    def compile_time_value(self, denv):
+        # Evaluate the expression in the compile-time environment 'denv'.
+        # Any exception raised by the evaluation is handed to
+        # compile_time_value_error().
+        func = get_compile_time_binop(self)
+        operand1 = self.operand1.compile_time_value(denv)
+        operand2 = self.operand2.compile_time_value(denv)
+        try:
+            return func(operand1, operand2)
+        except Exception as e:
+            self.compile_time_value_error(e)
+
+    def infer_type(self, env):
+        # The inferred type is the result type for the inferred operand types.
+        return self.result_type(self.operand1.infer_type(env),
+                                self.operand2.infer_type(env), env)
+
+    def analyse_types(self, env):
+        # Analyse both operands first, then the operation itself.
+        self.operand1 = self.operand1.analyse_types(env)
+        self.operand2 = self.operand2.analyse_types(env)
+        self.analyse_operation(env)
+        return self
+
+    def analyse_operation(self, env):
+        # Dispatch on the kind of operation, tested in this order:
+        # Pythran expression, Python object operation, C++ operator, plain C.
+        if self.is_pythran_operation(env):
+            self.type = self.result_type(self.operand1.type,
+                                         self.operand2.type, env)
+            assert self.type.is_pythran_expr
+            self.is_temp = 1
+        elif self.is_py_operation():
+            self.coerce_operands_to_pyobjects(env)
+            self.type = self.result_type(self.operand1.type,
+                                         self.operand2.type, env)
+            assert self.type.is_pyobject
+            self.is_temp = 1
+        elif self.is_cpp_operation():
+            self.analyse_cpp_operation(env)
+        else:
+            self.analyse_c_operation(env)
+
+    def is_py_operation(self):
+        return self.is_py_operation_types(self.operand1.type, self.operand2.type)
+
+    def is_py_operation_types(self, type1, type2):
+        # A Python operation is needed as soon as either side is a
+        # Python object or a C tuple.
+        return type1.is_pyobject or type2.is_pyobject or type1.is_ctuple or type2.is_ctuple
+
+    def is_pythran_operation(self, env):
+        return self.is_pythran_operation_types(self.operand1.type, self.operand2.type, env)
+
+    def is_pythran_operation_types(self, type1, type2, env):
+        # Support only expr op supported_type, or supported_type op expr
+        return has_np_pythran(env) and \
+               (is_pythran_supported_operation_type(type1) and is_pythran_supported_operation_type(type2)) and \
+               (is_pythran_expr(type1) or is_pythran_expr(type2))
+
+    def is_cpp_operation(self):
+        # True if at least one operand is a C++ class instance.
+        return (self.operand1.type.is_cpp_class
+            or self.operand2.type.is_cpp_class)
+
+    def analyse_cpp_operation(self, env):
+        # Resolve the overloaded C++ operator for the two operands, record
+        # its exception handling, coerce the operands to its argument types
+        # and take its return type as the type of this node.
+        entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
+        if not entry:
+            self.type_error()
+            return
+        func_type = entry.type
+        self.exception_check = func_type.exception_check
+        self.exception_value = func_type.exception_value
+        if self.exception_check == '+':
+            # Used by NumBinopNodes to break up expressions involving multiple
+            # operators so that exceptions can be handled properly.
+            self.is_temp = 1
+            if needs_cpp_exception_conversion(self):
+                env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+        if func_type.is_ptr:
+            func_type = func_type.base_type
+        if len(func_type.args) == 1:
+            # Only the right operand is an explicit argument here
+            # (presumably a member operator - TODO confirm).
+            self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
+        else:
+            self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
+            self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
+        self.type = func_type.return_type
+
+    def result_type(self, type1, type2, env):
+        # Determine the result type of applying the operator to operands of
+        # type1 and type2, without modifying the node.
+        if self.is_pythran_operation_types(type1, type2, env):
+            return PythranExpr(pythran_binop_type(self.operator, type1, type2))
+        if self.is_py_operation_types(type1, type2):
+            # C string types are treated as the corresponding builtin
+            # Python string types for the purpose of type inference.
+            if type2.is_string:
+                type2 = Builtin.bytes_type
+            elif type2.is_pyunicode_ptr:
+                type2 = Builtin.unicode_type
+            if type1.is_string:
+                type1 = Builtin.bytes_type
+            elif type1.is_pyunicode_ptr:
+                type1 = Builtin.unicode_type
+            if type1.is_builtin_type or type2.is_builtin_type:
+                # NOTE: 'in' on a string is a substring test.
+                if type1 is type2 and self.operator in '**%+|&^':
+                    # FIXME: at least these operators should be safe - others?
+                    return type1
+                result_type = self.infer_builtin_types_operation(type1, type2)
+                if result_type is not None:
+                    return result_type
+            return py_object_type
+        elif type1.is_error or type2.is_error:
+            return PyrexTypes.error_type
+        else:
+            return self.compute_c_result_type(type1, type2)
+
+    def infer_builtin_types_operation(self, type1, type2):
+        # Hook for subclasses: return the builtin result type of the
+        # operation, or None if it cannot be inferred.
+        return None
+
+    def nogil_check(self, env):
+        # Python operations are reported through gil_error().
+        if self.is_py_operation():
+            self.gil_error()
+
+    def coerce_operands_to_pyobjects(self, env):
+        self.operand1 = self.operand1.coerce_to_pyobject(env)
+        self.operand2 = self.operand2.coerce_to_pyobject(env)
+
+    def check_const(self):
+        return self.operand1.check_const() and self.operand2.check_const()
+
+    def is_ephemeral(self):
+        # Ephemeral if the node itself or either of its operands is.
+        return (super(BinopNode, self).is_ephemeral() or
+                self.operand1.is_ephemeral() or self.operand2.is_ephemeral())
+
+    def generate_result_code(self, code):
+        # Emit the C code that computes the result.  Three cases produce
+        # code: Pythran expressions, Python object operations, and C/C++
+        # operations that are evaluated into a temporary.
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        if self.type.is_pythran_expr:
+            code.putln("// Pythran binop")
+            # Destroy the old value and construct the new one in place.
+            code.putln("__Pyx_call_destructor(%s);" % self.result())
+            if self.operator == '**':
+                code.putln("new (&%s) decltype(%s){pythonic::numpy::functor::power{}(%s, %s)};" % (
+                    self.result(),
+                    self.result(),
+                    self.operand1.pythran_result(),
+                    self.operand2.pythran_result()))
+            else:
+                code.putln("new (&%s) decltype(%s){%s %s %s};" % (
+                    self.result(),
+                    self.result(),
+                    self.operand1.pythran_result(),
+                    self.operator,
+                    self.operand2.pythran_result()))
+        elif type1.is_pyobject or type2.is_pyobject:
+            function = self.py_operation_function(code)
+            # The power function takes a third argument, passed as None here.
+            extra_args = ", Py_None" if self.operator == '**' else ""
+            op1_result = self.operand1.py_result() if type1.is_pyobject else self.operand1.result()
+            op2_result = self.operand2.py_result() if type2.is_pyobject else self.operand2.result()
+            code.putln(
+                "%s = %s(%s, %s%s); %s" % (
+                    self.result(),
+                    function,
+                    op1_result,
+                    op2_result,
+                    extra_args,
+                    code.error_goto_if_null(self.result(), self.pos)))
+            self.generate_gotref(code)
+        elif self.is_temp:
+            # C++ overloaded operators with exception values are currently all
+            # handled through temporaries.
+            if self.is_cpp_operation() and self.exception_check == '+':
+                translate_cpp_exception(code, self.pos,
+                                        "%s = %s;" % (self.result(), self.calculate_result_code()),
+                                        self.result() if self.type.is_pyobject else None,
+                                        self.exception_value, self.in_nogil_context)
+            else:
+                code.putln("%s = %s;" % (self.result(), self.calculate_result_code()))
+
+    def type_error(self):
+        # Report invalid operand types (unless an operand is already
+        # erroneous) and mark this node as erroneous.
+        if not (self.operand1.type.is_error
+                or self.operand2.type.is_error):
+            error(self.pos, "Invalid operand types for '%s' (%s; %s)" %
+                (self.operator, self.operand1.type,
+                    self.operand2.type))
+        self.type = PyrexTypes.error_type
+
+
+class CBinopNode(BinopNode):
+    #  Binary operator that is written out as a plain C/C++ infix expression.
+    #  Operations that would require Python objects get the error type.
+
+    def analyse_types(self, env):
+        analysed = BinopNode.analyse_types(self, env)
+        if analysed.is_py_operation():
+            analysed.type = PyrexTypes.error_type
+        return analysed
+
+    def py_operation_function(self, code):
+        # There is no Python-level implementation for this node.
+        return ""
+
+    def calculate_result_code(self):
+        lhs_code = self.operand1.result()
+        rhs_code = self.operand2.result()
+        return "(%s %s %s)" % (lhs_code, self.operator, rhs_code)
+
+    def compute_c_result_type(self, type1, type2):
+        # Ask the left operand type first, then the right one, for the
+        # type resulting from the C++ operation.
+        found_type = None
+        if type1.is_cpp_class or type1.is_ptr:
+            found_type = type1.find_cpp_operation_type(self.operator, type2)
+        if found_type is None and (type2.is_cpp_class or type2.is_ptr):
+            found_type = type2.find_cpp_operation_type(self.operator, type1)
+        # FIXME: do we need to handle other cases here?
+        return found_type
+
+
+def c_binop_constructor(operator):
+    # Return a factory "f(pos, **operands)" that creates CBinopNode
+    # instances for the given, fixed operator string.
+    def build_cbinop(pos, **operands):
+        return CBinopNode(pos, operator=operator, **operands)
+    return build_cbinop
+
+class NumBinopNode(BinopNode):
+    #  Binary operation taking numeric arguments.
+    #
+    #  infix              boolean   generate "a op b" rather than a helper function call
+    #  overflow_check     boolean   set during analysis if overflow checking code is needed
+    #  overflow_bit_node  node whose 'overflow_bit' temp receives the overflow flag
+    #                     (set to self in generate_evaluation_code())
+
+    infix = True
+    overflow_check = False
+    overflow_bit_node = None
+
+    def analyse_c_operation(self, env):
+        # Determine the C result type, set up overflow checking if it is
+        # enabled and applicable, and coerce the operands.
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        self.type = self.compute_c_result_type(type1, type2)
+        if not self.type:
+            self.type_error()
+            return
+        if self.type.is_complex:
+            # Complex results use a function from type.binary_op(),
+            # see calculate_result_code().
+            self.infix = False
+        if (self.type.is_int
+                and env.directives['overflowcheck']
+                and self.operator in self.overflow_op_names):
+            if (self.operator in ('+', '*')
+                    and self.operand1.has_constant_result()
+                    and not self.operand2.has_constant_result()):
+                # Swap the operands so that the constant one is on the right.
+                self.operand1, self.operand2 = self.operand2, self.operand1
+            self.overflow_check = True
+            self.overflow_fold = env.directives['overflowcheck.fold']
+            self.func = self.type.overflow_check_binop(
+                self.overflow_op_names[self.operator],
+                env,
+                const_rhs = self.operand2.has_constant_result())
+            self.is_temp = True
+        # Infix operations with a non-numeric operand keep their operand types.
+        if not self.infix or (type1.is_numeric and type2.is_numeric):
+            self.operand1 = self.operand1.coerce_to(self.type, env)
+            self.operand2 = self.operand2.coerce_to(self.type, env)
+
+    def compute_c_result_type(self, type1, type2):
+        # Returns the widest numeric type of the operands, but at least
+        # C int; returns None if the operand types are not acceptable.
+        if self.c_types_okay(type1, type2):
+            widest_type = PyrexTypes.widest_numeric_type(type1, type2)
+            if widest_type is PyrexTypes.c_bint_type:
+                # NOTE: 'not in' on a string is a substring test.
+                if self.operator not in '|^&':
+                    # False + False == 0 # not False!
+                    widest_type = PyrexTypes.c_int_type
+            else:
+                widest_type = PyrexTypes.widest_numeric_type(
+                    widest_type, PyrexTypes.c_int_type)
+            return widest_type
+        else:
+            return None
+
+    def may_be_none(self):
+        if self.type and self.type.is_builtin_type:
+            # if we know the result type, we know the operation, so it can't be None
+            return False
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        if type1 and type1.is_builtin_type and type2 and type2.is_builtin_type:
+            # XXX: I can't think of any case where a binary operation
+            # on builtin types evaluates to None - add a special case
+            # here if there is one.
+            return False
+        return super(NumBinopNode, self).may_be_none()
+
+    def get_constant_c_result_code(self):
+        # Returns a constant C expression if both operands provide one,
+        # otherwise None.
+        value1 = self.operand1.get_constant_c_result_code()
+        value2 = self.operand2.get_constant_c_result_code()
+        if value1 and value2:
+            return "(%s %s %s)" % (value1, self.operator, value2)
+        else:
+            return None
+
+    def c_types_okay(self, type1, type2):
+        # Both operands must be numeric or enum types.
+        #print "NumBinopNode.c_types_okay:", type1, type2 ###
+        return (type1.is_numeric or type1.is_enum) \
+            and (type2.is_numeric or type2.is_enum)
+
+    def generate_evaluation_code(self, code):
+        # With overflow checking, a C int temp is zeroed before the
+        # evaluation and tested afterwards; a non-zero value raises
+        # OverflowError.
+        if self.overflow_check:
+            self.overflow_bit_node = self
+            self.overflow_bit = code.funcstate.allocate_temp(PyrexTypes.c_int_type, manage_ref=False)
+            code.putln("%s = 0;" % self.overflow_bit)
+        super(NumBinopNode, self).generate_evaluation_code(code)
+        if self.overflow_check:
+            code.putln("if (unlikely(%s)) {" % self.overflow_bit)
+            code.putln('PyErr_SetString(PyExc_OverflowError, "value too large");')
+            code.putln(code.error_goto(self.pos))
+            code.putln("}")
+            code.funcstate.release_temp(self.overflow_bit)
+
+    def calculate_result_code(self):
+        # Three forms: overflow-checking helper call, infix expression,
+        # or a call to the helper function from type.binary_op().
+        if self.overflow_bit_node is not None:
+            return "%s(%s, %s, &%s)" % (
+                self.func,
+                self.operand1.result(),
+                self.operand2.result(),
+                self.overflow_bit_node.overflow_bit)
+        elif self.type.is_cpp_class or self.infix:
+            if is_pythran_expr(self.type):
+                result1, result2 = self.operand1.pythran_result(), self.operand2.pythran_result()
+            else:
+                result1, result2 = self.operand1.result(), self.operand2.result()
+            return "(%s %s %s)" % (result1, self.operator, result2)
+        else:
+            func = self.type.binary_op(self.operator)
+            if func is None:
+                error(self.pos, "binary operator %s not supported for %s" % (self.operator, self.type))
+            return "%s(%s, %s)" % (
+                func,
+                self.operand1.result(),
+                self.operand2.result())
+
+    def is_py_operation_types(self, type1, type2):
+        # Unicode character operands also make this a Python operation.
+        return (type1.is_unicode_char or
+                type2.is_unicode_char or
+                BinopNode.is_py_operation_types(self, type1, type2))
+
+    def py_operation_function(self, code):
+        # Name of the C function implementing the operator on Python
+        # objects; in-place nodes use the PyNumber_InPlace* variant.
+        function_name = self.py_functions[self.operator]
+        if self.inplace:
+            function_name = function_name.replace('PyNumber_', 'PyNumber_InPlace')
+        return function_name
+
+    # Operator string -> C function used for Python object operands.
+    py_functions = {
+        "|":        "PyNumber_Or",
+        "^":        "PyNumber_Xor",
+        "&":        "PyNumber_And",
+        "<<":       "PyNumber_Lshift",
+        ">>":       "PyNumber_Rshift",
+        "+":        "PyNumber_Add",
+        "-":        "PyNumber_Subtract",
+        "*":        "PyNumber_Multiply",
+        "@":        "__Pyx_PyNumber_MatrixMultiply",
+        "/":        "__Pyx_PyNumber_Divide",
+        "//":       "PyNumber_FloorDivide",
+        "%":        "PyNumber_Remainder",
+        "**":       "PyNumber_Power",
+    }
+
+    # Operators that support overflow checking, mapped to the operation
+    # name that is passed to type.overflow_check_binop().
+    overflow_op_names = {
+        "+":  "add",
+        "-":  "sub",
+        "*":  "mul",
+        "<<":  "lshift",
+    }
+
+
+class IntBinopNode(NumBinopNode):
+    #  Binary operation taking integer arguments.
+
+    def c_types_okay(self, type1, type2):
+        # Each operand must be a C integer or an enum.
+        left_ok = type1.is_int or type1.is_enum
+        return left_ok and (type2.is_int or type2.is_enum)
+
+
+class AddNode(NumBinopNode):
+    #  '+' operator.
+
+    def is_py_operation_types(self, type1, type2):
+        # Adding two C strings (or two Py_UNICODE pointers) is done as a
+        # Python operation.
+        if type1.is_string and type2.is_string or type1.is_pyunicode_ptr and type2.is_pyunicode_ptr:
+            return 1
+        else:
+            return NumBinopNode.is_py_operation_types(self, type1, type2)
+
+    def infer_builtin_types_operation(self, type1, type2):
+        # b'abc' + 'abc' raises an exception in Py3,
+        # so we can safely infer the Py2 type for bytes here
+        # For two string types, the result is whichever of them appears
+        # later in this tuple.
+        string_types = (bytes_type, bytearray_type, str_type, basestring_type, unicode_type)
+        if type1 in string_types and type2 in string_types:
+            return string_types[max(string_types.index(type1),
+                                    string_types.index(type2))]
+        return None
+
+    def compute_c_result_type(self, type1, type2):
+        # Pointer/array plus integer/enum (in either order) keeps the
+        # pointer/array type; everything else is ordinary numeric addition.
+        #print "AddNode.compute_c_result_type:", type1, self.operator, type2 ###
+        if (type1.is_ptr or type1.is_array) and (type2.is_int or type2.is_enum):
+            return type1
+        elif (type2.is_ptr or type2.is_array) and (type1.is_int or type1.is_enum):
+            return type2
+        else:
+            return NumBinopNode.compute_c_result_type(
+                self, type1, type2)
+
+    def py_operation_function(self, code):
+        # Select a specialised string concatenation helper where the operand
+        # types allow it; otherwise fall back to the generic function.
+        type1, type2 = self.operand1.type, self.operand2.type
+        func = None
+        if type1 is unicode_type or type2 is unicode_type:
+            if type1 in (unicode_type, str_type) and type2 in (unicode_type, str_type):
+                is_unicode_concat = True
+            elif isinstance(self.operand1, FormattedValueNode) or isinstance(self.operand2, FormattedValueNode):
+                # Assume that even if we don't know the second type, it's going to be a string.
+                is_unicode_concat = True
+            else:
+                # Operation depends on the second type.
+                is_unicode_concat = False
+
+            if is_unicode_concat:
+                if self.inplace or self.operand1.is_temp:
+                    code.globalstate.use_utility_code(
+                        UtilityCode.load_cached("UnicodeConcatInPlace", "ObjectHandling.c"))
+                func = '__Pyx_PyUnicode_Concat'
+        elif type1 is str_type and type2 is str_type:
+            code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("StrConcatInPlace", "ObjectHandling.c"))
+            func = '__Pyx_PyStr_Concat'
+
+        if func:
+            # any necessary utility code will be got by "NumberAdd" in generate_evaluation_code
+            # The helper name gets the suffix 'InPlace' and/or 'Safe',
+            # appended in that order.
+            if self.inplace or self.operand1.is_temp:
+                func += 'InPlace'  # upper case to indicate unintuitive macro
+            if self.operand1.may_be_none() or self.operand2.may_be_none():
+                func += 'Safe'
+            return func
+
+        return super(AddNode, self).py_operation_function(code)
+
+
+class SubNode(NumBinopNode):
+    #  '-' operator.
+
+    def compute_c_result_type(self, type1, type2):
+        # Pointer arithmetic is only recognised with a pointer/array on the left.
+        if type1.is_ptr or type1.is_array:
+            if type2.is_int or type2.is_enum:
+                # pointer - integer => pointer
+                return type1
+            if type2.is_ptr or type2.is_array:
+                # pointer - pointer => ptrdiff_t
+                return PyrexTypes.c_ptrdiff_t_type
+        return NumBinopNode.compute_c_result_type(self, type1, type2)
+
+
+class MulNode(NumBinopNode):
+    #  '*' operator.
+    #
+    #  is_sequence_mul   boolean   set during analysis if this multiplies
+    #                              a sequence by an integer
+    is_sequence_mul = False
+
+    def analyse_types(self, env):
+        # May return a different node: multiplying a sequence constructor
+        # that has no 'mult_factor' yet is folded into that constructor.
+        self.operand1 = self.operand1.analyse_types(env)
+        self.operand2 = self.operand2.analyse_types(env)
+        self.is_sequence_mul = self.calculate_is_sequence_mul()
+
+        # TODO: we could also optimise the case of "[...] * 2 * n", i.e. with an existing 'mult_factor'
+        if self.is_sequence_mul:
+            operand1 = self.operand1
+            operand2 = self.operand2
+            if operand1.is_sequence_constructor and operand1.mult_factor is None:
+                return self.analyse_sequence_mul(env, operand1, operand2)
+            elif operand2.is_sequence_constructor and operand2.mult_factor is None:
+                return self.analyse_sequence_mul(env, operand2, operand1)
+
+        self.analyse_operation(env)
+        return self
+
+    @staticmethod
+    def is_builtin_seqmul_type(type):
+        # Builtin sequence types other than memoryview.
+        return type.is_builtin_type and type in builtin_sequence_types and type is not memoryview_type
+
+    def calculate_is_sequence_mul(self):
+        # True if one operand is an integer ('long' or a C integer) and the
+        # other one is a C string, a C tuple or a builtin sequence type.
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        if type1 is long_type or type1.is_int:
+            # normalise to (X * int)
+            type1, type2 = type2, type1
+        if type2 is long_type or type2.is_int:
+            if type1.is_string or type1.is_ctuple:
+                return True
+            if self.is_builtin_seqmul_type(type1):
+                return True
+        return False
+
+    def analyse_sequence_mul(self, env, seq, mult):
+        # Attach 'mult' as the multiplication factor of the sequence
+        # constructor 'seq' and return the re-analysed sequence node.
+        assert seq.mult_factor is None
+        seq = seq.coerce_to_pyobject(env)
+        seq.mult_factor = mult
+        return seq.analyse_types(env)
+
+    def coerce_operands_to_pyobjects(self, env):
+        if self.is_sequence_mul:
+            # Keep operands as they are, but ctuples must become Python tuples to multiply them.
+            if self.operand1.type.is_ctuple:
+                self.operand1 = self.operand1.coerce_to_pyobject(env)
+            elif self.operand2.type.is_ctuple:
+                self.operand2 = self.operand2.coerce_to_pyobject(env)
+            return
+        super(MulNode, self).coerce_operands_to_pyobjects(env)
+
+    def is_py_operation_types(self, type1, type2):
+        # Sequence multiplication is always a Python operation.
+        return self.is_sequence_mul or super(MulNode, self).is_py_operation_types(type1, type2)
+
+    def py_operation_function(self, code):
+        if self.is_sequence_mul:
+            code.globalstate.use_utility_code(
+                UtilityCode.load_cached("PySequenceMultiply", "ObjectHandling.c"))
+            # The "_Left" variant is used when the first operand is not a Python object.
+            return "__Pyx_PySequence_Multiply" if self.operand1.type.is_pyobject else "__Pyx_PySequence_Multiply_Left"
+        return super(MulNode, self).py_operation_function(code)
+
+    def infer_builtin_types_operation(self, type1, type2):
+        # let's assume that whatever builtin type you multiply a builtin sequence type with
+        # will either return a sequence of the same type or fail with an exception
+        if type1.is_builtin_type and type2.is_builtin_type:
+            if self.is_builtin_seqmul_type(type1):
+                return type1
+            if self.is_builtin_seqmul_type(type2):
+                return type2
+        # multiplication of containers/numbers with an integer value
+        # always (?) returns the same type
+        if type1.is_int:
+            return type2
+        if type2.is_int:
+            return type1
+        return None
+
+
+class MatMultNode(NumBinopNode):
+    #  '@' operator.
+
+    def is_py_operation_types(self, type1, type2):
+        # Matrix multiplication is a Python operation for all operand types.
+        return True
+
+    def generate_evaluation_code(self, code):
+        # Make sure the matrix multiplication helper is available
+        # before the operation itself is generated.
+        matmul_utility = UtilityCode.load_cached("MatrixMultiply", "ObjectHandling.c")
+        code.globalstate.use_utility_code(matmul_utility)
+        super(MatMultNode, self).generate_evaluation_code(code)
+
+
+class DivNode(NumBinopNode):
+    #  '/' or '//' operator.
+
+    cdivision = None
+    truedivision = None   # == "unknown" if operator == '/'
+    ctruedivision = False
+    cdivision_warnings = False
+    zerodivision_check = None
+
+    def find_compile_time_binary_operator(self, op1, op2):
+        # Select the Python function that evaluates this division for the
+        # compile-time values 'op1' and 'op2'.
+        selected = compile_time_binary_operators[self.operator]
+        undecided_division = self.operator == '/' and self.truedivision is None
+        if undecided_division and isinstance(op1, _py_int_types) and isinstance(op2, _py_int_types):
+            # => floor div for two integers, true div otherwise
+            selected = compile_time_binary_operators['//']
+        return selected
+
+    def calculate_constant_result(self):
+        # Fold the division using the operands' constant results.
+        lhs = self.operand1.constant_result
+        rhs = self.operand2.constant_result
+        divide = self.find_compile_time_binary_operator(lhs, rhs)
+        self.constant_result = divide(lhs, rhs)
+
+    def compile_time_value(self, denv):
+        # Evaluate the division in the compile-time environment 'denv'.
+        # Exceptions from selecting or applying the operator are passed
+        # to compile_time_value_error().
+        lhs = self.operand1.compile_time_value(denv)
+        rhs = self.operand2.compile_time_value(denv)
+        try:
+            divide = self.find_compile_time_binary_operator(lhs, rhs)
+            return divide(lhs, rhs)
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+
+    def _check_truedivision(self, env):
+        # C division semantics (node flag or directive) switch off true
+        # division; otherwise the node's 'truedivision' setting is used.
+        use_c_division = self.cdivision or env.directives['cdivision']
+        self.ctruedivision = False if use_c_division else self.truedivision
+
+    def infer_type(self, env):
+        # 'ctruedivision' must be up to date before the result type is computed.
+        self._check_truedivision(env)
+        lhs_type = self.operand1.infer_type(env)
+        rhs_type = self.operand2.infer_type(env)
+        return self.result_type(lhs_type, rhs_type, env)
+
+    def analyse_operation(self, env):
+        # Run the generic analysis, then decide whether a run-time check
+        # for division by zero is needed.
+        self._check_truedivision(env)
+        NumBinopNode.analyse_operation(self, env)
+        if self.is_cpp_operation():
+            # C++ operators always use C division semantics.
+            self.cdivision = True
+        if not self.type.is_pyobject:
+            # Check only if C division was not requested and the divisor is
+            # either not a known constant or is the constant 0.
+            self.zerodivision_check = (
+                self.cdivision is None and not env.directives['cdivision']
+                and (not self.operand2.has_constant_result() or
+                     self.operand2.constant_result == 0))
+            if self.zerodivision_check or env.directives['cdivision_warnings']:
+                # Need to check ahead of time to warn or raise zero division error
+                self.operand1 = self.operand1.coerce_to_simple(env)
+                self.operand2 = self.operand2.coerce_to_simple(env)
+
+    def compute_c_result_type(self, type1, type2):
+        # True division of two non-float, non-C++ operands is widened to
+        # at least C double; all other cases use the generic numeric rules.
+        wants_true_division = self.operator == '/' and self.ctruedivision
+        if wants_true_division and not type1.is_cpp_class and not type2.is_cpp_class:
+            if not (type1.is_float or type2.is_float):
+                widened = PyrexTypes.widest_numeric_type(type1, PyrexTypes.c_double_type)
+                return PyrexTypes.widest_numeric_type(type2, widened)
+        return NumBinopNode.compute_c_result_type(self, type1, type2)
+
+    def zero_division_message(self):
+        # Message text for the ZeroDivisionError, chosen by the result type.
+        return "integer division or modulo by zero" if self.type.is_int else "float division"
+
+    def generate_evaluation_code(self, code):
+        # For C results other than complex numbers, settle 'cdivision' if it
+        # is still undecided and request the "DivInt" helper when C division
+        # is not used.  Then generate the operation and the division checks.
+        if not self.type.is_pyobject and not self.type.is_complex:
+            if self.cdivision is None:
+                # C division applies with the directive, for float results
+                # and for unsigned numeric/enum results.
+                self.cdivision = (
+                    code.globalstate.directives['cdivision']
+                    or self.type.is_float
+                    or ((self.type.is_numeric or self.type.is_enum) and not self.type.signed)
+                )
+            if not self.cdivision:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("DivInt", "CMath.c").specialize(self.type))
+        NumBinopNode.generate_evaluation_code(self, code)
+        self.generate_div_warning_code(code)
+
+    def generate_div_warning_code(self, code):
+        # Emits the C runtime checks for zero division / overflow and the optional cdivision warning.
+        in_nogil = self.in_nogil_context
+        if not self.type.is_pyobject:
+            if self.zerodivision_check:
+                if not self.infix:
+                    zero_test = "%s(%s)" % (self.type.unary_op('zero'), self.operand2.result())
+                else:
+                    zero_test = "%s == 0" % self.operand2.result()
+                code.putln("if (unlikely(%s)) {" % zero_test)
+                if in_nogil:
+                    code.put_ensure_gil()
+                code.putln('PyErr_SetString(PyExc_ZeroDivisionError, "%s");' % self.zero_division_message())
+                if in_nogil:
+                    code.put_release_ensured_gil()
+                code.putln(code.error_goto(self.pos))
+                code.putln("}")
+                if self.type.is_int and self.type.signed and self.operator != '%':
+                    code.globalstate.use_utility_code(UtilityCode.load_cached("UnaryNegOverflows", "Overflow.c"))
+                    if self.operand2.type.signed == 2:
+                        # explicitly signed, no runtime check needed
+                        minus1_check = 'unlikely(%s == -1)' % self.operand2.result()
+                    else:
+                        type_of_op2 = self.operand2.type.empty_declaration_code()
+                        minus1_check = '(!(((%s)-1) > 0)) && unlikely(%s == (%s)-1)' % (
+                            type_of_op2, self.operand2.result(), type_of_op2)
+                    code.putln("else if (sizeof(%s) == sizeof(long) && %s "
+                               " && unlikely(__Pyx_UNARY_NEG_WOULD_OVERFLOW(%s))) {" % (
+                               self.type.empty_declaration_code(),
+                               minus1_check,
+                               self.operand1.result()))
+                    if in_nogil:
+                        code.put_ensure_gil()
+                    code.putln('PyErr_SetString(PyExc_OverflowError, "value too large to perform division");')
+                    if in_nogil:
+                        code.put_release_ensured_gil()
+                    code.putln(code.error_goto(self.pos))
+                    code.putln("}")
+            if code.globalstate.directives['cdivision_warnings'] and self.operator != '/':
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("CDivisionWarning", "CMath.c"))
+                code.putln("if (unlikely((%s < 0) ^ (%s < 0))) {" % (
+                                self.operand1.result(),
+                                self.operand2.result()))
+                warning_code = "__Pyx_cdivision_warning(%(FILENAME)s, %(LINENO)s)" % {
+                    'FILENAME': Naming.filename_cname,
+                    'LINENO':  Naming.lineno_cname,
+                }
+                # In a nogil context, the warning call is wrapped in GIL acquisition and its result is kept.
+                if in_nogil:
+                    result_code = 'result'
+                    code.putln("int %s;" % result_code)
+                    code.put_ensure_gil()
+                    code.putln(code.set_error_info(self.pos, used=True))
+                    code.putln("%s = %s;" % (result_code, warning_code))
+                    code.put_release_ensured_gil()
+                else:
+                    result_code = warning_code
+                    code.putln(code.set_error_info(self.pos, used=True))
+                code.put("if (unlikely(%s)) " % result_code)
+                code.put_goto(code.error_label)
+                code.putln("}")
+
+    def calculate_result_code(self):
+        # Builds the C expression that performs the division on the
+        # results of both operands.
+        if self.type.is_complex or self.is_cpp_operation():
+            # Complex and C++ operations use the generic implementation.
+            return NumBinopNode.calculate_result_code(self)
+        op1 = self.operand1.result()
+        op2 = self.operand2.result()
+        if self.type.is_float and self.operator == '//':
+            return "floor(%s / %s)" % (op1, op2)
+        if not (self.truedivision or self.cdivision):
+            # Otherwise, call the helper function specialised for the result type.
+            return "__Pyx_div_%s(%s, %s)" % (
+                self.type.specialization_name(), op1, op2)
+        if self.truedivision:
+            # Cast each operand whose type differs from the result type.
+            if self.type != self.operand1.type:
+                op1 = self.type.cast_code(op1)
+            if self.type != self.operand2.type:
+                op2 = self.type.cast_code(op2)
+        return "(%s / %s)" % (op1, op2)
+
+
+_find_formatting_types = re.compile(  # finds the conversion type of each '%' format (empty for '%%')
+    br"%"
+    br"(?:%|"  # %%
+    br"(?:\([^)]+\))?"  # %(name)
+    br"[-+#,0-9 .*]*([a-zA-Z])"  # flags, width, precision ('.2', '*') + type: %.2f  etc.
+    br")").findall
+
+# Format conversion types that can never trigger a Unicode string conversion in Py2.
+# 's' and 'r' are left out since they can generate non-bytes strings.
+_safe_bytes_formats = frozenset([
+    b'd', b'i', b'o', b'u', b'x', b'X', b'e', b'E', b'f', b'F', b'g', b'G', b'c', b'b', b'a',
+])
+
+
+class ModNode(DivNode):
+    #  '%' operator.
+    #
+    #  Handles the C modulo operation as well as Python string formatting.
+
+    def is_py_operation_types(self, type1, type2):
+        # Operand types flagged as 'is_string' always make this a Python operation.
+        either_is_string = type1.is_string or type2.is_string
+        return either_is_string or NumBinopNode.is_py_operation_types(self, type1, type2)
+
+    def infer_builtin_types_operation(self, type1, type2):
+        # Infers the result type for builtin string operands, returns None where nothing gets inferred.
+        # b'%s' % xyz  raises an exception in Py3<3.5, so it's safe to infer the type for Py2 and later Py3's.
+        if type1 is unicode_type:
+            # None % xyz  may be implemented by RHS
+            if type2.is_builtin_type or not self.operand1.may_be_none():
+                return type1
+            return None
+        if type1 not in (bytes_type, str_type, basestring_type):
+            return None
+        if type2 is unicode_type:
+            return type2
+        if type2.is_numeric:
+            return type1
+        if self.operand1.is_string_literal:
+            if type1 is str_type or type1 is bytes_type:
+                if set(_find_formatting_types(self.operand1.value)) <= _safe_bytes_formats:
+                    return type1
+            return basestring_type
+        if type1 is bytes_type and not type2.is_builtin_type:
+            return None   # RHS might implement '%' operator differently in Py3
+        return basestring_type  # either str or unicode, can't tell
+
+    def zero_division_message(self):
+        # Message of the ZeroDivisionError that the generated C code raises.
+        if self.type.is_int:
+            return "integer division or modulo by zero"
+        return "float divmod()"
+
+    def analyse_operation(self, env):
+        DivNode.analyse_operation(self, env)
+        if self.type.is_pyobject:
+            return
+        if self.cdivision is None:
+            # Types that are not signed always use C semantics.
+            self.cdivision = env.directives['cdivision'] or not self.type.signed
+        if not (self.cdivision or self.type.is_int or self.type.is_float):
+            error(self.pos, "mod operator not supported for type '%s'" % self.type)
+
+    def generate_evaluation_code(self, code):
+        if not self.type.is_pyobject and not self.cdivision:
+            # Without C semantics, the modulo is computed by a utility function.
+            if self.type.is_int:
+                mod_helper = UtilityCode.load_cached("ModInt", "CMath.c").specialize(self.type)
+            else:  # float
+                mod_helper = UtilityCode.load_cached("ModFloat", "CMath.c").specialize(
+                    self.type, math_h_modifier=self.type.math_h_modifier)
+            code.globalstate.use_utility_code(mod_helper)
+        # NOTE: skipping over DivNode here, which would
+        # load the "DivInt" utility code for the division.
+        NumBinopNode.generate_evaluation_code(self, code)
+        self.generate_div_warning_code(code)
+
+    def calculate_result_code(self):
+        # Without C semantics, the type specialised helper function is called.
+        # Otherwise, fmod*() is used for floats and the C '%' operator for the rest.
+        op1 = self.operand1.result()
+        op2 = self.operand2.result()
+        if not self.cdivision:
+            return "__Pyx_mod_%s(%s, %s)" % (
+                self.type.specialization_name(), op1, op2)
+        if self.type.is_float:
+            return "fmod%s(%s, %s)" % (
+                self.type.math_h_modifier, op1, op2)
+        return "(%s %% %s)" % (op1, op2)
+
+    def py_operation_function(self, code):
+        type1, type2 = self.operand1.type, self.operand2.type
+        if type1 is unicode_type:
+            safe_func, plain_func = '__Pyx_PyUnicode_FormatSafe', 'PyUnicode_Format'
+        elif type1 is str_type:
+            safe_func, plain_func = '__Pyx_PyString_FormatSafe', '__Pyx_PyString_Format'
+        else:
+            return super(ModNode, self).py_operation_function(code)
+        # ("..." % x)  must call "x.__rmod__()" for string subtypes.
+        # The "safe" function is also selected if the first operand may be None.
+        needs_safe_call = self.operand1.may_be_none() or (
+            type2.is_extension_type and type2.subtype_of(type1) or
+            type2 is py_object_type and not isinstance(self.operand2, CoerceToPyTypeNode))
+        if needs_safe_call:
+            return safe_func
+        return plain_func
+
+
+class PowNode(NumBinopNode):
+    #  '**' operator.
+    #
+    #  is_cpow            None until set from the 'cpow' directive or by coerce_to()
+    #  type_was_inferred  was the result type affected by cpow==False?
+    #                     Intended to allow it to be changed if the node is coerced.
+
+    is_cpow = None
+    type_was_inferred = False
+
+    def _check_cpow(self, env):
+        # Take the setting from the 'cpow' directive unless it is already set.
+        if self.is_cpow is None:
+            self.is_cpow = env.directives['cpow']
+
+    def infer_type(self, env):
+        self._check_cpow(env)
+        return super(PowNode, self).infer_type(env)
+
+    def analyse_types(self, env):
+        self._check_cpow(env)
+        return super(PowNode, self).analyse_types(env)
+
+    def analyse_c_operation(self, env):
+        # Selects the C function (self.pow_func) that computes the power.
+        NumBinopNode.analyse_c_operation(self, env)
+        result_type = self.type
+        if result_type.is_complex:
+            if result_type.real_type.is_float:
+                self.operand1 = self.operand1.coerce_to(result_type, env)
+                self.operand2 = self.operand2.coerce_to(result_type, env)
+                self.pow_func = result_type.binary_op('**')
+            else:
+                error(self.pos, "complex int powers not supported")
+                self.pow_func = "<error>"
+        elif result_type.is_float:
+            self.pow_func = "pow" + result_type.math_h_modifier
+        elif result_type.is_int:
+            type_decl = result_type.empty_declaration_code()
+            self.pow_func = "__Pyx_pow_%s" % type_decl.replace(' ', '_')
+            env.use_utility_code(
+                UtilityCode.load_cached("IntPow", "CMath.c").specialize(
+                    func_name=self.pow_func, type=type_decl, signed=result_type.signed and 1 or 0))
+        elif not result_type.is_error:
+            error(self.pos, "got unexpected types for C power operator: %s, %s" %
+                            (self.operand1.type, self.operand2.type))
+
+    def compute_c_result_type(self, type1, type2):
+        # Determines the C result type.  With cpow==False, the type may get
+        # widened to double or become the special "soft complex" type.
+        from numbers import Real
+        base, exponent = self.operand1, self.operand2
+        c_result_type = None
+        op1_is_definitely_positive = (
+            base.has_constant_result() and base.constant_result >= 0
+        ) or (
+            type1.is_int and type1.signed == 0  # definitely unsigned
+        )
+        type2_is_int = type2.is_int or (
+            exponent.has_constant_result() and
+            isinstance(exponent.constant_result, Real) and
+            int(exponent.constant_result) == exponent.constant_result
+        )
+        needs_widening = False
+        if self.is_cpow:
+            c_result_type = super(PowNode, self).compute_c_result_type(type1, type2)
+            if not exponent.has_constant_result():
+                # NOTE(review): this looks like it can only hold for a constant
+                # exponent, which the guard above excludes - confirm the intent.
+                needs_widening = (
+                    isinstance(exponent.constant_result, _py_int_types) and exponent.constant_result < 0)
+        elif op1_is_definitely_positive or type2_is_int:  # cpow==False
+            # if type2 is an integer then we can't end up going from real to complex
+            c_result_type = super(PowNode, self).compute_c_result_type(type1, type2)
+            if not exponent.has_constant_result():
+                needs_widening = type2.is_int and type2.signed
+                if needs_widening:
+                    self.type_was_inferred = True
+            else:
+                needs_widening = (
+                    isinstance(exponent.constant_result, _py_int_types) and exponent.constant_result < 0)
+        elif self.c_types_okay(type1, type2):
+            # Allowable result types are double or complex double.
+            # Return the special "soft complex" type to store it as a
+            # complex number but with specialized coercions to Python
+            c_result_type = PyrexTypes.soft_complex_type
+            self.type_was_inferred = True
+        if needs_widening:
+            c_result_type = PyrexTypes.widest_numeric_type(c_result_type, PyrexTypes.c_double_type)
+        return c_result_type
+
+    def calculate_result_code(self):
+        # Cast the operands to the result type to work around MSVC overloading ambiguity.
+        def typecast(operand):
+            if self.type == operand.type:
+                return operand.result()
+            return self.type.cast_code(operand.result())
+        return "%s(%s, %s)" % (self.pow_func, typecast(self.operand1), typecast(self.operand2))
+
+    def py_operation_function(self, code):
+        # Powers of the Python integer constant 2 use dedicated helper functions.
+        if (self.type.is_pyobject and
+                self.operand1.constant_result == 2 and
+                isinstance(self.operand1.constant_result, _py_int_types) and
+                self.operand2.type is py_object_type):
+            code.globalstate.use_utility_code(UtilityCode.load_cached('PyNumberPow2', 'Optimize.c'))
+            return '__Pyx_PyNumber_InPlacePowerOf2' if self.inplace else '__Pyx_PyNumber_PowerOf2'
+        return super(PowNode, self).py_operation_function(code)
+
+    def coerce_to(self, dst_type, env):
+        if dst_type == self.type:
+            return self
+        if (self.is_cpow is None and self.type_was_inferred and
+                (dst_type.is_float or dst_type.is_int)):
+            # if we're trying to coerce this directly to a C float or int
+            # then fall back to the cpow == True behaviour since this is
+            # almost certainly the user intent.
+            # However, ensure that the operand types are suitable C types
+            # check_types() returns the pair (is suitable, operand node to use).
+            if self.type is PyrexTypes.soft_complex_type:
+                def check_types(operand, recurse=True):
+                    if operand.type.is_float or operand.type.is_int:
+                        return True, operand
+                    if recurse and isinstance(operand, CoerceToComplexNode):
+                        # Only pass on the flag of the nested check: the
+                        # (flag, node) tuple that it returns is always true.
+                        return check_types(operand.arg, recurse=False)[0], operand.arg
+                    return False, None
+                msg_detail = "a non-complex C numeric type"
+            elif dst_type.is_int:
+                def check_types(operand):
+                    # int, int doesn't seem to involve coercion nodes
+                    return (True, operand) if operand.type.is_int else (False, None)
+                msg_detail = "an integer C numeric type"
+            else:
+                def check_types(operand):
+                    return False, None
+            check_op1, op1 = check_types(self.operand1)
+            check_op2, op2 = check_types(self.operand2)
+            if check_op1 and check_op2:
+                warning(self.pos, "Treating '**' as if 'cython.cpow(True)' since it "
+                    "is directly assigned to a %s. "
+                    "This is likely to be fragile and we recommend setting "
+                    "'cython.cpow' explicitly." % msg_detail)
+                self.is_cpow = True
+                self.operand1 = op1
+                self.operand2 = op2
+                result = self.analyse_types(env)
+                if result.type != dst_type:
+                    result = result.coerce_to(dst_type, env)
+                return result
+        return super(PowNode, self).coerce_to(dst_type, env)
+
+
+class BoolBinopNode(ExprNode):
+    """
+    Short-circuiting boolean operation ("and" / "or").
+
+    This node offers the same generate_bool_evaluation_code() method as
+    BoolBinopResultNode, which keeps the nesting of such expressions simple.
+
+    operator  string                              "and"/"or"
+    operand1  BoolBinopNode/BoolBinopResultNode   left operand
+    operand2  BoolBinopNode/BoolBinopResultNode   right operand
+    """
+    subexprs = ['operand1', 'operand2']
+    is_temp = True
+    operator = None
+    operand1 = None
+    operand2 = None
+
+    def infer_type(self, env):
+        # The inferred type is the spanning type of both operand types.
+        operand_types = (self.operand1.infer_type(env), self.operand2.infer_type(env))
+        return PyrexTypes.independent_spanning_type(*operand_types)
+
+    def may_be_none(self):
+        # With 'or', a None first operand is falsy and never becomes the result.
+        if self.operator == 'or':
+            return self.operand2.may_be_none()
+        return self.operand1.may_be_none() or self.operand2.may_be_none()
+
+    def calculate_constant_result(self):
+        # Apply Python's own and/or to the constant results of the operands.
+        lhs, rhs = self.operand1.constant_result, self.operand2.constant_result
+        if self.operator == 'and':
+            self.constant_result = lhs and rhs
+        else:
+            self.constant_result = lhs or rhs
+
+    def compile_time_value(self, denv):
+        # Both operands get evaluated before the and/or result is selected.
+        lhs = self.operand1.compile_time_value(denv)
+        rhs = self.operand2.compile_time_value(denv)
+        if self.operator == 'and':
+            return lhs and rhs
+        return lhs or rhs
+
+    def is_ephemeral(self):
+        return self.operand1.is_ephemeral() or self.operand2.is_ephemeral()
+
+    def analyse_types(self, env):
+        # Note: we do not do any coercion here as we most likely do not know the final type anyway.
+        # We even accept to set self.type to ErrorType if both operands do not have a spanning type.
+        # The coercion to the final type and to a "simple" value is left to coerce_to().
+        analysed1 = self.operand1.analyse_types(env)
+        analysed2 = self.operand2.analyse_types(env)
+        self.type = PyrexTypes.independent_spanning_type(analysed1.type, analysed2.type)
+        self.operand1 = self._wrap_operand(analysed1, env)
+        self.operand2 = self._wrap_operand(analysed2, env)
+        return self
+
+    def _wrap_operand(self, operand, env):
+        # Operands that are not boolean operation nodes get wrapped in a BoolBinopResultNode.
+        if isinstance(operand, (BoolBinopNode, BoolBinopResultNode)):
+            return operand
+        return BoolBinopResultNode(operand, self.type, env)
+
+    def wrap_operands(self, env):
+        """
+        Must get called by transforms that want to create a correct BoolBinopNode
+        after the type analysis phase.
+        """
+        wrap = self._wrap_operand
+        self.operand1 = wrap(self.operand1, env)
+        self.operand2 = wrap(self.operand2, env)
+
+    def coerce_to_boolean(self, env):
+        return self.coerce_to(PyrexTypes.c_bint_type, env)
+
+    def coerce_to(self, dst_type, env):
+        # Coerce both operands and build a new node of the target type from them.
+        coerced1 = self.operand1.coerce_to(dst_type, env)
+        coerced2 = self.operand2.coerce_to(dst_type, env)
+        return BoolBinopNode.from_node(
+            self, type=dst_type, operator=self.operator, operand1=coerced1, operand2=coerced2)
+
+    def generate_bool_evaluation_code(self, code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through):
+        code.mark_pos(self.pos)
+
+        # The first operand gets a fresh label for this node's own operator,
+        # but keeps the label that was passed in for the other operator.
+        if self.operator == 'and':
+            my_label = code.new_label('next_and')
+            inner_and_label, inner_or_label = my_label, or_label
+        else:
+            my_label = code.new_label('next_or')
+            inner_and_label, inner_or_label = and_label, my_label
+        self.operand1.generate_bool_evaluation_code(
+            code, final_result_temp, final_result_type, inner_and_label, inner_or_label, end_label, my_label)
+
+        # The second operand follows the new label and uses the labels that were passed in.
+        code.put_label(my_label)
+        self.operand2.generate_bool_evaluation_code(
+            code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through)
+
+    def generate_evaluation_code(self, code):
+        self.allocate_temp_result(code)
+        result_type = self.type
+        if result_type.is_pyobject:
+            result_type = PyrexTypes.py_object_type
+        # Top-level call: no and/or labels yet, and the end label is also the fall-through label.
+        end_label = code.new_label('bool_binop_done')
+        self.generate_bool_evaluation_code(code, self.result(), result_type, None, None, end_label, end_label)
+        code.put_label(end_label)
+
+    gil_message = "Truth-testing Python object"
+
+    def check_const(self):
+        return self.operand1.check_const() and self.operand2.check_const()
+
+    def generate_subexpr_disposal_code(self, code):
+        pass  # nothing to do here, all done in generate_evaluation_code()
+
+    def free_subexpr_temps(self, code):
+        pass  # nothing to do here, all done in generate_evaluation_code()
+
+    def generate_operand1_test(self, code):
+        #  Generate code to test the truth of the first operand.
+        #  Returns the C test expression together with self.type.is_pyobject.
+        if not self.type.is_pyobject:
+            return (self.operand1.result(), self.type.is_pyobject)
+        test_result = code.funcstate.allocate_temp(PyrexTypes.c_bint_type, manage_ref=False)
+        code.putln("%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+            test_result, self.operand1.py_result(), code.error_goto_if_neg(test_result, self.pos)))
+        return (test_result, self.type.is_pyobject)
+
+
+class BoolBinopResultNode(ExprNode):
+    """
+    Intermediate result of a short-circuiting and/or expression.
+    Tests the result for 'truthiness' and takes care of coercing the final result
+    of the overall expression to the target type.
+
+    Note that this node provides the same code generation method as
+    BoolBinopNode to simplify expression nesting.
+
+    arg     ExprNode    the argument to test (wrapped in a ProxyNode)
+    value   ExprNode    the coerced result value node (a coerced CloneNode of arg)
+    """
+
+    subexprs = ['arg', 'value']
+    is_temp = True
+    arg = None
+    value = None
+
+    def __init__(self, arg, result_type, env):
+        # using 'arg' multiple times, so it must be a simple/temp value
+        arg = arg.coerce_to_simple(env)
+        # wrap in ProxyNode, in case a transform wants to replace self.arg later
+        arg = ProxyNode(arg)
+        super(BoolBinopResultNode, self).__init__(
+            arg.pos, arg=arg, type=result_type,
+            value=CloneNode(arg).coerce_to(result_type, env))
+
+    def coerce_to_boolean(self, env):
+        return self.coerce_to(PyrexTypes.c_bint_type, env)
+
+    def coerce_to(self, dst_type, env):
+        # unwrap (self.arg is the ProxyNode created in __init__), coerce, rewrap
+        arg = self.arg.arg
+        if dst_type is PyrexTypes.c_bint_type:
+            arg = arg.coerce_to_boolean(env)
+        # TODO: unwrap more coercion nodes?
+        return BoolBinopResultNode(arg, dst_type, env)
+
+    def nogil_check(self, env):
+        # let's leave all errors to BoolBinopNode
+        pass
+
+    def generate_operand_test(self, code):
+        #  Generate code to test the truth of self.arg. Returns (C test expression, whether it is a temp to release).
+        if self.arg.type.is_pyobject:
+            test_result = code.funcstate.allocate_temp(
+                PyrexTypes.c_bint_type, manage_ref=False)
+            code.putln(
+                "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+                    test_result,
+                    self.arg.py_result(),
+                    code.error_goto_if_neg(test_result, self.pos)))
+        else:
+            test_result = self.arg.result()
+        return (test_result, self.arg.type.is_pyobject)
+
+    def generate_bool_evaluation_code(self, code, final_result_temp, final_result_type, and_label, or_label, end_label, fall_through):
+        code.mark_pos(self.pos)
+        # Cases handled below (gotos to a label that equals 'fall_through' are left out):
+        # x => x
+        # x and ... or ... => next 'and' / 'or'
+        # False ... or x => next 'or'
+        # True and x => next 'and'
+        # True or x => True (operand)
+
+        self.arg.generate_evaluation_code(code)
+        if and_label or or_label:
+            test_result, uses_temp = self.generate_operand_test(code)
+            if uses_temp and (and_label and or_label):
+                # cannot become final result => free early
+                # disposal: uses_temp and (and_label and or_label)
+                self.arg.generate_disposal_code(code)
+            sense = '!' if or_label else ''
+            code.putln("if (%s%s) {" % (sense, test_result))
+            if uses_temp:
+                code.funcstate.release_temp(test_result)
+            if not uses_temp or not (and_label and or_label):
+                # disposal: (not uses_temp) or {not (and_label and or_label) [if]}
+                self.arg.generate_disposal_code(code)
+
+            if or_label and or_label != fall_through:
+                # value is false => short-circuit to next 'or'
+                code.put_goto(or_label)
+            if and_label:
+                # value is true => go to next 'and'
+                if or_label:
+                    code.putln("} else {")
+                    if not uses_temp:
+                        # disposal: (not uses_temp) and {(and_label and or_label) [else]}
+                        self.arg.generate_disposal_code(code)
+                if and_label != fall_through:
+                    code.put_goto(and_label)
+
+        if not and_label or not or_label:
+            # if no next 'and' or 'or', we provide the result
+            if and_label or or_label:
+                code.putln("} else {")
+            self.value.generate_evaluation_code(code)
+            self.value.make_owned_reference(code)
+            code.putln("%s = %s;" % (final_result_temp, self.value.result_as(final_result_type)))
+            self.value.generate_post_assignment_code(code)
+            # disposal: {not (and_label and or_label) [else]}
+            self.arg.generate_disposal_code(code)
+            self.value.free_temps(code)
+            if end_label != fall_through:
+                code.put_goto(end_label)
+        # close the C block that was opened for the truth test above
+        if and_label or or_label:
+            code.putln("}")
+        self.arg.free_temps(code)
+
+    def analyse_types(self, env):
+        return self
+
+
+class CondExprNode(ExprNode):
+    #  Short-circuiting conditional expression.
+    #
+    #  test        ExprNode
+    #  true_val    ExprNode
+    #  false_val   ExprNode
+
+    true_val = None
+    false_val = None
+    is_temp = True
+
+    subexprs = ['test', 'true_val', 'false_val']
+
+    def type_dependencies(self, env):
+        return self.true_val.type_dependencies(env) + self.false_val.type_dependencies(env)
+
+    def infer_type(self, env):
+        return PyrexTypes.independent_spanning_type(
+            self.true_val.infer_type(env),
+            self.false_val.infer_type(env))
+
+    def calculate_constant_result(self):
+        if self.test.constant_result:
+            self.constant_result = self.true_val.constant_result
+        else:
+            self.constant_result = self.false_val.constant_result
+
+    def is_ephemeral(self):
+        return self.true_val.is_ephemeral() or self.false_val.is_ephemeral()
+
+    def analyse_types(self, env):
+        self.test = self.test.analyse_temp_boolean_expression(env)
+        self.true_val = self.true_val.analyse_types(env)
+        self.false_val = self.false_val.analyse_types(env)
+        return self.analyse_result_type(env)
+
+    def analyse_result_type(self, env):
+        true_val_type = self.true_val.type
+        false_val_type = self.false_val.type
+        self.type = PyrexTypes.independent_spanning_type(true_val_type, false_val_type)
+
+        if self.type.is_reference:
+            self.type = PyrexTypes.CFakeReferenceType(self.type.ref_base_type)
+        if self.type.is_pyobject:
+            self.result_ctype = py_object_type
+        elif self.true_val.is_ephemeral() or self.false_val.is_ephemeral():
+            error(self.pos, "Unsafe C derivative of temporary Python reference used in conditional expression")
+
+        if true_val_type.is_pyobject or false_val_type.is_pyobject or self.type.is_pyobject:
+            if true_val_type != self.type:
+                self.true_val = self.true_val.coerce_to(self.type, env)
+            if false_val_type != self.type:
+                self.false_val = self.false_val.coerce_to(self.type, env)
+
+        if self.type.is_error:
+            self.type_error()
+        return self
+
+    def coerce_to_integer(self, env):
+        if not self.true_val.type.is_int:
+            self.true_val = self.true_val.coerce_to_integer(env)
+        if not self.false_val.type.is_int:
+            self.false_val = self.false_val.coerce_to_integer(env)
+        self.result_ctype = None
+        out = self.analyse_result_type(env)
+        if not out.type.is_int:
+            # fall back to ordinary coercion since we haven't ended as the correct type
+            if out is self:
+                out = super(CondExprNode, out).coerce_to_integer(env)
+            else:
+                # I believe `analyse_result_type` always returns a CondExprNode but
+                # handle the opposite case just in case
+                out = out.coerce_to_integer(env)
+        return out
+
+    def coerce_to(self, dst_type, env):
+        if self.true_val.type != dst_type:
+            self.true_val = self.true_val.coerce_to(dst_type, env)
+        if self.false_val.type != dst_type:
+            self.false_val = self.false_val.coerce_to(dst_type, env)
+        self.result_ctype = None
+        out = self.analyse_result_type(env)
+        if out.type != dst_type:
+            # fall back to ordinary coercion since we haven't ended as the correct type
+            if out is self:
+                out = super(CondExprNode, out).coerce_to(dst_type, env)
+            else:
+                # I believe `analyse_result_type` always returns a CondExprNode but
+                # handle the opposite case just in case
+                out = out.coerce_to(dst_type, env)
+        return out
+
+    def type_error(self):
+        if not (self.true_val.type.is_error or self.false_val.type.is_error):
+            error(self.pos, "Incompatible types in conditional expression (%s; %s)" %
+                (self.true_val.type, self.false_val.type))
+        self.type = PyrexTypes.error_type
+
+    def check_const(self):
+        return (self.test.check_const()
+            and self.true_val.check_const()
+            and self.false_val.check_const())
+
+    def generate_evaluation_code(self, code):
+        """Emit a C ``if``/``else`` that evaluates exactly one of the two branches.
+
+        The test is evaluated first; each branch is then evaluated and stored
+        into this node's temp result inside its own C block (see eval_and_get).
+        """
+        # Because subexprs may not be evaluated we can use a more optimal
+        # subexpr allocation strategy than the default, so override evaluation_code.
+
+        code.mark_pos(self.pos)
+        self.allocate_temp_result(code)
+        self.test.generate_evaluation_code(code)
+        code.putln("if (%s) {" % self.test.result())
+        self.eval_and_get(code, self.true_val)
+        code.putln("} else {")
+        self.eval_and_get(code, self.false_val)
+        code.putln("}")
+        # The test value is only released after both branches have been emitted.
+        self.test.generate_disposal_code(code)
+        self.test.free_temps(code)
+
+    def eval_and_get(self, code, expr):
+        """Evaluate ``expr`` and assign its value to this node's result.
+
+        Called once per branch from within that branch's C block, so that the
+        post-assignment code and temp release of ``expr`` stay inside the block.
+        """
+        expr.generate_evaluation_code(code)
+        # Make sure the value is owned before it is stored in our result
+        # (memoryview slices have their own helper for this).
+        if self.type.is_memoryviewslice:
+            expr.make_owned_memoryviewslice(code)
+        else:
+            expr.make_owned_reference(code)
+        code.putln('%s = %s;' % (self.result(), expr.result_as(self.ctype())))
+        expr.generate_post_assignment_code(code)
+        expr.free_temps(code)
+
+    def generate_subexpr_disposal_code(self, code):
+        """Intentionally a no-op.
+
+        Disposal is done explicitly in generate_evaluation_code / eval_and_get,
+        because cleanup must happen separately within the if/else blocks.
+        """
+
+    def free_subexpr_temps(self, code):
+        """Intentionally a no-op.
+
+        Temps are released explicitly in generate_evaluation_code / eval_and_get,
+        because cleanup must happen separately within the if/else blocks.
+        """
+
+
+# Maps comparison operator strings to the name of the rich comparison
+# constant that is written into the generated C code (see
+# CmpNode.generate_operation_code).
+richcmp_constants = {
+    "<" : "Py_LT",
+    "<=": "Py_LE",
+    "==": "Py_EQ",
+    "!=": "Py_NE",
+    "<>": "Py_NE",
+    ">" : "Py_GT",
+    ">=": "Py_GE",
+    # the following are faked by special compare functions
+    "in"    : "Py_EQ",
+    "not_in": "Py_NE",
+}
+
+class CmpNode(object):
+    #  Mixin class containing code common to PrimaryCmpNodes
+    #  and CascadedCmpNodes.
+    #
+    #  The attributes below are filled in by find_special_bool_compare_function()
+    #  when a dedicated C helper can replace the generic comparison:
+    #
+    #  special_bool_cmp_function      string or None   name of the C helper function
+    #  special_bool_cmp_utility_code  utility code or None   code that provides the helper
+    #  special_bool_extra_args        list   extra argument nodes passed to the helper
+    #
+    #  NOTE: 'special_bool_extra_args' defaults to a list shared by all instances.
+    #  The code visible in this class only rebinds or iterates it; do not mutate it in place.
+
+    special_bool_cmp_function = None
+    special_bool_cmp_utility_code = None
+    special_bool_extra_args = []
+
+    def infer_type(self, env):
+        """Infer the comparison result type; currently always a generic Python object."""
+        # TODO: Actually implement this (after merging with -unstable).
+        inferred = py_object_type
+        return inferred
+
+    def calculate_cascaded_constant_result(self, operand1_result):
+        """Compute ``self.constant_result`` from the constant value of the left operand.
+
+        ``operand1_result`` is the constant value of the left-hand operand; the
+        right-hand value is taken from ``self.operand2.constant_result``.
+        Nothing is stored when two string constants of different types are compared.
+        """
+        func = compile_time_binary_operators[self.operator]
+        operand2_result = self.operand2.constant_result
+        if (isinstance(operand1_result, any_string_type) and
+                isinstance(operand2_result, any_string_type) and
+                type(operand1_result) != type(operand2_result)):
+            # string comparison of different types isn't portable
+            return
+
+        if self.operator in ('in', 'not_in'):
+            if isinstance(self.operand2, (ListNode, TupleNode, SetNode)):
+                if not self.operand2.args:
+                    # Nothing is contained in an empty literal container.
+                    self.constant_result = self.operator == 'not_in'
+                    return
+                elif isinstance(self.operand2, ListNode) and not self.cascade:
+                    # tuples are more efficient to store than lists
+                    self.operand2 = self.operand2.as_tuple()
+            elif isinstance(self.operand2, DictNode):
+                if not self.operand2.key_value_pairs:
+                    # Nothing is contained in an empty dict literal.
+                    self.constant_result = self.operator == 'not_in'
+                    return
+
+        self.constant_result = func(operand1_result, operand2_result)
+
+    def cascaded_compile_time_value(self, operand1, denv):
+        """Evaluate this comparison, and the rest of the cascade, at compile time.
+
+        ``operand1`` is the already computed left-hand value.  A failing
+        comparison is reported via compile_time_value_error() and yields None.
+        The cascade is only followed while the result so far is true.
+        """
+        compare = get_compile_time_binop(self)
+        rhs_value = self.operand2.compile_time_value(denv)
+        try:
+            outcome = compare(operand1, rhs_value)
+        except Exception as exc:
+            self.compile_time_value_error(exc)
+            outcome = None
+        if outcome and self.cascade:
+            outcome = outcome and self.cascade.cascaded_compile_time_value(rhs_value, denv)
+        return outcome
+
+    def is_cpp_comparison(self):
+        """Tell whether at least one of the two operands has a C++ class type."""
+        lhs_is_cpp_class = self.operand1.type.is_cpp_class
+        if lhs_is_cpp_class:
+            return lhs_is_cpp_class
+        return self.operand2.type.is_cpp_class
+
+    def find_common_int_type(self, env, op, operand1, operand2):
+        """Find an integer type for comparing operands where string literals may act as characters.
+
+        Precondition: type1 != type2 and at least one of the types is not a C int.
+        Returns the integer type to compare in, or None if there is none.
+        """
+        def usable_as_char(operand):
+            # A string literal that can be coerced to a char literal may be compared as an integer.
+            return operand.is_string_literal and operand.can_coerce_to_char_literal()
+
+        type1, type2 = operand1.type, operand2.type
+        lhs_is_char = usable_as_char(operand1)
+        rhs_is_char = usable_as_char(operand2)
+
+        if type1.is_int:
+            return type1 if rhs_is_char else None
+        if type2.is_int:
+            return type2 if lhs_is_char else None
+        if lhs_is_char and rhs_is_char:
+            # Two character literals: choose the type based on whether unicode is involved.
+            if Builtin.unicode_type in (type1, type2):
+                return PyrexTypes.c_py_ucs4_type
+            return PyrexTypes.c_uchar_type
+        return None
+
+    def find_common_type(self, env, op, operand1, common_type=None):
+        """Determine the type in which the operands of the whole cascade are compared.
+
+        ``operand1`` is the left operand of this comparison and ``op`` its
+        operator; the right operand is ``self.operand2``.  ``common_type`` is
+        the type found so far for the preceding part of the cascade (None for
+        the first comparison).  Errors are reported as they are found and
+        produce ``error_type``.  Returns the merged type after recursing into
+        ``self.cascade``.
+        """
+        operand2 = self.operand2
+        type1 = operand1.type
+        type2 = operand2.type
+
+        new_common_type = None
+
+        # catch general errors
+        if (type1 == str_type and (type2.is_string or type2 in (bytes_type, unicode_type)) or
+                type2 == str_type and (type1.is_string or type1 in (bytes_type, unicode_type))):
+            error(self.pos, "Comparisons between bytes/unicode and str are not portable to Python 3")
+            new_common_type = error_type
+
+        # try to use numeric comparisons where possible
+        elif type1.is_complex or type2.is_complex:
+            if (op not in ('==', '!=')
+                    and (type1.is_complex or type1.is_numeric)
+                    and (type2.is_complex or type2.is_numeric)):
+                error(self.pos, "complex types are unordered")
+                new_common_type = error_type
+            elif type1.is_pyobject:
+                new_common_type = Builtin.complex_type if type1.subtype_of(Builtin.complex_type) else py_object_type
+            elif type2.is_pyobject:
+                new_common_type = Builtin.complex_type if type2.subtype_of(Builtin.complex_type) else py_object_type
+            else:
+                new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
+        elif type1.is_numeric and type2.is_numeric:
+            new_common_type = PyrexTypes.widest_numeric_type(type1, type2)
+        elif common_type is None or not common_type.is_pyobject:
+            # may still be None afterwards, which triggers the generic fallback below
+            new_common_type = self.find_common_int_type(env, op, operand1, operand2)
+
+        if new_common_type is None:
+            # fall back to generic type compatibility tests
+            if type1.is_ctuple or type2.is_ctuple:
+                new_common_type = py_object_type
+            elif type1 == type2:
+                new_common_type = type1
+            elif type1.is_pyobject or type2.is_pyobject:
+                if type2.is_numeric or type2.is_string:
+                    if operand2.check_for_coercion_error(type1, env):
+                        new_common_type = error_type
+                    else:
+                        new_common_type = py_object_type
+                elif type1.is_numeric or type1.is_string:
+                    if operand1.check_for_coercion_error(type2, env):
+                        new_common_type = error_type
+                    else:
+                        new_common_type = py_object_type
+                elif py_object_type.assignable_from(type1) and py_object_type.assignable_from(type2):
+                    new_common_type = py_object_type
+                else:
+                    # one Python type and one non-Python type, not assignable
+                    self.invalid_types_error(operand1, op, operand2)
+                    new_common_type = error_type
+            elif type1.assignable_from(type2):
+                new_common_type = type1
+            elif type2.assignable_from(type1):
+                new_common_type = type2
+            else:
+                # C types that we couldn't handle up to here are an error
+                self.invalid_types_error(operand1, op, operand2)
+                new_common_type = error_type
+
+        if new_common_type.is_string and (isinstance(operand1, BytesNode) or
+                                          isinstance(operand2, BytesNode)):
+            # special case when comparing char* to bytes literal: must
+            # compare string values!
+            new_common_type = bytes_type
+
+        # recursively merge types
+        if common_type is None or new_common_type.is_error:
+            common_type = new_common_type
+        else:
+            # we could do a lot better by splitting the comparison
+            # into a non-Python part and a Python part, but this is
+            # safer for now
+            common_type = PyrexTypes.spanning_type(common_type, new_common_type)
+
+        if self.cascade:
+            # our right operand is the left operand of the next comparison in the chain
+            common_type = self.cascade.find_common_type(env, self.operator, operand2, common_type)
+
+        return common_type
+
+    def invalid_types_error(self, operand1, op, operand2):
+        """Report that ``op`` cannot be applied to the types of the two operands."""
+        details = (op, operand1.type, operand2.type)
+        error(self.pos, "Invalid types for '%s' (%s, %s)" % details)
+
+    def is_python_comparison(self):
+        """Tell whether this comparison (or one further down the cascade) works on Python objects.
+
+        Containment tests on pointers/arrays and on C strings never count.
+        """
+        if self.is_ptr_contains():
+            return False
+        if self.is_c_string_contains():
+            return False
+        return (self.has_python_operands()
+                or (self.cascade and self.cascade.is_python_comparison())
+                or self.operator in ('in', 'not_in'))
+
+    def coerce_operands_to(self, dst_type, env):
+        """Coerce the right operand of this and all cascaded comparisons to ``dst_type``."""
+        rhs = self.operand2
+        if rhs.type != dst_type:
+            self.operand2 = rhs.coerce_to(dst_type, env)
+        if not self.cascade:
+            return
+        self.cascade.coerce_operands_to(dst_type, env)
+
+    def is_python_result(self):
+        """Tell whether the comparison result is a Python object rather than a C truth value.
+
+        That is the case for a generic comparison of Python operands (no special
+        helper function, no identity or containment test), or if it holds for
+        a comparison further down the cascade.
+        """
+        generic_python_compare = (
+            self.has_python_operands()
+            and self.special_bool_cmp_function is None
+            and self.operator not in ('is', 'is_not', 'in', 'not_in')
+            and not self.is_c_string_contains()
+            and not self.is_ptr_contains())
+        if generic_python_compare:
+            return True
+        return self.cascade and self.cascade.is_python_result()
+
+    def is_c_string_contains(self):
+        """Tell whether this is a containment test of a C integer / unicode character in a string.
+
+        Matches an integer tested against a C string or bytes object, and
+        a unicode character tested against a unicode object.
+        """
+        if self.operator not in ('in', 'not_in'):
+            return False
+        item_type = self.operand1.type
+        if item_type.is_int and (self.operand2.type.is_string or self.operand2.type is bytes_type):
+            return True
+        return (item_type.is_unicode_char
+                and self.operand2.type is unicode_type)
+
+    def is_ptr_contains(self):
+        """Tell whether this is an 'in' / 'not_in' test against a non-string pointer or array.
+
+        Always returns a bool.  Previously, operators other than 'in' / 'not_in'
+        fell off the end of the function and produced an implicit None, which
+        was inconsistent with the value returned for containment tests.
+        """
+        if self.operator not in ('in', 'not_in'):
+            return False
+        container_type = self.operand2.type
+        return bool((container_type.is_ptr or container_type.is_array)
+                    and not container_type.is_string)
+
+    def find_special_bool_compare_function(self, env, operand1, result_is_bool=False):
+        """Try to select a dedicated C helper function for this comparison.
+
+        On success, ``special_bool_cmp_function`` and
+        ``special_bool_cmp_utility_code`` (and, for the numeric case,
+        ``special_bool_extra_args``) are set on this node and True is returned.
+        ``self.operand2`` may get wrapped or coerced along the way.
+        Returns False if no helper applies.
+        """
+        # note: currently operand1 must get coerced to a Python object if we succeed here!
+        if self.operator in ('==', '!='):
+            type1, type2 = operand1.type, self.operand2.type
+            if result_is_bool or (type1.is_builtin_type and type2.is_builtin_type):
+                if type1 is Builtin.unicode_type or type2 is Builtin.unicode_type:
+                    self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
+                    self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
+                    return True
+                elif type1 is Builtin.bytes_type or type2 is Builtin.bytes_type:
+                    self.special_bool_cmp_utility_code = UtilityCode.load_cached("BytesEquals", "StringTools.c")
+                    self.special_bool_cmp_function = "__Pyx_PyBytes_Equals"
+                    return True
+                elif type1 is Builtin.basestring_type or type2 is Builtin.basestring_type:
+                    # basestring uses the same helper as unicode
+                    self.special_bool_cmp_utility_code = UtilityCode.load_cached("UnicodeEquals", "StringTools.c")
+                    self.special_bool_cmp_function = "__Pyx_PyUnicode_Equals"
+                    return True
+                elif type1 is Builtin.str_type or type2 is Builtin.str_type:
+                    self.special_bool_cmp_utility_code = UtilityCode.load_cached("StrEquals", "StringTools.c")
+                    self.special_bool_cmp_function = "__Pyx_PyString_Equals"
+                    return True
+                elif result_is_bool:
+                    # imported locally rather than at module level
+                    from .Optimize import optimise_numeric_binop
+                    result = optimise_numeric_binop(
+                        "Eq" if self.operator == "==" else "Ne",
+                        self,
+                        PyrexTypes.c_bint_type,
+                        operand1,
+                        self.operand2
+                    )
+                    if result:
+                        # the fourth item of the result is not needed here
+                        (self.special_bool_cmp_function,
+                         self.special_bool_cmp_utility_code,
+                         self.special_bool_extra_args,
+                         _) = result
+                        return True
+        elif self.operator in ('in', 'not_in'):
+            if self.operand2.type is Builtin.dict_type:
+                self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+                self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyDictContains", "ObjectHandling.c")
+                self.special_bool_cmp_function = "__Pyx_PyDict_ContainsTF"
+                return True
+            elif self.operand2.type is Builtin.set_type:
+                self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+                self.special_bool_cmp_utility_code = UtilityCode.load_cached("PySetContains", "ObjectHandling.c")
+                self.special_bool_cmp_function = "__Pyx_PySet_ContainsTF"
+                return True
+            elif self.operand2.type is Builtin.unicode_type:
+                self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+                self.special_bool_cmp_utility_code = UtilityCode.load_cached("PyUnicodeContains", "StringTools.c")
+                self.special_bool_cmp_function = "__Pyx_PyUnicode_ContainsTF"
+                return True
+            else:
+                # any other container: use the generic sequence helper on a Python object
+                if not self.operand2.type.is_pyobject:
+                    self.operand2 = self.operand2.coerce_to_pyobject(env)
+                self.special_bool_cmp_utility_code = UtilityCode.load_cached("PySequenceContains", "ObjectHandling.c")
+                self.special_bool_cmp_function = "__Pyx_PySequence_ContainsTF"
+                return True
+        return False
+
+    def generate_operation_code(self, code, result_code,
+            operand1, op, operand2):
+        """Emit the C statement that stores the result of ``operand1 op operand2``.
+
+        ``result_code`` is the C expression that receives the result.  Four
+        cases are handled in order: a special helper function, a generic
+        Python rich comparison, a complex number (in)equality, and a plain
+        C comparison (optionally wrapped in C++ exception translation).
+        """
+        if self.type.is_pyobject:
+            # Python object result: errors show up as NULL, and the C truth
+            # value needs to be converted into a Python bool.
+            error_clause = code.error_goto_if_null
+            got_ref = "__Pyx_XGOTREF(%s); " % result_code
+            if self.special_bool_cmp_function:
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("PyBoolOrNullFromLong", "ObjectHandling.c"))
+                coerce_result = "__Pyx_PyBoolOrNull_FromLong"
+            else:
+                coerce_result = "__Pyx_PyBool_FromLong"
+        else:
+            # C result: errors show up as negative values, no conversion needed.
+            error_clause = code.error_goto_if_neg
+            got_ref = ""
+            coerce_result = ""
+
+        if self.special_bool_cmp_function:
+            if operand1.type.is_pyobject:
+                result1 = operand1.py_result()
+            else:
+                result1 = operand1.result()
+            if operand2.type.is_pyobject:
+                result2 = operand2.py_result()
+            else:
+                result2 = operand2.result()
+            special_bool_extra_args_result = ", ".join([
+                extra_arg.result() for extra_arg in self.special_bool_extra_args
+            ])
+            if self.special_bool_cmp_utility_code:
+                code.globalstate.use_utility_code(self.special_bool_cmp_utility_code)
+            # The third helper argument is either the extra args (if there are
+            # any) or the rich comparison constant for the operator.
+            code.putln(
+                "%s = %s(%s(%s, %s, %s)); %s%s" % (
+                    result_code,
+                    coerce_result,
+                    self.special_bool_cmp_function,
+                    result1, result2,
+                    special_bool_extra_args_result if self.special_bool_extra_args else richcmp_constants[op],
+                    got_ref,
+                    error_clause(result_code, self.pos)))
+
+        elif operand1.type.is_pyobject and op not in ('is', 'is_not'):
+            assert op not in ('in', 'not_in'), op
+            assert self.type.is_pyobject or self.type is PyrexTypes.c_bint_type
+            # PyObject_RichCompare for an object result, PyObject_RichCompareBool otherwise
+            code.putln("%s = PyObject_RichCompare%s(%s, %s, %s); %s%s" % (
+                    result_code,
+                    "" if self.type.is_pyobject else "Bool",
+                    operand1.py_result(),
+                    operand2.py_result(),
+                    richcmp_constants[op],
+                    got_ref,
+                    error_clause(result_code, self.pos)))
+
+        elif operand1.type.is_complex:
+            # '!=' is emitted as the negated equality function
+            code.putln("%s = %s(%s%s(%s, %s));" % (
+                result_code,
+                coerce_result,
+                op == "!=" and "!" or "",
+                operand1.type.unary_op('eq'),
+                operand1.result(),
+                operand2.result()))
+
+        else:
+            type1 = operand1.type
+            type2 = operand2.type
+            # Choose the C type in which both operands are compared.
+            if (type1.is_extension_type or type2.is_extension_type) \
+                    and not type1.same_as(type2):
+                common_type = py_object_type
+            elif type1.is_numeric:
+                common_type = PyrexTypes.widest_numeric_type(type1, type2)
+            else:
+                common_type = type1
+            code1 = operand1.result_as(common_type)
+            code2 = operand2.result_as(common_type)
+            statement = "%s = %s(%s %s %s);" % (
+                result_code,
+                coerce_result,
+                code1,
+                self.c_operator(op),
+                code2)
+            if self.is_cpp_comparison() and self.exception_check == '+':
+                # the C++ operator is declared with exception check '+', so translate its exceptions
+                translate_cpp_exception(
+                    code,
+                    self.pos,
+                    statement,
+                    result_code if self.type.is_pyobject else None,
+                    self.exception_value,
+                    self.in_nogil_context)
+            else:
+                code.putln(statement)
+
+    def c_operator(self, op):
+        """Translate a comparison operator into its C spelling.
+
+        The identity operators map to C (in)equality; everything else is
+        returned unchanged.
+        """
+        identity_operators = {'is': "==", 'is_not': "!="}
+        return identity_operators.get(op, op)
+
+class PrimaryCmpNode(ExprNode, CmpNode):
+    #  Non-cascaded comparison or first comparison of
+    #  a cascaded sequence.
+    #
+    #  operator      string
+    #  operand1      ExprNode
+    #  operand2      ExprNode
+    #  cascade       CascadedCmpNode
+    #
+    #  coerced_operand2       ExprNode or None   replacement for operand2 as returned by
+    #                                            cascade.optimise_comparison(), used as the
+    #                                            left operand of the cascade
+    #  is_memslice_nonecheck  boolean            set for memoryview slice vs. None comparisons
+
+    #  We don't use the subexprs mechanism, because
+    #  things here are too complicated for it to handle.
+    #  Instead, we override all the framework methods
+    #  which use it.
+
+    child_attrs = ['operand1', 'operand2', 'coerced_operand2', 'cascade',
+                   'special_bool_extra_args']
+
+    cascade = None
+    coerced_operand2 = None
+    is_memslice_nonecheck = False
+
+    def infer_type(self, env):
+        """Infer the result type: a Pythran expression type where supported, else a Python object."""
+        lhs_type = self.operand1.infer_type(env)
+        rhs_type = self.operand2.infer_type(env)
+
+        involves_pythran = is_pythran_expr(lhs_type) or is_pythran_expr(rhs_type)
+        if (involves_pythran
+                and is_pythran_supported_type(lhs_type)
+                and is_pythran_supported_type(rhs_type)):
+            return PythranExpr(pythran_binop_type(self.operator, lhs_type, rhs_type))
+
+        # TODO: implement this for other types.
+        return py_object_type
+
+    def type_dependencies(self, env):
+        """Return the type dependencies of this node, which is always an empty tuple."""
+        no_dependencies = ()
+        return no_dependencies
+
+    def calculate_constant_result(self):
+        """Compute the constant result of a non-cascaded comparison."""
+        assert not self.cascade
+        lhs_constant = self.operand1.constant_result
+        self.calculate_cascaded_constant_result(lhs_constant)
+
+    def compile_time_value(self, denv):
+        """Evaluate the whole comparison chain at compile time, starting from the left operand."""
+        return self.cascaded_compile_time_value(
+            self.operand1.compile_time_value(denv), denv)
+
+    def unify_cascade_type(self):
+        """Give every comparison in the cascade the result type of this node."""
+        link = self.cascade
+        while link:
+            link.type, link = self.type, link.cascade
+
+    def analyse_types(self, env):
+        """Analyse the operands and decide how the comparison gets evaluated.
+
+        Determines the result type (Python object or C bint, or a special type
+        for C++ / Pythran operands), coerces the operands to a common type
+        where needed and sets ``is_pycmp`` and ``is_temp``.  Returns self.
+        """
+        self.operand1 = self.operand1.analyse_types(env)
+        self.operand2 = self.operand2.analyse_types(env)
+        # C++ class operands are handled through operator lookup.
+        if self.is_cpp_comparison():
+            self.analyse_cpp_comparison(env)
+            if self.cascade:
+                error(self.pos, "Cascading comparison not yet supported for cpp types.")
+            return self
+
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        if is_pythran_expr(type1) or is_pythran_expr(type2):
+            if is_pythran_supported_type(type1) and is_pythran_supported_type(type2):
+                self.type = PythranExpr(pythran_binop_type(self.operator, type1, type2))
+                self.is_pycmp = False
+                return self
+
+        if self.analyse_memoryviewslice_comparison(env):
+            return self
+
+        if self.cascade:
+            self.cascade = self.cascade.analyse_types(env)
+
+        # Each branch below sets 'is_pycmp' and 'common_type' (the type that
+        # the operands get coerced to, or None for no coercion), or returns early.
+        if self.operator in ('in', 'not_in'):
+            if self.is_c_string_contains():
+                self.is_pycmp = False
+                common_type = None
+                if self.cascade:
+                    error(self.pos, "Cascading comparison not yet supported for 'int_val in string'.")
+                    return self
+                if self.operand2.type is unicode_type:
+                    env.use_utility_code(UtilityCode.load_cached("PyUCS4InUnicode", "StringTools.c"))
+                else:
+                    if self.operand1.type is PyrexTypes.c_uchar_type:
+                        self.operand1 = self.operand1.coerce_to(PyrexTypes.c_char_type, env)
+                    if self.operand2.type is not bytes_type:
+                        self.operand2 = self.operand2.coerce_to(bytes_type, env)
+                    env.use_utility_code(UtilityCode.load_cached("BytesContains", "StringTools.c"))
+                self.operand2 = self.operand2.as_none_safe_node(
+                    "argument of type 'NoneType' is not iterable")
+            elif self.is_ptr_contains():
+                if self.cascade:
+                    error(self.pos, "Cascading comparison not supported for 'val in sliced pointer'.")
+                self.type = PyrexTypes.c_bint_type
+                # Will be transformed by IterationTransform
+                return self
+            elif self.find_special_bool_compare_function(env, self.operand1):
+                if not self.operand1.type.is_pyobject:
+                    self.operand1 = self.operand1.coerce_to_pyobject(env)
+                common_type = None  # if coercion needed, the method call above has already done it
+                self.is_pycmp = False  # result is bint
+            else:
+                common_type = py_object_type
+                self.is_pycmp = True
+        elif self.find_special_bool_compare_function(env, self.operand1):
+            if not self.operand1.type.is_pyobject:
+                self.operand1 = self.operand1.coerce_to_pyobject(env)
+            common_type = None  # if coercion needed, the method call above has already done it
+            self.is_pycmp = False  # result is bint
+        else:
+            common_type = self.find_common_type(env, self.operator, self.operand1)
+            self.is_pycmp = common_type.is_pyobject
+
+        if common_type is not None and not common_type.is_error:
+            if self.operand1.type != common_type:
+                self.operand1 = self.operand1.coerce_to(common_type, env)
+            self.coerce_operands_to(common_type, env)
+
+        if self.cascade:
+            self.operand2 = self.operand2.coerce_to_simple(env)
+            self.cascade.coerce_cascaded_operands_to_temp(env)
+            operand2 = self.cascade.optimise_comparison(self.operand2, env)
+            if operand2 is not self.operand2:
+                # the cascade needs a differently coerced version of our right operand
+                self.coerced_operand2 = operand2
+        if self.is_python_result():
+            self.type = PyrexTypes.py_object_type
+        else:
+            self.type = PyrexTypes.c_bint_type
+        self.unify_cascade_type()
+        if self.is_pycmp or self.cascade or self.special_bool_cmp_function:
+            # 1) owned reference, 2) reused value, 3) potential function error return value
+            self.is_temp = 1
+        return self
+
+    def analyse_cpp_comparison(self, env):
+        """Resolve a comparison of C++ class operands through operator lookup.
+
+        Reports an error and sets the error type if no matching operator is
+        found.  Otherwise, the operands are coerced to the argument types of
+        the operator function and its return type becomes the result type.
+        """
+        type1 = self.operand1.type
+        type2 = self.operand2.type
+        self.is_pycmp = False
+        entry = env.lookup_operator(self.operator, [self.operand1, self.operand2])
+        if entry is None:
+            error(self.pos, "Invalid types for '%s' (%s, %s)" %
+                (self.operator, type1, type2))
+            self.type = PyrexTypes.error_type
+            self.result_code = "<error>"
+            return
+        func_type = entry.type
+        if func_type.is_ptr:
+            func_type = func_type.base_type
+        self.exception_check = func_type.exception_check
+        self.exception_value = func_type.exception_value
+        if self.exception_check == '+':
+            self.is_temp = True
+            if needs_cpp_exception_conversion(self):
+                env.use_utility_code(UtilityCode.load_cached("CppExceptionConversion", "CppSupport.cpp"))
+        if len(func_type.args) == 1:
+            # single-argument operator: only the right operand is passed as an argument
+            self.operand2 = self.operand2.coerce_to(func_type.args[0].type, env)
+        else:
+            self.operand1 = self.operand1.coerce_to(func_type.args[0].type, env)
+            self.operand2 = self.operand2.coerce_to(func_type.args[1].type, env)
+        self.type = func_type.return_type
+
+    def analyse_memoryviewslice_comparison(self, env):
+        """Detect an (in)equality or identity test between a memoryview slice and None.
+
+        If detected, the node is set up as a C bint None-check and True is
+        returned.  Otherwise, nothing is changed and False is returned.
+        """
+        involves_none = self.operand1.is_none or self.operand2.is_none
+        involves_slice = (self.operand1.type.is_memoryviewslice or
+                          self.operand2.type.is_memoryviewslice)
+        supported_operators = ('==', '!=', 'is', 'is_not')
+        if not (involves_slice and involves_none and self.operator in supported_operators):
+            return False
+
+        self.is_pycmp = False
+        self.type = PyrexTypes.c_bint_type
+        self.is_memslice_nonecheck = True
+        return True
+
+    def coerce_to_boolean(self, env):
+        """Coerce the comparison result to a C truth value.
+
+        For a Python comparison, this first tries to switch to a special
+        helper function that returns a C result directly.  If that is not
+        possible, the generic ExprNode coercion is used.
+        """
+        # coercing to bool => may allow for more efficient comparison code
+        use_special_function = self.is_pycmp and self.find_special_bool_compare_function(
+            env, self.operand1, result_is_bool=True)
+        if not use_special_function:
+            # TODO: check if we can optimise parts of the cascade here
+            return ExprNode.coerce_to_boolean(self, env)
+
+        self.is_pycmp = False
+        self.type = PyrexTypes.c_bint_type
+        self.is_temp = 1
+        if self.cascade:
+            cascade_lhs = self.cascade.optimise_comparison(
+                self.operand2, env, result_is_bool=True)
+            if cascade_lhs is not self.operand2:
+                self.coerced_operand2 = cascade_lhs
+        self.unify_cascade_type()
+        return self
+
+    def has_python_operands(self):
+        """Tell whether at least one of the two operands is a Python object."""
+        lhs_is_pyobject = self.operand1.type.is_pyobject
+        if lhs_is_pyobject:
+            return lhs_is_pyobject
+        return self.operand2.type.is_pyobject
+
+    def check_const(self):
+        """Check that both operands are constant; a cascaded comparison never is."""
+        if not self.cascade:
+            return self.operand1.check_const() and self.operand2.check_const()
+        self.not_const()
+        return False
+
+    def calculate_result_code(self):
+        """Build the C expression for a comparison that is evaluated inline.
+
+        Covers complex number (in)equality, containment tests on C strings
+        and plain binary comparisons (including Pythran expressions and
+        memoryview slice None-checks).
+        """
+        operand1, operand2 = self.operand1, self.operand2
+        if operand1.type.is_complex:
+            # '!=' is expressed as the negated equality function
+            if self.operator == "!=":
+                negation = "!"
+            else:
+                negation = ""
+            return "(%s%s(%s, %s))" % (
+                negation,
+                operand1.type.binary_op('=='),
+                operand1.result(),
+                operand2.result())
+        elif self.is_c_string_contains():
+            if operand2.type is unicode_type:
+                method = "__Pyx_UnicodeContainsUCS4"
+            else:
+                method = "__Pyx_BytesContains"
+            if self.operator == "not_in":
+                negation = "!"
+            else:
+                negation = ""
+            # note the argument order: the container (operand2) comes first
+            return "(%s%s(%s, %s))" % (
+                negation,
+                method,
+                operand2.result(),
+                operand1.result())
+        else:
+            if is_pythran_expr(self.type):
+                result1, result2 = operand1.pythran_result(), operand2.pythran_result()
+            else:
+                result1, result2 = operand1.result(), operand2.result()
+                if self.is_memslice_nonecheck:
+                    # compare the 'memview' field of the slice operand, cast to PyObject*
+                    if operand1.type.is_memoryviewslice:
+                        result1 = "((PyObject *) %s.memview)" % result1
+                    else:
+                        result2 = "((PyObject *) %s.memview)" % result2
+
+            return "(%s %s %s)" % (
+                result1,
+                self.c_operator(self.operator),
+                result2)
+
+    def generate_evaluation_code(self, code):
+        """Emit the evaluation of the operands and, for temp results, the comparison itself.
+
+        For a non-temp result, only the operands and extra arguments are
+        evaluated here; nothing else is emitted by this method.
+        """
+        self.operand1.generate_evaluation_code(code)
+        self.operand2.generate_evaluation_code(code)
+        for extra_arg in self.special_bool_extra_args:
+            extra_arg.generate_evaluation_code(code)
+        if self.is_temp:
+            self.allocate_temp_result(code)
+            self.generate_operation_code(code, self.result(),
+                self.operand1, self.operator, self.operand2)
+            if self.cascade:
+                # The cascade continues from our right operand.  A separately
+                # coerced version of it still needs to be evaluated there.
+                self.cascade.generate_evaluation_code(
+                    code, self.result(), self.coerced_operand2 or self.operand2,
+                    needs_evaluation=self.coerced_operand2 is not None)
+            self.operand1.generate_disposal_code(code)
+            self.operand1.free_temps(code)
+            self.operand2.generate_disposal_code(code)
+            self.operand2.free_temps(code)
+
+    def generate_subexpr_disposal_code(self, code):
+        """Dispose of the two main operands.
+
+        If this is called, it is a non-cascaded cmp, so there is nothing
+        else to dispose of.
+        """
+        for operand in (self.operand1, self.operand2):
+            operand.generate_disposal_code(code)
+
+    def free_subexpr_temps(self, code):
+        """Release the temps of the two main operands.
+
+        If this is called, it is a non-cascaded cmp, so there is nothing
+        else to release.
+        """
+        for operand in (self.operand1, self.operand2):
+            operand.free_temps(code)
+
+    def annotate(self, code):
+        """Annotate both operands and, if there is one, the cascade."""
+        for operand in (self.operand1, self.operand2):
+            operand.annotate(code)
+        if not self.cascade:
+            return
+        self.cascade.annotate(code)
+
+
+class CascadedCmpNode(Node, CmpNode):
+    #  A CascadedCmpNode is not a complete expression node. It
+    #  hangs off the side of another comparison node, shares
+    #  its left operand with that node, and shares its result
+    #  with the PrimaryCmpNode at the head of the chain.
+    #
+    #  operator      string
+    #  operand2      ExprNode
+    #  cascade       CascadedCmpNode
+    #
+    #  coerced_operand2   ExprNode or None   replacement for operand2 as returned by
+    #                                        cascade.optimise_comparison(), used as the
+    #                                        left operand of the next comparison
+
+    child_attrs = ['operand2', 'coerced_operand2', 'cascade',
+                   'special_bool_extra_args']
+
+    cascade = None
+    coerced_operand2 = None
+    constant_result = constant_value_not_set  # FIXME: where to calculate this?
+
+    def infer_type(self, env):
+        """Infer the comparison result type; currently always a generic Python object."""
+        # TODO: Actually implement this (after merging with -unstable).
+        inferred = py_object_type
+        return inferred
+
+    def type_dependencies(self, env):
+        """Return the type dependencies of this node, which is always an empty tuple."""
+        no_dependencies = ()
+        return no_dependencies
+
+    def has_constant_result(self):
+        """Tell whether a constant result was calculated, i.e. it is neither unset nor 'not a constant'."""
+        value = self.constant_result
+        if value is constant_value_not_set:
+            return False
+        return value is not not_a_constant
+
+    def analyse_types(self, env):
+        """Analyse the right operand and the rest of the cascade; returns self."""
+        self.operand2 = self.operand2.analyse_types(env)
+        remainder = self.cascade
+        if remainder:
+            self.cascade = remainder.analyse_types(env)
+        return self
+
+    def has_python_operands(self):
+        """Tell whether the right operand is a Python object (only that operand is checked here)."""
+        rhs_type = self.operand2.type
+        return rhs_type.is_pyobject
+
+    def is_cpp_comparison(self):
+        """Always False: cascaded comparisons aren't currently implemented for c++ classes."""
+        return False
+
+    def optimise_comparison(self, operand1, env, result_is_bool=False):
+        """Try to switch this and all following comparisons to special helper functions.
+
+        Returns the left operand to use for this comparison, which is a
+        Python object coercion of ``operand1`` if a helper function was
+        selected and ``operand1`` is not a Python object already.
+        """
+        found_helper = self.find_special_bool_compare_function(env, operand1, result_is_bool)
+        if found_helper:
+            self.is_pycmp = False
+            self.type = PyrexTypes.c_bint_type
+            if not operand1.type.is_pyobject:
+                operand1 = operand1.coerce_to_pyobject(env)
+        if self.cascade:
+            cascade_lhs = self.cascade.optimise_comparison(self.operand2, env, result_is_bool)
+            if cascade_lhs is not self.operand2:
+                self.coerced_operand2 = cascade_lhs
+        return operand1
+
+    def coerce_operands_to_pyobjects(self, env):
+        self.operand2 = self.operand2.coerce_to_pyobject(env)
+        if self.operand2.type is dict_type and self.operator in ('in', 'not_in'):
+            self.operand2 = self.operand2.as_none_safe_node("'NoneType' object is not iterable")
+        if self.cascade:
+            self.cascade.coerce_operands_to_pyobjects(env)
+
+    def coerce_cascaded_operands_to_temp(self, env):
+        if self.cascade:
+            #self.operand2 = self.operand2.coerce_to_temp(env) #CTT
+            self.operand2 = self.operand2.coerce_to_simple(env)
+            self.cascade.coerce_cascaded_operands_to_temp(env)
+
+    def generate_evaluation_code(self, code, result, operand1, needs_evaluation=False):
+        if self.type.is_pyobject:
+            code.putln("if (__Pyx_PyObject_IsTrue(%s)) {" % result)
+            code.put_decref(result, self.type)
+        else:
+            code.putln("if (%s) {" % result)
+        if needs_evaluation:
+            operand1.generate_evaluation_code(code)
+        self.operand2.generate_evaluation_code(code)
+        for extra_arg in self.special_bool_extra_args:
+            extra_arg.generate_evaluation_code(code)
+        self.generate_operation_code(code, result,
+            operand1, self.operator, self.operand2)
+        if self.cascade:
+            self.cascade.generate_evaluation_code(
+                code, result, self.coerced_operand2 or self.operand2,
+                needs_evaluation=self.coerced_operand2 is not None)
+        if needs_evaluation:
+            operand1.generate_disposal_code(code)
+            operand1.free_temps(code)
+        # Cascaded cmp result is always temp
+        self.operand2.generate_disposal_code(code)
+        self.operand2.free_temps(code)
+        code.putln("}")
+
+    def annotate(self, code):
+        self.operand2.annotate(code)
+        if self.cascade:
+            self.cascade.annotate(code)
+
+
+binop_node_classes = {
+    "or":       BoolBinopNode,
+    "and":      BoolBinopNode,
+    "|":        IntBinopNode,
+    "^":        IntBinopNode,
+    "&":        IntBinopNode,
+    "<<":       IntBinopNode,
+    ">>":       IntBinopNode,
+    "+":        AddNode,
+    "-":        SubNode,
+    "*":        MulNode,
+    "@":        MatMultNode,
+    "/":        DivNode,
+    "//":       DivNode,
+    "%":        ModNode,
+    "**":       PowNode,
+}
+
+
+def binop_node(pos, operator, operand1, operand2, inplace=False, **kwargs):
+    # Construct binop node of appropriate class for
+    # given operator.
+    return binop_node_classes[operator](
+        pos,
+        operator=operator,
+        operand1=operand1,
+        operand2=operand2,
+        inplace=inplace,
+        **kwargs)
+
+
+#-------------------------------------------------------------------
+#
+#  Coercion nodes
+#
+#  Coercion nodes are special in that they are created during
+#  the analyse_types phase of parse tree processing.
+#  Their __init__ methods consequently incorporate some aspects
+#  of that phase.
+#
+#-------------------------------------------------------------------
+
+class CoercionNode(ExprNode):
+    #  Abstract base class for coercion nodes.
+    #
+    #  arg       ExprNode       node being coerced
+
+    subexprs = ['arg']
+    constant_result = not_a_constant
+
+    def __init__(self, arg):
+        super(CoercionNode, self).__init__(arg.pos)
+        self.arg = arg
+        if debug_coercion:
+            print("%s Coercing %s" % (self, self.arg))
+
+    def calculate_constant_result(self):
+        # constant folding can break type coercion, so this is disabled
+        pass
+
+    def annotate(self, code):
+        self.arg.annotate(code)
+        if self.arg.type != self.type:
+            file, line, col = self.pos
+            code.annotate((file, line, col-1), AnnotationItem(
+                style='coerce', tag='coerce', text='[%s] to [%s]' % (self.arg.type, self.type)))
+
+    def analyse_types(self, env):
+        return self
+
+
+class CoerceToMemViewSliceNode(CoercionNode):
+    """
+    Coerce an object to a memoryview slice. This holds a new reference in
+    a managed temp.
+    """
+
+    def __init__(self, arg, dst_type, env):
+        assert dst_type.is_memoryviewslice
+        assert not arg.type.is_memoryviewslice
+        CoercionNode.__init__(self, arg)
+        self.type = dst_type
+        self.is_temp = 1
+        self.use_managed_ref = True
+        self.arg = arg
+        self.type.create_from_py_utility_code(env)
+
+    def generate_result_code(self, code):
+        code.putln(self.type.from_py_call_code(
+            self.arg.py_result(),
+            self.result(),
+            self.pos,
+            code
+        ))
+
+
+class CastNode(CoercionNode):
+    #  Wrap a node in a C type cast.
+
+    def __init__(self, arg, new_type):
+        CoercionNode.__init__(self, arg)
+        self.type = new_type
+
+    def may_be_none(self):
+        return self.arg.may_be_none()
+
+    def calculate_result_code(self):
+        return self.arg.result_as(self.type)
+
+    def generate_result_code(self, code):
+        self.arg.generate_result_code(code)
+
+
+class PyTypeTestNode(CoercionNode):
+    #  This node is used to check that a generic Python
+    #  object is an instance of a particular extension type.
+    #  This node borrows the result of its argument node.
+
+    exact_builtin_type = True
+
+    def __init__(self, arg, dst_type, env, notnone=False):
+        #  The arg is known to be a Python object, and
+        #  the dst_type is known to be an extension type.
+        assert dst_type.is_extension_type or dst_type.is_builtin_type, \
+            "PyTypeTest for %s against non extension type %s" % (arg.type, dst_type)
+        CoercionNode.__init__(self, arg)
+        self.type = dst_type
+        self.result_ctype = arg.ctype()
+        self.notnone = notnone
+
+    nogil_check = Node.gil_error
+    gil_message = "Python type test"
+
+    def analyse_types(self, env):
+        return self
+
+    def may_be_none(self):
+        if self.notnone:
+            return False
+        return self.arg.may_be_none()
+
+    def is_simple(self):
+        return self.arg.is_simple()
+
+    def result_in_temp(self):
+        return self.arg.result_in_temp()
+
+    def is_ephemeral(self):
+        return self.arg.is_ephemeral()
+
+    def nonlocally_immutable(self):
+        return self.arg.nonlocally_immutable()
+
+    def reanalyse(self):
+        if self.type != self.arg.type or not self.arg.is_temp:
+            return self
+        if not self.type.typeobj_is_available():
+            return self
+        if self.arg.may_be_none() and self.notnone:
+            return self.arg.as_none_safe_node("Cannot convert NoneType to %.200s" % self.type.name)
+        return self.arg
+
+    def calculate_constant_result(self):
+        # FIXME
+        pass
+
+    def calculate_result_code(self):
+        return self.arg.result()
+
+    def generate_result_code(self, code):
+        if self.type.typeobj_is_available():
+            if self.type.is_builtin_type:
+                type_test = self.type.type_test_code(
+                    self.arg.py_result(),
+                    self.notnone, exact=self.exact_builtin_type)
+                code.globalstate.use_utility_code(UtilityCode.load_cached(
+                    "RaiseUnexpectedTypeError", "ObjectHandling.c"))
+            else:
+                type_test = self.type.type_test_code(
+                    self.arg.py_result(), self.notnone)
+                code.globalstate.use_utility_code(
+                    UtilityCode.load_cached("ExtTypeTest", "ObjectHandling.c"))
+            code.putln("if (!(%s)) %s" % (
+                type_test, code.error_goto(self.pos)))
+        else:
+            error(self.pos, "Cannot test type of extern C class "
+                "without type object name specification")
+
+    def generate_post_assignment_code(self, code):
+        self.arg.generate_post_assignment_code(code)
+
+    def allocate_temp_result(self, code):
+        pass
+
+    def release_temp_result(self, code):
+        pass
+
+    def free_temps(self, code):
+        self.arg.free_temps(code)
+
+    def free_subexpr_temps(self, code):
+        self.arg.free_subexpr_temps(code)
+
+
+class NoneCheckNode(CoercionNode):
+    # This node is used to check that a Python object is not None and
+    # raises an appropriate exception (as specified by the creating
+    # transform).
+
+    is_nonecheck = True
+
+    def __init__(self, arg, exception_type_cname, exception_message,
+                 exception_format_args=()):
+        CoercionNode.__init__(self, arg)
+        self.type = arg.type
+        self.result_ctype = arg.ctype()
+        self.exception_type_cname = exception_type_cname
+        self.exception_message = exception_message
+        self.exception_format_args = tuple(exception_format_args or ())
+
+    nogil_check = None  # this node only guards an operation that would fail already
+
+    def analyse_types(self, env):
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def is_simple(self):
+        return self.arg.is_simple()
+
+    def result_in_temp(self):
+        return self.arg.result_in_temp()
+
+    def nonlocally_immutable(self):
+        return self.arg.nonlocally_immutable()
+
+    def calculate_result_code(self):
+        return self.arg.result()
+
+    def condition(self):
+        if self.type.is_pyobject:
+            return self.arg.py_result()
+        elif self.type.is_memoryviewslice:
+            return "((PyObject *) %s.memview)" % self.arg.result()
+        else:
+            raise Exception("unsupported type")
+
+    @classmethod
+    def generate(cls, arg, code, exception_message,
+                 exception_type_cname="PyExc_TypeError", exception_format_args=(), in_nogil_context=False):
+        node = cls(arg, exception_type_cname, exception_message, exception_format_args)
+        node.in_nogil_context = in_nogil_context
+        node.put_nonecheck(code)
+
+    @classmethod
+    def generate_if_needed(cls, arg, code, exception_message,
+                           exception_type_cname="PyExc_TypeError", exception_format_args=(), in_nogil_context=False):
+        if arg.may_be_none():
+            cls.generate(arg, code, exception_message, exception_type_cname, exception_format_args, in_nogil_context)
+
+    def put_nonecheck(self, code):
+        code.putln(
+            "if (unlikely(%s == Py_None)) {" % self.condition())
+
+        if self.in_nogil_context:
+            code.put_ensure_gil()
+
+        escape = StringEncoding.escape_byte_string
+        if self.exception_format_args:
+            code.putln('PyErr_Format(%s, "%s", %s);' % (
+                self.exception_type_cname,
+                StringEncoding.escape_byte_string(
+                    self.exception_message.encode('UTF-8')),
+                ', '.join([ '"%s"' % escape(str(arg).encode('UTF-8'))
+                            for arg in self.exception_format_args ])))
+        else:
+            code.putln('PyErr_SetString(%s, "%s");' % (
+                self.exception_type_cname,
+                escape(self.exception_message.encode('UTF-8'))))
+
+        if self.in_nogil_context:
+            code.put_release_ensured_gil()
+
+        code.putln(code.error_goto(self.pos))
+        code.putln("}")
+
+    def generate_result_code(self, code):
+        self.put_nonecheck(code)
+
+    def generate_post_assignment_code(self, code):
+        self.arg.generate_post_assignment_code(code)
+
+    def free_temps(self, code):
+        self.arg.free_temps(code)
+
+
+class CoerceToPyTypeNode(CoercionNode):
+    #  This node is used to convert a C data type
+    #  to a Python object.
+
+    type = py_object_type
+    target_type = py_object_type
+    is_temp = 1
+
+    def __init__(self, arg, env, type=py_object_type):
+        if not arg.type.create_to_py_utility_code(env):
+            error(arg.pos, "Cannot convert '%s' to Python object" % arg.type)
+        elif arg.type.is_complex:
+            # special case: complex coercion is so complex that it
+            # uses a macro ("__pyx_PyComplex_FromComplex()"), for
+            # which the argument must be simple
+            arg = arg.coerce_to_simple(env)
+        CoercionNode.__init__(self, arg)
+        if type is py_object_type:
+            # be specific about some known types
+            if arg.type.is_string or arg.type.is_cpp_string:
+                self.type = default_str_type(env)
+            elif arg.type.is_pyunicode_ptr or arg.type.is_unicode_char:
+                self.type = unicode_type
+            elif arg.type.is_complex:
+                self.type = Builtin.complex_type
+            self.target_type = self.type
+        elif arg.type.is_string or arg.type.is_cpp_string:
+            if (type not in (bytes_type, bytearray_type)
+                    and not env.directives['c_string_encoding']):
+                error(arg.pos,
+                    "default encoding required for conversion from '%s' to '%s'" %
+                    (arg.type, type))
+            self.type = self.target_type = type
+        else:
+            # FIXME: check that the target type and the resulting type are compatible
+            self.target_type = type
+
+    gil_message = "Converting to Python object"
+
+    def may_be_none(self):
+        # FIXME: is this always safe?
+        return False
+
+    def coerce_to_boolean(self, env):
+        arg_type = self.arg.type
+        if (arg_type == PyrexTypes.c_bint_type or
+                (arg_type.is_pyobject and arg_type.name == 'bool')):
+            return self.arg.coerce_to_temp(env)
+        else:
+            return CoerceToBooleanNode(self, env)
+
+    def coerce_to_integer(self, env):
+        # If not already some C integer type, coerce to longint.
+        if self.arg.type.is_int:
+            return self.arg
+        else:
+            return self.arg.coerce_to(PyrexTypes.c_long_type, env)
+
+    def analyse_types(self, env):
+        # The arg is always already analysed
+        return self
+
+    def generate_result_code(self, code):
+        code.putln('%s; %s' % (
+            self.arg.type.to_py_call_code(
+                self.arg.result(),
+                self.result(),
+                self.target_type),
+            code.error_goto_if_null(self.result(), self.pos)))
+
+        self.generate_gotref(code)
+
+
+class CoerceIntToBytesNode(CoerceToPyTypeNode):
+    #  This node is used to convert a C int type to a Python bytes
+    #  object.
+
+    is_temp = 1
+
+    def __init__(self, arg, env):
+        arg = arg.coerce_to_simple(env)
+        CoercionNode.__init__(self, arg)
+        self.type = Builtin.bytes_type
+
+    def generate_result_code(self, code):
+        arg = self.arg
+        arg_result = arg.result()
+        if arg.type not in (PyrexTypes.c_char_type,
+                            PyrexTypes.c_uchar_type,
+                            PyrexTypes.c_schar_type):
+            if arg.type.signed:
+                code.putln("if ((%s < 0) || (%s > 255)) {" % (
+                    arg_result, arg_result))
+            else:
+                code.putln("if (%s > 255) {" % arg_result)
+            code.putln('PyErr_SetString(PyExc_OverflowError, '
+                       '"value too large to pack into a byte"); %s' % (
+                           code.error_goto(self.pos)))
+            code.putln('}')
+        temp = None
+        if arg.type is not PyrexTypes.c_char_type:
+            temp = code.funcstate.allocate_temp(PyrexTypes.c_char_type, manage_ref=False)
+            code.putln("%s = (char)%s;" % (temp, arg_result))
+            arg_result = temp
+        code.putln('%s = PyBytes_FromStringAndSize(&%s, 1); %s' % (
+            self.result(),
+            arg_result,
+            code.error_goto_if_null(self.result(), self.pos)))
+        if temp is not None:
+            code.funcstate.release_temp(temp)
+        self.generate_gotref(code)
+
+
+class CoerceFromPyTypeNode(CoercionNode):
+    #  This node is used to convert a Python object
+    #  to a C data type.
+
+    # Allow 'None' to map to a different C value independent of the coercion, e.g. to 'NULL' or '0'.
+    special_none_cvalue = None
+
+    def __init__(self, result_type, arg, env):
+        CoercionNode.__init__(self, arg)
+        self.type = result_type
+        self.is_temp = 1
+        if not result_type.create_from_py_utility_code(env):
+            error(arg.pos,
+                  "Cannot convert Python object to '%s'" % result_type)
+        if self.type.is_string or self.type.is_pyunicode_ptr:
+            if self.arg.is_name and self.arg.entry and self.arg.entry.is_pyglobal:
+                warning(arg.pos,
+                        "Obtaining '%s' from externally modifiable global Python value" % result_type,
+                        level=1)
+            if self.type.is_pyunicode_ptr:
+                warning(arg.pos,
+                        "Py_UNICODE* has been removed in Python 3.12. This conversion to a "
+                        "Py_UNICODE* will no longer compile in the latest Python versions. "
+                        "Use Python C API functions like PyUnicode_AsWideCharString if you "
+                        "need to obtain a wchar_t* on Windows (and free the string manually after use).",
+                        level=1)
+
+    def analyse_types(self, env):
+        # The arg is always already analysed
+        return self
+
+    def is_ephemeral(self):
+        return (self.type.is_ptr and not self.type.is_array) and self.arg.is_ephemeral()
+
+    def generate_result_code(self, code):
+        from_py_function = None
+        # for certain source types, we can do better than the generic coercion
+        if self.type.is_string and self.arg.type is bytes_type:
+            if self.type.from_py_function.startswith('__Pyx_PyObject_As'):
+                from_py_function = '__Pyx_PyBytes' + self.type.from_py_function[len('__Pyx_PyObject'):]
+                NoneCheckNode.generate_if_needed(self.arg, code, "expected bytes, NoneType found")
+
+        code.putln(self.type.from_py_call_code(
+            self.arg.py_result(), self.result(), self.pos, code,
+            from_py_function=from_py_function,
+            special_none_cvalue=self.special_none_cvalue,
+        ))
+        if self.type.is_pyobject:
+            self.generate_gotref(code)
+
+    def nogil_check(self, env):
+        error(self.pos, "Coercion from Python not allowed without the GIL")
+
+
+class CoerceToBooleanNode(CoercionNode):
+    #  This node is used when a result needs to be used
+    #  in a boolean context.
+
+    type = PyrexTypes.c_bint_type
+
+    _special_builtins = {
+        Builtin.list_type:       'PyList_GET_SIZE',
+        Builtin.tuple_type:      'PyTuple_GET_SIZE',
+        Builtin.set_type:        'PySet_GET_SIZE',
+        Builtin.frozenset_type:  'PySet_GET_SIZE',
+        Builtin.bytes_type:      'PyBytes_GET_SIZE',
+        Builtin.bytearray_type:  'PyByteArray_GET_SIZE',
+        Builtin.unicode_type:    '__Pyx_PyUnicode_IS_TRUE',
+    }
+
+    def __init__(self, arg, env):
+        CoercionNode.__init__(self, arg)
+        if arg.type.is_pyobject:
+            self.is_temp = 1
+
+    def nogil_check(self, env):
+        if self.arg.type.is_pyobject and self._special_builtins.get(self.arg.type) is None:
+            self.gil_error()
+
+    gil_message = "Truth-testing Python object"
+
+    def check_const(self):
+        if self.is_temp:
+            self.not_const()
+            return False
+        return self.arg.check_const()
+
+    def calculate_result_code(self):
+        return "(%s != 0)" % self.arg.result()
+
+    def generate_result_code(self, code):
+        if not self.is_temp:
+            return
+        test_func = self._special_builtins.get(self.arg.type)
+        if test_func is not None:
+            checks = ["(%s != Py_None)" % self.arg.py_result()] if self.arg.may_be_none() else []
+            checks.append("(%s(%s) != 0)" % (test_func, self.arg.py_result()))
+            code.putln("%s = %s;" % (self.result(), '&&'.join(checks)))
+        else:
+            code.putln(
+                "%s = __Pyx_PyObject_IsTrue(%s); %s" % (
+                    self.result(),
+                    self.arg.py_result(),
+                    code.error_goto_if_neg(self.result(), self.pos)))
+
+    def analyse_types(self, env):
+        return self
+
+
+class CoerceToComplexNode(CoercionNode):
+
+    def __init__(self, arg, dst_type, env):
+        if arg.type.is_complex:
+            arg = arg.coerce_to_simple(env)
+        self.type = dst_type
+        CoercionNode.__init__(self, arg)
+        dst_type.create_declaration_utility_code(env)
+
+    def calculate_result_code(self):
+        if self.arg.type.is_complex:
+            real_part = self.arg.type.real_code(self.arg.result())
+            imag_part = self.arg.type.imag_code(self.arg.result())
+        else:
+            real_part = self.arg.result()
+            imag_part = "0"
+        return "%s(%s, %s)" % (
+                self.type.from_parts,
+                real_part,
+                imag_part)
+
+    def generate_result_code(self, code):
+        pass
+
+    def analyse_types(self, env):
+        return self
+
+
+def coerce_from_soft_complex(arg, dst_type, env):
+    from .UtilNodes import HasGilNode
+    cfunc_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_double_type,
+        [ PyrexTypes.CFuncTypeArg("value", PyrexTypes.soft_complex_type, None),
+          PyrexTypes.CFuncTypeArg("have_gil", PyrexTypes.c_bint_type, None) ],
+        exception_value="-1",
+        exception_check=True,
+        nogil=True  # We can acquire the GIL internally on failure
+    )
+    call = PythonCapiCallNode(
+        arg.pos,
+        "__Pyx_SoftComplexToDouble",
+        cfunc_type,
+        utility_code = UtilityCode.load_cached("SoftComplexToDouble", "Complex.c"),
+        args = [arg, HasGilNode(arg.pos)],
+    )
+    call = call.analyse_types(env)
+    if call.type != dst_type:
+        call = call.coerce_to(dst_type, env)
+    return call
+
+
+class CoerceToTempNode(CoercionNode):
+    #  This node is used to force the result of another node
+    #  to be stored in a temporary. It is only used if the
+    #  argument node's result is not already in a temporary.
+
+    def __init__(self, arg, env):
+        CoercionNode.__init__(self, arg)
+        self.type = self.arg.type.as_argument_type()
+        self.constant_result = self.arg.constant_result
+        self.is_temp = 1
+        if self.type.is_pyobject:
+            self.result_ctype = py_object_type
+
+    gil_message = "Creating temporary Python reference"
+
+    def analyse_types(self, env):
+        # The arg is always already analysed
+        return self
+
+    def may_be_none(self):
+        return self.arg.may_be_none()
+
+    def coerce_to_boolean(self, env):
+        self.arg = self.arg.coerce_to_boolean(env)
+        if self.arg.is_simple():
+            return self.arg
+        self.type = self.arg.type
+        self.result_ctype = self.type
+        return self
+
+    def generate_result_code(self, code):
+        #self.arg.generate_evaluation_code(code) # Already done
+        # by generic generate_subexpr_evaluation_code!
+        code.putln("%s = %s;" % (
+            self.result(), self.arg.result_as(self.ctype())))
+        if self.use_managed_ref:
+            if not self.type.is_memoryviewslice:
+                code.put_incref(self.result(), self.ctype())
+            else:
+                code.put_incref_memoryviewslice(self.result(), self.type,
+                                            have_gil=not self.in_nogil_context)
+
+
+class ProxyNode(CoercionNode):
+    """
+    A node that should not be replaced by transforms or other means,
+    and hence can be useful to wrap the argument to a clone node
+
+    MyNode    -> ProxyNode -> ArgNode
+    CloneNode -^
+    """
+
+    nogil_check = None
+
+    def __init__(self, arg):
+        super(ProxyNode, self).__init__(arg)
+        self.constant_result = arg.constant_result
+        self.update_type_and_entry()
+
+    def analyse_types(self, env):
+        self.arg = self.arg.analyse_expressions(env)
+        self.update_type_and_entry()
+        return self
+
+    def infer_type(self, env):
+        return self.arg.infer_type(env)
+
+    def update_type_and_entry(self):
+        type = getattr(self.arg, 'type', None)
+        if type:
+            self.type = type
+            self.result_ctype = self.arg.result_ctype
+        arg_entry = getattr(self.arg, 'entry', None)
+        if arg_entry:
+            self.entry = arg_entry
+
+    def generate_result_code(self, code):
+        self.arg.generate_result_code(code)
+
+    def result(self):
+        return self.arg.result()
+
+    def is_simple(self):
+        return self.arg.is_simple()
+
+    def may_be_none(self):
+        return self.arg.may_be_none()
+
+    def generate_evaluation_code(self, code):
+        self.arg.generate_evaluation_code(code)
+
+    def generate_disposal_code(self, code):
+        self.arg.generate_disposal_code(code)
+
+    def free_temps(self, code):
+        self.arg.free_temps(code)
+
+class CloneNode(CoercionNode):
+    #  This node is employed when the result of another node needs
+    #  to be used multiple times. The argument node's result must
+    #  be in a temporary. This node "borrows" the result from the
+    #  argument node, and does not generate any evaluation or
+    #  disposal code for it. The original owner of the argument
+    #  node is responsible for doing those things.
+
+    subexprs = []  # Arg is not considered a subexpr
+    nogil_check = None
+
+    def __init__(self, arg):
+        CoercionNode.__init__(self, arg)
+        self.constant_result = arg.constant_result
+        type = getattr(arg, 'type', None)
+        if type:
+            self.type = type
+            self.result_ctype = arg.result_ctype
+        arg_entry = getattr(arg, 'entry', None)
+        if arg_entry:
+            self.entry = arg_entry
+
+    def result(self):
+        return self.arg.result()
+
+    def may_be_none(self):
+        return self.arg.may_be_none()
+
+    def type_dependencies(self, env):
+        return self.arg.type_dependencies(env)
+
+    def infer_type(self, env):
+        return self.arg.infer_type(env)
+
+    def analyse_types(self, env):
+        self.type = self.arg.type
+        self.result_ctype = self.arg.result_ctype
+        self.is_temp = 1
+        arg_entry = getattr(self.arg, 'entry', None)
+        if arg_entry:
+            self.entry = arg_entry
+        return self
+
+    def coerce_to(self, dest_type, env):
+        if self.arg.is_literal:
+            return self.arg.coerce_to(dest_type, env)
+        return super(CloneNode, self).coerce_to(dest_type, env)
+
+    def is_simple(self):
+        return True  # result is always in a temp (or a name)
+
+    def generate_evaluation_code(self, code):
+        pass
+
+    def generate_result_code(self, code):
+        pass
+
+    def generate_disposal_code(self, code):
+        pass
+
+    def generate_post_assignment_code(self, code):
+        # if we're assigning from a CloneNode then it's "giveref"ed away, so it does
+        # need a matching incref (ideally this should happen before the assignment though)
+        if self.is_temp:  # should usually be true
+            code.put_incref(self.result(), self.ctype())
+
+    def free_temps(self, code):
+        pass
+
+
+class CppOptionalTempCoercion(CoercionNode):
+    """
+    Used only in CoerceCppTemps - handles cases where the temp is actually an OptionalCppClassType (and thus needs dereferencing when on the rhs)
+    """
+    is_temp = False
+
+    @property
+    def type(self):
+        return self.arg.type
+
+    def calculate_result_code(self):
+        return "(*%s)" % self.arg.result()
+
+    def generate_result_code(self, code):
+        pass
+
+    def _make_move_result_rhs(self, result, optional=False):
+        # this wouldn't normally get moved (because it isn't a temp), but force it to be because it
+        # is a thin wrapper around a temp
+        return super(CppOptionalTempCoercion, self)._make_move_result_rhs(result, optional=False)
+
+
+class CMethodSelfCloneNode(CloneNode):
+    # Special CloneNode for the self argument of builtin C methods
+    # that accepts subtypes of the builtin type.  This is safe only
+    # for 'final' subtypes, as subtypes of the declared type may
+    # override the C method.
+
+    def coerce_to(self, dst_type, env):
+        if dst_type.is_builtin_type and self.type.subtype_of(dst_type):
+            return self
+        return CloneNode.coerce_to(self, dst_type, env)
+
+
+class ModuleRefNode(ExprNode):
+    # Simply returns the module object
+
+    type = py_object_type
+    is_temp = False
+    subexprs = []
+
+    def analyse_types(self, env):
+        return self
+
+    def may_be_none(self):
+        return False
+
+    def calculate_result_code(self):
+        return Naming.module_cname
+
+    def generate_result_code(self, code):
+        pass
+
+class DocstringRefNode(ExprNode):
+    # Extracts the docstring of the body element
+
+    subexprs = ['body']
+    type = py_object_type
+    is_temp = True
+
+    def __init__(self, pos, body):
+        ExprNode.__init__(self, pos)
+        assert body.type.is_pyobject
+        self.body = body
+
+    def analyse_types(self, env):
+        return self
+
+    def generate_result_code(self, code):
+        code.putln('%s = __Pyx_GetAttr(%s, %s); %s' % (
+            self.result(), self.body.result(),
+            code.intern_identifier(StringEncoding.EncodedString("__doc__")),
+            code.error_goto_if_null(self.result(), self.pos)))
+        self.generate_gotref(code)
+
+class AnnotationNode(ExprNode):
+    # Deals with the two possible uses of an annotation.
+    # 1. The post PEP-563 use where an annotation is stored
+    #  as a string
+    # 2. The Cython use where the annotation can indicate an
+    #  object type
+    #
+    # Doesn't handle the pre PEP-563 version where the
+    # annotation is evaluated into a Python Object.
+
+    subexprs = []
+
+    # 'untyped' is set for fused specializations:
+    # Once a fused function has been created we don't want
+    # annotations to override an already set type.
+    untyped = False
+
+    def __init__(self, pos, expr, string=None):
+        """string is expected to already be a StringNode or None"""
+        ExprNode.__init__(self, pos)
+        if string is None:
+            # import doesn't work at top of file?
+            from .AutoDocTransforms import AnnotationWriter
+            string = StringEncoding.EncodedString(
+                AnnotationWriter(description="annotation").write(expr))
+            string = StringNode(pos, unicode_value=string, value=string.as_utf8_string())
+        self.string = string
+        self.expr = expr
+
+    def analyse_types(self, env):
+        return self  # nothing needs doing
+
+    def analyse_as_type(self, env):
+        # for compatibility when used as a return_type_node, have this interface too
+        return self.analyse_type_annotation(env)[1]
+
+    def _warn_on_unknown_annotation(self, env, annotation):
+        """Checks for cases where the user should be warned that the annotation contains unknown types."""
+        if isinstance(annotation, SliceIndexNode):
+            annotation = annotation.base
+        if annotation.is_name:
+            # Validate annotation in form `var: type`
+            if not env.lookup(annotation.name):
+                warning(annotation.pos,
+                        "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1)
+        elif annotation.is_attribute and annotation.obj.is_name:
+            # Validate annotation in form `var: module.type`
+            if not env.lookup(annotation.obj.name):
+                # `module` is undeclared
+                warning(annotation.pos,
+                        "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1)
+            elif annotation.obj.is_cython_module:
+                # `module` is cython
+                module_scope = annotation.obj.analyse_as_module(env)
+                if module_scope and not module_scope.lookup_type(annotation.attribute):
+                    error(annotation.pos,
+                            "Unknown type declaration '%s' in annotation" % self.string.value)
+            else:
+                module_scope = annotation.obj.analyse_as_module(env)
+                if module_scope and module_scope.pxd_file_loaded:
+                    warning(annotation.pos,
+                            "Unknown type declaration '%s' in annotation, ignoring" % self.string.value, level=1)
+        else:
+            warning(annotation.pos, "Unknown type declaration in annotation, ignoring")
+
+    def analyse_type_annotation(self, env, assigned_value=None):
+        if self.untyped:
+            # Already applied as a fused type, not re-evaluating it here.
+            return [], None
+        annotation = self.expr
+        explicit_pytype = explicit_ctype = False
+        if annotation.is_dict_literal:
+            warning(annotation.pos,
+                    "Dicts should no longer be used as type annotations. Use 'cython.int' etc. directly.", level=1)
+            for name, value in annotation.key_value_pairs:
+                if not name.is_string_literal:
+                    continue
+                if name.value in ('type', b'type'):
+                    explicit_pytype = True
+                    if not explicit_ctype:
+                        annotation = value
+                elif name.value in ('ctype', b'ctype'):
+                    explicit_ctype = True
+                    annotation = value
+            if explicit_pytype and explicit_ctype:
+                warning(annotation.pos, "Duplicate type declarations found in signature annotation", level=1)
+        elif isinstance(annotation, TupleNode):
+            warning(annotation.pos,
+                    "Tuples cannot be declared as simple tuples of types. Use 'tuple[type1, type2, ...]'.", level=1)
+            return [], None
+
+        with env.new_c_type_context(in_c_type_context=explicit_ctype):
+            arg_type = annotation.analyse_as_type(env)
+
+            if arg_type is None:
+                self._warn_on_unknown_annotation(env, annotation)
+                return [], arg_type
+
+            if annotation.is_string_literal:
+                warning(annotation.pos,
+                        "Strings should no longer be used for type declarations. Use 'cython.int' etc. directly.",
+                        level=1)
+            if explicit_pytype and not explicit_ctype and not (arg_type.is_pyobject or arg_type.equivalent_type):
+                warning(annotation.pos,
+                        "Python type declaration in signature annotation does not refer to a Python type")
+            if arg_type.is_complex:
+                # creating utility code needs to be special-cased for complex types
+                arg_type.create_declaration_utility_code(env)
+
+            # Check for declaration modifiers, e.g. "typing.Optional[...]" or "dataclasses.InitVar[...]"
+            modifiers = annotation.analyse_pytyping_modifiers(env) if annotation.is_subscript else []
+
+        return modifiers, arg_type
+
+
+class AssignmentExpressionNode(ExprNode):
+    """
+    Also known as a named expression or the walrus operator
+
+    Arguments
+    lhs - NameNode - not stored directly as an attribute of the node
+    rhs - ExprNode
+
+    Attributes
+    rhs        - ExprNode (wrapped in a ProxyNode by __init__)
+    assignment - SingleAssignmentNode
+    """
+    # subexprs and child_attrs are intentionally different here, because the assignment is not an expression
+    subexprs = ["rhs"]
+    child_attrs = ["rhs", "assignment"]  # This order is important for control-flow (i.e. xdecref) to be right
+
+    is_temp = False
+    assignment = None
+    clone_node = None  # NOTE(review): not referenced anywhere else in this class - confirm it is still needed
+
+    def __init__(self, pos, lhs, rhs, **kwds):
+        super(AssignmentExpressionNode, self).__init__(pos, **kwds)
+        self.rhs = ProxyNode(rhs)
+        assign_expr_rhs = CloneNode(self.rhs)  # the assignment consumes a clone of the proxied rhs
+        self.assignment = SingleAssignmentNode(
+            pos, lhs=lhs, rhs=assign_expr_rhs, is_assignment_expression=True)
+
+    @property
+    def type(self):
+        return self.rhs.type  # the node's type is always that of its rhs
+
+    @property
+    def target_name(self):
+        return self.assignment.lhs.name
+
+    def infer_type(self, env):
+        return self.rhs.infer_type(env)
+
+    def analyse_declarations(self, env):
+        self.assignment.analyse_declarations(env)
+
+    def analyse_types(self, env):
+        # we're trying to generate code that looks roughly like:
+        #   __pyx_t_1 = rhs
+        #   lhs = __pyx_t_1
+        #   __pyx_t_1
+        # (plus any reference counting that's needed)
+
+        self.rhs = self.rhs.analyse_types(env)
+        if not self.rhs.arg.is_temp:
+            if not self.rhs.arg.is_literal:
+                # for anything but the simplest cases (where it can be used directly)
+                # we convert rhs to a temp, because CloneNode requires arg to be a temp
+                self.rhs.arg = self.rhs.arg.coerce_to_temp(env)
+            else:
+                # For literals we can optimize by just using the literal twice
+                #
+                # We aren't including `self.rhs.is_name` in this optimization
+                # because that goes wrong for assignment expressions run in
+                # parallel, e.g. `(a := b) + (b := a + c)`.
+                # This is a special case of https://github.com/cython/cython/issues/4146
+                # TODO - once that's fixed, revisit this code in general and possibly
+                # use coerce_to_simple
+                self.assignment.rhs = copy.copy(self.rhs)
+
+        # TODO - there's a missed optimization in the code generation stage
+        # for self.rhs.arg.is_temp: an incref/decref pair can be removed
+        # (but needs a general mechanism to do that)
+        self.assignment = self.assignment.analyse_types(env)
+        return self
+
+    def coerce_to(self, dst_type, env):
+        if dst_type == self.assignment.rhs.type:
+            # in this quite common case (for example, when both lhs, and self are being coerced to Python)
+            # we can optimize the coercion out by sharing it between
+            # this and the assignment
+            old_rhs_arg = self.rhs.arg
+            if isinstance(old_rhs_arg, CoerceToTempNode):
+                old_rhs_arg = old_rhs_arg.arg  # coerce the value underneath the temp wrapper
+            rhs_arg = old_rhs_arg.coerce_to(dst_type, env)
+            if rhs_arg is not old_rhs_arg:
+                self.rhs.arg = rhs_arg
+                self.rhs.update_type_and_entry()
+                # clean up the old coercion node that the assignment has likely generated
+                if (isinstance(self.assignment.rhs, CoercionNode)
+                        and not isinstance(self.assignment.rhs, CloneNode)):
+                    self.assignment.rhs = self.assignment.rhs.arg
+                    self.assignment.rhs.type = self.assignment.rhs.arg.type
+                return self
+        return super(AssignmentExpressionNode, self).coerce_to(dst_type, env)  # otherwise: default coercion
+
+    def calculate_result_code(self):
+        return self.rhs.result()  # this node's result is the rhs result itself
+
+    def generate_result_code(self, code):
+        # we have to do this manually because it isn't a subexpression
+        self.assignment.generate_execution_code(code)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.pxd b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..5338d4fe490aacf8bc3b781ffa82451f1548404e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.pxd
@@ -0,0 +1,111 @@
+# cython: language_level=3
+
+cimport cython
+
+from .Visitor cimport CythonTransform, TreeVisitor
+
+cdef class ControlBlock:
+    cdef public set children   # blocks linked below this one via add_child()
+    cdef public set parents    # blocks that have this one as a child
+    cdef public set positions  # position markers
+    cdef public list stats     # block statements
+    cdef public dict gen       # assignments generated by this block
+    cdef public set bounded    # entries that are definitely bound in this block
+
+    # Big integer bitsets (FlowControl.py initialises each of them to 0)
+    cdef public object i_input
+    cdef public object i_output
+    cdef public object i_gen
+    cdef public object i_kill
+    cdef public object i_state
+
+    cpdef bint empty(self)
+    cpdef detach(self)
+    cpdef add_child(self, block)
+
+cdef class ExitBlock(ControlBlock):
+    cpdef bint empty(self)  # overridden in FlowControl.py to always return False
+
+cdef class NameAssignment:
+    cdef public bint is_arg       # False by default; the Argument subclass sets it to True
+    cdef public bint is_deletion  # False by default; the NameDeletion subclass sets it to True
+    cdef public object lhs
+    cdef public object rhs
+    cdef public object entry
+    cdef public object pos        # copied from lhs.pos
+    cdef public set refs          # NameReference objects added by check_definitions()
+    cdef public object bit        # assigned by ControlFlow.initialize()
+    cdef public object inferred_type
+    cdef public object rhs_scope  # optional scope used for the rhs instead of entry.scope
+
+cdef class AssignmentList:
+    cdef public object bit   # the entry's own bit, assigned by ControlFlow.initialize()
+    cdef public object mask  # that bit OR-ed with the bits of all assignments in `stats`
+    cdef public list stats   # NameAssignment objects appended by ControlFlow.initialize()
+
+cdef class AssignmentCollector(TreeVisitor):
+    cdef list assignments  # not `public`; NOTE(review): the Python class is outside this chunk - confirm usage there
+
+@cython.final
+cdef class ControlFlow:
+    cdef public set blocks       # blocks of this graph
+    cdef public set entries      # tracked entries
+    cdef public list loops       # stack for loop descriptors
+    cdef public list exceptions  # stack for exception descriptors
+
+    cdef public ControlBlock entry_point
+    cdef public ExitBlock exit_point
+    cdef public ControlBlock block  # current block
+
+    cdef public dict assmts  # entry -> AssignmentList, rebuilt by initialize()
+
+    cdef public Py_ssize_t in_try_block  # set to 0 by __init__ in FlowControl.py
+
+    cpdef newblock(self, ControlBlock parent=*)
+    cpdef nextblock(self, ControlBlock parent=*)
+    cpdef bint is_tracked(self, entry)
+    cpdef bint is_statically_assigned(self, entry)
+    cpdef mark_position(self, node)
+    cpdef mark_assignment(self, lhs, rhs, entry, rhs_scope=*)
+    cpdef mark_argument(self, lhs, rhs, entry)
+    cpdef mark_deletion(self, node, entry)
+    cpdef mark_reference(self, node, entry)
+
+    @cython.locals(block=ControlBlock, parent=ControlBlock, unreachable=set)  # names match locals of normalize() in FlowControl.py
+    cpdef normalize(self)
+
+    @cython.locals(bit=object, assmts=AssignmentList, block=ControlBlock)  # names match locals of initialize()
+    cpdef initialize(self)
+
+    @cython.locals(assmts=AssignmentList, assmt=NameAssignment)  # names match locals of map_one()
+    cpdef set map_one(self, istate, entry)
+
+    @cython.locals(block=ControlBlock, parent=ControlBlock)  # names match locals of reaching_definitions()
+    cdef reaching_definitions(self)
+
+cdef class Uninitialized:  # declares no attributes or methods
+    pass
+
+cdef class Unknown:  # declares no attributes or methods
+    pass
+
+cdef class MessageCollection:
+    cdef set messages  # (pos, is_error, message) tuples; not declared `public`
+
+@cython.locals(dirty=bint, block=ControlBlock, parent=ControlBlock,  # NOTE(review): `dirty` is a local of
+               assmt=NameAssignment)  # ControlFlow.reaching_definitions(); confirm check_definitions() still uses it
+cdef check_definitions(ControlFlow flow, dict compiler_directives)
+
+@cython.final
+cdef class ControlFlowAnalysis(CythonTransform):
+    cdef object gv_ctx
+    cdef object constant_folder
+    cdef set reductions
+    cdef list stack  # a stack of (env, flow) tuples
+    cdef object env
+    cdef ControlFlow flow  # statically typed as the ControlFlow class declared in this file
+    cdef object object_expr
+    cdef bint in_inplace_assignment
+
+    cpdef mark_assignment(self, lhs, rhs=*, rhs_scope=*)  # `=*`: defaults are defined by the Python implementation
+    cpdef mark_position(self, node)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.py
new file mode 100644
index 0000000000000000000000000000000000000000..c8575435738eada568f58c894ced50e4afccb449
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/FlowControl.py
@@ -0,0 +1,1383 @@
+# cython: language_level=3str
+# cython: auto_pickle=True
+
+from __future__ import absolute_import
+
+import cython
+cython.declare(PyrexTypes=object, ExprNodes=object, Nodes=object, Builtin=object,
+               Options=object, TreeVisitor=object, CythonTransform=object,
+               InternalError=object, error=object, warning=object,
+               fake_rhs_expr=object, TypedExprNode=object)
+
+from . import Builtin
+from . import ExprNodes
+from . import Nodes
+from . import Options
+from . import PyrexTypes
+
+from .Visitor import TreeVisitor, CythonTransform
+from .Errors import error, warning, InternalError
+
+
+class TypedExprNode(ExprNodes.ExprNode):
+    # Expression node that only carries a type; used to declare assignments that have no known entry.
+    def __init__(self, type, may_be_none=None, pos=None):
+        ExprNodes.ExprNode.__init__(self, pos)
+        self._may_be_none = may_be_none  # True, False, or None when unknown
+        self.type = type
+
+    def may_be_none(self):
+        return not (self._may_be_none == False)  # only an explicit False rules None out
+
+# Fake rhs to silence "unused variable" warning
+fake_rhs_expr = TypedExprNode(PyrexTypes.unspecified_type)
+
+
+class ControlBlock(object):
+    """Control flow graph node. Sequence of assignments and name references.
+
+       children  set of children nodes
+       parents   set of parent nodes
+       positions set of position markers
+
+       stats     list of block statements
+       gen       dict of assignments generated by this block
+       bounded   set  of entries that are definitely bound in this block
+
+       Example:
+
+        a = 1
+        b = a + c # 'c' is either bound here or an exception was raised
+
+        stats = [Assignment(a), NameReference(a), NameReference(c),
+                     Assignment(b)]
+        gen = {Entry(a): Assignment(a), Entry(b): Assignment(b)}
+        bounded = {Entry(a), Entry(c)}
+
+    """
+
+    def __init__(self):
+        # Graph links plus the source positions this block covers.
+        self.parents = set()
+        self.children = set()
+        self.positions = set()
+
+        # Block contents: statements, generated assignments, bound entries.
+        self.stats, self.gen, self.bounded = [], {}, set()
+
+        # Bitset state, kept as plain ints and empty to begin with.
+        self.i_input = self.i_output = 0
+        self.i_gen = 0
+        self.i_kill = 0
+        self.i_state = 0
+
+    def empty(self):
+        return not (self.stats or self.positions)
+
+    def detach(self):
+        """Unlink this block from all of its parents and children, in both directions."""
+        for successor in self.children:
+            successor.parents.remove(self)
+        for predecessor in self.parents:
+            predecessor.children.remove(self)
+        self.parents.clear()
+        self.children.clear()
+
+    def add_child(self, block):
+        self.children.add(block)  # forward link
+        block.parents.add(self)   # matching back link
+
+
+class ExitBlock(ControlBlock):
+    """Exit point block that is never reported as empty, whatever it contains."""
+
+    def empty(self):
+        return False  # unconditional, unlike ControlBlock.empty()
+
+
+class AssignmentList(object):
+    def __init__(self):
+        self.stats = []  # `bit` and `mask` are not set here; ControlFlow.initialize() assigns them
+
+
+class ControlFlow(object):
+    """Control-flow graph.
+
+       entry_point ControlBlock entry point for this graph
+       exit_point  ControlBlock normal exit point
+       block       ControlBlock current block
+       blocks      set    blocks created for this graph plus exit_point (entry_point is not added)
+       entries     set    tracked entries
+       loops       list   stack for loop descriptors
+       exceptions  list   stack for exception descriptors
+       in_try_block  int  track if we're in a try...except or try...finally block
+    """
+
+    def __init__(self):
+        self.blocks = set()
+        self.entries = set()
+        self.loops = []
+        self.exceptions = []
+
+        self.entry_point = ControlBlock()
+        self.exit_point = ExitBlock()
+        self.blocks.add(self.exit_point)
+        self.block = self.entry_point  # the entry point starts out as the current block
+        self.in_try_block = 0
+
+    def newblock(self, parent=None):
+        """Create floating block linked to `parent` if given.
+
+           NOTE: Block is added to self.blocks, but does not become the current block
+        """
+        block = ControlBlock()
+        self.blocks.add(block)
+        if parent:
+            parent.add_child(block)
+        return block
+
+    def nextblock(self, parent=None):
+        """Create a child block linked to `parent` if given, otherwise to the current block.
+
+           NOTE: Block is added to self.blocks and becomes the new current block
+        """
+        block = ControlBlock()
+        self.blocks.add(block)
+        if parent:
+            parent.add_child(block)
+        elif self.block:
+            self.block.add_child(block)
+        self.block = block
+        return self.block
+
+    def is_tracked(self, entry):
+        if entry.is_anonymous:
+            return False
+        return (entry.is_local or entry.is_pyclass_attr or entry.is_arg or
+                entry.from_closure or entry.in_closure or
+                entry.error_on_uninitialized)
+
+    def is_statically_assigned(self, entry):
+        if (entry.is_local and entry.is_variable and
+                (entry.type.is_struct_or_union or
+                 entry.type.is_complex or
+                 entry.type.is_array or
+                 (entry.type.is_cpp_class and not entry.is_cpp_optional))):
+            # stack allocated structured variable => never uninitialised
+            return True
+        return False
+
+    def mark_position(self, node):
+        """Mark position, will be used to draw graph nodes."""
+        if self.block:
+            self.block.positions.add(node.pos[:2])  # only the first two components of the position are kept
+
+    def mark_assignment(self, lhs, rhs, entry, rhs_scope=None):
+        if self.block and self.is_tracked(entry):  # nothing is recorded without a current block or for untracked entries
+            assignment = NameAssignment(lhs, rhs, entry, rhs_scope=rhs_scope)
+            self.block.stats.append(assignment)
+            self.block.gen[entry] = assignment  # replaces any earlier generated state of `entry` in this block
+            self.entries.add(entry)
+
+    def mark_argument(self, lhs, rhs, entry):
+        if self.block and self.is_tracked(entry):
+            assignment = Argument(lhs, rhs, entry)
+            self.block.stats.append(assignment)
+            self.block.gen[entry] = assignment
+            self.entries.add(entry)
+
+    def mark_deletion(self, node, entry):
+        if self.block and self.is_tracked(entry):
+            assignment = NameDeletion(node, entry)
+            self.block.stats.append(assignment)
+            self.block.gen[entry] = Uninitialized  # the marker class, not the NameDeletion, becomes the generated state
+            self.entries.add(entry)
+
+    def mark_reference(self, node, entry):
+        if self.block and self.is_tracked(entry):
+            self.block.stats.append(NameReference(node, entry))
+            ## XXX: We don't track expression evaluation order so we can't use
+            ## XXX: successful reference as initialization sign.
+            ## # Local variable is definitely bound after this reference
+            ## if not node.allow_null:
+            ##     self.block.bounded.add(entry)
+            self.entries.add(entry)
+
+    def normalize(self):
+        """Delete unreachable and orphan blocks."""
+        queue = {self.entry_point}
+        visited = set()
+        while queue:  # collect everything reachable from the entry point through child links
+            root = queue.pop()
+            visited.add(root)
+            for child in root.children:
+                if child not in visited:
+                    queue.add(child)
+        unreachable = self.blocks - visited
+        for block in unreachable:
+            block.detach()
+        visited.remove(self.entry_point)  # the entry point is never considered for removal below
+        for block in visited:
+            if block.empty():
+                for parent in block.parents:  # Re-parent
+                    for child in block.children:
+                        parent.add_child(child)
+                block.detach()
+                unreachable.add(block)
+        self.blocks -= unreachable  # drops both the unreachable and the removed empty blocks
+
+    def initialize(self):
+        """Set initial state, map assignments to bits."""
+        self.assmts = {}
+
+        bit = 1
+        for entry in self.entries:  # one bit per entry; map_one() reads it as "not (yet) assigned"
+            assmts = AssignmentList()
+            assmts.mask = assmts.bit = bit
+            self.assmts[entry] = assmts
+            bit <<= 1
+
+        for block in self.blocks:  # one further bit per recorded assignment (including subclasses)
+            for stat in block.stats:
+                if isinstance(stat, NameAssignment):
+                    stat.bit = bit
+                    assmts = self.assmts[stat.entry]
+                    assmts.stats.append(stat)
+                    assmts.mask |= bit
+                    bit <<= 1
+
+        for block in self.blocks:  # derive each block's gen/kill bitsets from its `gen` and `bounded`
+            for entry, stat in block.gen.items():
+                assmts = self.assmts[entry]
+                if stat is Uninitialized:
+                    block.i_gen |= assmts.bit
+                else:
+                    block.i_gen |= stat.bit
+                block.i_kill |= assmts.mask
+            block.i_output = block.i_gen
+            for entry in block.bounded:
+                block.i_kill |= self.assmts[entry].bit
+
+        for assmts in self.assmts.values():  # at the entry point every entry carries its own entry bit
+            self.entry_point.i_gen |= assmts.bit
+        self.entry_point.i_output = self.entry_point.i_gen
+
+    def map_one(self, istate, entry):
+        ret = set()  # states of `entry` encoded in the bitset `istate`
+        assmts = self.assmts[entry]
+        if istate & assmts.bit:
+            if self.is_statically_assigned(entry):
+                ret.add(StaticAssignment(entry))
+            elif entry.from_closure:
+                ret.add(Unknown)
+            else:
+                ret.add(Uninitialized)
+        for assmt in assmts.stats:
+            if istate & assmt.bit:
+                ret.add(assmt)
+        return ret
+
+    def reaching_definitions(self):
+        """Per-block reaching definitions analysis."""
+        dirty = True
+        while dirty:  # repeat until a full pass leaves every block's output unchanged
+            dirty = False
+            for block in self.blocks:
+                i_input = 0
+                for parent in block.parents:
+                    i_input |= parent.i_output  # input is the union of all parents' outputs
+                i_output = (i_input & ~block.i_kill) | block.i_gen
+                if i_output != block.i_output:
+                    dirty = True
+                block.i_input = i_input
+                block.i_output = i_output
+
+
+class LoopDescr(object):
+    def __init__(self, next_block, loop_block):
+        self.exceptions = []  # always starts out empty
+        self.loop_block = loop_block
+        self.next_block = next_block
+
+
+class ExceptionDescr(object):
+    """Plain record describing one exception handling context.
+
+    entry_point   ControlBlock Exception handling entry point
+    finally_enter ControlBlock Normal finally clause entry point (optional)
+    finally_exit  ControlBlock Normal finally clause exit point (optional)
+    """
+
+    def __init__(self, entry_point, finally_enter=None, finally_exit=None):
+        self.finally_exit = finally_exit
+        self.finally_enter = finally_enter
+        self.entry_point = entry_point
+
+
+class NameAssignment(object):
+    def __init__(self, lhs, rhs, entry, rhs_scope=None):
+        if lhs.cf_state is None:
+            lhs.cf_state = set()
+        self.lhs, self.rhs, self.entry = lhs, rhs, entry
+        self.pos = lhs.pos  # an assignment is positioned at its target
+        self.refs = set()
+        self.is_deletion = False
+        self.is_arg = False
+        self.inferred_type = None
+        # For generator expression targets, the rhs can have a different scope than the lhs;
+        # when given, it is used instead of entry.scope in infer_type() and type_dependencies().
+        self.rhs_scope = rhs_scope
+
+    def __repr__(self):
+        return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
+
+    def infer_type(self):
+        self.inferred_type = found = self.rhs.infer_type(self.rhs_scope or self.entry.scope)
+        return found
+
+    def type_dependencies(self):
+        return self.rhs.type_dependencies(self.rhs_scope or self.entry.scope)
+
+    @property
+    def type(self):
+        # A declared entry type always wins over whatever was inferred.
+        declared = self.entry.type
+        return self.inferred_type if declared.is_unspecified else declared
+
+
+class StaticAssignment(NameAssignment):
+    """Assignment that holds from the declaration itself, e.g. stack allocation."""
+    def __init__(self, entry):
+        # Non-object types are marked as never being None; for Python
+        # objects the answer is left unknown (None).
+        nullability = None if entry.type.is_pyobject else False
+        # One synthetic typed node stands in for both the target and the
+        # value of this assignment.
+        node = TypedExprNode(entry.type, may_be_none=nullability, pos=entry.pos)
+        NameAssignment.__init__(self, node, node, entry)
+
+    def infer_type(self):
+        return self.entry.type  # the entry's type is used directly
+
+    def type_dependencies(self):
+        return ()  # always empty
+
+
+class Argument(NameAssignment):
+    def __init__(self, lhs, rhs, entry):
+        NameAssignment.__init__(self, lhs, rhs, entry)  # rhs_scope keeps its default of None
+        self.is_arg = True  # overrides the False set by NameAssignment.__init__
+
+
+class NameDeletion(NameAssignment):
+    def __init__(self, lhs, entry):
+        NameAssignment.__init__(self, lhs, lhs, entry)  # the deleted node serves as both lhs and rhs
+        self.is_deletion = True
+
+    def infer_type(self):
+        inferred_type = self.rhs.infer_type(self.entry.scope)
+        if (not inferred_type.is_pyobject
+                and inferred_type.can_coerce_to_pyobject(self.entry.scope)):
+            return PyrexTypes.py_object_type  # NOTE: this result is returned without being stored in self.inferred_type
+        self.inferred_type = inferred_type
+        return inferred_type
+
+
+class Uninitialized(object):  # used as a marker: the class object itself is stored in state sets and compared by identity
+    """Definitely not initialised yet."""
+
+
+class Unknown(object):  # used as a marker: map_one() adds the class object itself for entries that come from a closure
+    """Coming from outer closure, might be initialised or not."""
+
+
+class NameReference(object):
+    def __init__(self, node, entry):
+        # Make sure the referencing node has a state set before it is recorded.
+        if node.cf_state is None:
+            node.cf_state = set()
+        self.node, self.entry = node, entry
+        self.pos = node.pos
+
+    def __repr__(self):
+        return '%s(entry=%r)' % (self.__class__.__name__, self.entry)
+
+
+class ControlFlowState(list):
+    # List of the assignments of a Node's entry that are in effect at that node.
+    #
+    # cf_is_null        [boolean] It is uninitialized
+    # cf_maybe_null     [boolean] May be uninitialized
+    # is_single         [boolean] Has only one assignment at this point
+
+    is_single = False
+    cf_is_null = False
+    cf_maybe_null = False
+
+    def __init__(self, state):
+        # NOTE: the marker classes are removed from the caller's set in place.
+        if Uninitialized in state:
+            state.discard(Uninitialized)
+            self.cf_maybe_null = True
+            if not state:
+                self.cf_is_null = True
+        elif Unknown in state:
+            state.discard(Unknown)
+            self.cf_maybe_null = True
+        elif len(state) == 1:
+            self.is_single = True
+        # XXX: Remove fake_rhs_expr
+        real_assignments = [assmt for assmt in state if assmt.rhs is not fake_rhs_expr]
+        list.__init__(self, real_assignments)
+
+    def one(self):
+        return self[0]  # first remaining assignment
+
+
+class GVContext(object):
+    """Shared state for rendering one or more sub-graphs as a Graphviz digraph."""
+
+    def __init__(self):
+        self.children = []  # renderable sub-graphs, in insertion order
+        self.sources = {}   # source descriptor -> cached list of its lines
+        self.blockids = {}  # block -> generated node id
+        self.nextid = 0     # number used for the next generated node id
+
+    def add(self, child):
+        self.children.append(child)
+
+    def nodeid(self, block):
+        known = self.blockids
+        if block not in known:
+            known[block], self.nextid = 'block%d' % self.nextid, self.nextid + 1
+        return known[block]
+
+    def extract_sources(self, block):
+        positions = block.positions
+        if not positions:
+            return ''
+        first, last = min(positions), max(positions)
+        descr = first[0]
+        if descr not in self.sources:
+            self.sources[descr] = list(descr.get_lines())
+        wanted = self.sources[descr][first[1] - 1:last[1]]
+        return '\\n'.join(line.strip() for line in wanted)
+
+    def render(self, fp, name, annotate_defs=False):
+        """Write every registered child to `fp`, wrapped in one dot digraph called `name`."""
+        fp.write('digraph %s {\n' % name)
+        fp.write(' node [shape=box];\n')
+        for subgraph in self.children:
+            subgraph.render(fp, self, annotate_defs)
+        fp.write('}\n')
+
+    def escape(self, text):
+        return text.replace('"', '\\"').replace('\n', '\\n')  # double quotes first, then real newlines
+
+
+class GV(object):
+    """Renders a single control-flow graph as a Graphviz DOT subgraph."""
+
+    def __init__(self, name, flow):
+        self.flow = flow
+        self.name = name
+
+    def render(self, fp, ctx, annotate_defs=False):
+        fp.write(' subgraph %s {\n' % self.name)
+        # First pass: one labelled node per block.
+        for block in self.flow.blocks:
+            label = ctx.extract_sources(block)
+            # Statements are only listed in the label on request.
+            for stat in (block.stats if annotate_defs else ()):
+                if isinstance(stat, NameAssignment):
+                    kind = 'deletion' if stat.is_deletion else 'definition'
+                    label += '\n %s [%s %s]' % (stat.entry.name, kind, stat.pos[1])
+                elif isinstance(stat, NameReference) and stat.entry:
+                    label += '\n %s [reference %s]' % (stat.entry.name, stat.pos[1])
+            node = ctx.nodeid(block)
+            # Blocks without any label text get the placeholder 'empty'.
+            fp.write('  %s [label="%s"];\n' % (node, ctx.escape(label or 'empty')))
+        # Second pass: one edge per block -> child link.
+        for block in self.flow.blocks:
+            source = ctx.nodeid(block)
+            for child in block.children:
+                fp.write('  %s -> %s;\n' % (source, ctx.nodeid(child)))
+        fp.write(' }\n')
+
+
+class MessageCollection(object):
+    """Buffers errors and warnings so that they can be reported in sorted order."""
+    def __init__(self):
+        self.messages = set()  # of (pos, is_error, message) tuples; duplicates collapse
+
+    def error(self, pos, message):
+        self.messages.add((pos, True, message))
+
+    def warning(self, pos, message):
+        self.messages.add((pos, False, message))
+
+    def report(self):
+        for where, fatal, text in sorted(self.messages):
+            if not fatal:
+                warning(where, text, 2)
+                continue
+            error(where, text)
+
+
+def check_definitions(flow, compiler_directives):
+    flow.initialize()
+    flow.reaching_definitions()
+
+    # Track down state
+    assignments = set()
+    # Node to entry map
+    references = {}
+    assmt_nodes = set()
+
+    for block in flow.blocks:
+        i_state = block.i_input
+        for stat in block.stats:
+            i_assmts = flow.assmts[stat.entry]
+            state = flow.map_one(i_state, stat.entry)
+            if isinstance(stat, NameAssignment):
+                stat.lhs.cf_state.update(state)
+                assmt_nodes.add(stat.lhs)
+                i_state = i_state & ~i_assmts.mask
+                if stat.is_deletion:
+                    i_state |= i_assmts.bit
+                else:
+                    i_state |= stat.bit
+                assignments.add(stat)
+                if stat.rhs is not fake_rhs_expr:
+                    stat.entry.cf_assignments.append(stat)
+            elif isinstance(stat, NameReference):
+                references[stat.node] = stat.entry
+                stat.entry.cf_references.append(stat)
+                stat.node.cf_state.update(state)
+                ## if not stat.node.allow_null:
+                ##     i_state &= ~i_assmts.bit
+                ## # after successful read, the state is known to be initialised
+                state.discard(Uninitialized)
+                state.discard(Unknown)
+                for assmt in state:
+                    assmt.refs.add(stat)
+
+    # Check variable usage
+    warn_maybe_uninitialized = compiler_directives['warn.maybe_uninitialized']
+    warn_unused_result = compiler_directives['warn.unused_result']
+    warn_unused = compiler_directives['warn.unused']
+    warn_unused_arg = compiler_directives['warn.unused_arg']
+
+    messages = MessageCollection()
+
+    # assignment hints
+    for node in assmt_nodes:
+        if Uninitialized in node.cf_state:
+            node.cf_maybe_null = True
+            if len(node.cf_state) == 1:
+                node.cf_is_null = True
+            else:
+                node.cf_is_null = False
+        elif Unknown in node.cf_state:
+            node.cf_maybe_null = True
+        else:
+            node.cf_is_null = False
+            node.cf_maybe_null = False
+
+    # Find uninitialized references and cf-hints
+    for node, entry in references.items():
+        if Uninitialized in node.cf_state:
+            node.cf_maybe_null = True
+            if (not entry.from_closure and len(node.cf_state) == 1
+                    and entry.name not in entry.scope.scope_predefined_names):
+                node.cf_is_null = True
+            if (node.allow_null or entry.from_closure
+                    or entry.is_pyclass_attr or entry.type.is_error):
+                pass  # Can be uninitialized here
+            elif node.cf_is_null and not entry.in_closure:
+                if entry.error_on_uninitialized or (
+                        Options.error_on_uninitialized and (
+                        entry.type.is_pyobject or entry.type.is_unspecified)):
+                    messages.error(
+                        node.pos,
+                        "local variable '%s' referenced before assignment"
+                        % entry.name)
+                else:
+                    messages.warning(
+                        node.pos,
+                        "local variable '%s' referenced before assignment"
+                        % entry.name)
+            elif warn_maybe_uninitialized:
+                msg = "local variable '%s' might be referenced before assignment" % entry.name
+                if entry.in_closure:
+                    msg += " (maybe initialized inside a closure)"
+                messages.warning(
+                    node.pos,
+                    msg)
+        elif Unknown in node.cf_state:
+            # TODO: better cross-closure analysis to know when inner functions
+            #       are being called before a variable is being set, and when
+            #       a variable is known to be set before even defining the
+            #       inner function, etc.
+            node.cf_maybe_null = True
+        else:
+            node.cf_is_null = False
+            node.cf_maybe_null = False
+
+    # Unused result
+    for assmt in assignments:
+        if (not assmt.refs and not assmt.entry.is_pyclass_attr
+                and not assmt.entry.in_closure):
+            if assmt.entry.cf_references and warn_unused_result:
+                if assmt.is_arg:
+                    messages.warning(assmt.pos, "Unused argument value '%s'" %
+                                     assmt.entry.name)
+                else:
+                    messages.warning(assmt.pos, "Unused result in '%s'" %
+                                     assmt.entry.name)
+            assmt.lhs.cf_used = False
+
+    # Unused entries
+    for entry in flow.entries:
+        if (not entry.cf_references
+                and not entry.is_pyclass_attr):
+            if entry.name != '_' and not entry.name.startswith('unused'):
+                # '_' is often used for unused variables, e.g. in loops
+                if entry.is_arg:
+                    if warn_unused_arg:
+                        messages.warning(entry.pos, "Unused argument '%s'" %
+                                         entry.name)
+                else:
+                    if warn_unused:
+                        messages.warning(entry.pos, "Unused entry '%s'" %
+                                         entry.name)
+            entry.cf_used = False
+
+    messages.report()
+
+    for node in assmt_nodes:
+        node.cf_state = ControlFlowState(node.cf_state)
+    for node in references:
+        node.cf_state = ControlFlowState(node.cf_state)
+
+
+class AssignmentCollector(TreeVisitor):
+    def __init__(self):
+        super(AssignmentCollector, self).__init__()
+        self.assignments = []
+
+    def visit_Node(self):
+        self._visitchildren(self, None, None)
+
+    def visit_SingleAssignmentNode(self, node):
+        self.assignments.append((node.lhs, node.rhs))
+
+    def visit_CascadedAssignmentNode(self, node):
+        for lhs in node.lhs_list:
+            self.assignments.append((lhs, node.rhs))
+
+
+class ControlFlowAnalysis(CythonTransform):
+
+    def find_in_stack(self, env):
+        if env == self.env:
+            return self.flow
+        for e, flow in reversed(self.stack):
+            if e is env:
+                return flow
+        assert False
+
+    def visit_ModuleNode(self, node):
+        """Initialise the per-run analysis state and analyse the module.
+
+        Sets up the module-level scope/flow pair, the (env, flow) stack,
+        the reduction set and the constant folder, visits the module's
+        children, runs ``check_definitions`` on the module-level flow and,
+        if the ``control_flow.dot_output`` directive is set, writes the
+        collected graphs to that file.  Returns ``node``.
+        """
+        dot_output = self.current_directives['control_flow.dot_output']
+        # Graph collection context; only created when DOT output is requested.
+        self.gv_ctx = GVContext() if dot_output else None
+
+        from .Optimize import ConstantFolding
+        self.constant_folder = ConstantFolding()
+
+        # Set of NameNode reductions
+        self.reductions = set()
+
+        self.in_inplace_assignment = False
+        self.env = node.scope
+        self.flow = ControlFlow()
+        self.stack = []  # a stack of (env, flow) tuples
+        # Default right-hand side used by mark_assignment() when none is given.
+        self.object_expr = TypedExprNode(PyrexTypes.py_object_type, may_be_none=True)
+        self.visitchildren(node)
+
+        check_definitions(self.flow, self.current_directives)
+
+        if dot_output:
+            annotate_defs = self.current_directives['control_flow.dot_annotate_defs']
+            with open(dot_output, 'wt') as fp:
+                self.gv_ctx.render(fp, 'module', annotate_defs=annotate_defs)
+        return node
+
+    def visit_FuncDefNode(self, node):
+        """Build and check a separate control flow graph for a function.
+
+        Argument defaults and decorators are visited in the enclosing
+        flow.  Then the enclosing (env, flow) pair is pushed, a fresh
+        ``ControlFlow`` is created for the function's local scope, the
+        arguments and body are visited, the graph is normalised and
+        checked, and the enclosing pair is restored.  Returns ``node``.
+        """
+        # Defaults and decorators are evaluated in the enclosing scope,
+        # so they are visited before switching to the function's own flow.
+        for arg in node.args:
+            if arg.default:
+                self.visitchildren(arg)
+        self.visitchildren(node, ('decorators',))
+        self.stack.append((self.env, self.flow))
+        self.env = node.local_scope
+        self.flow = ControlFlow()
+
+        # Collect all entries
+        for entry in node.local_scope.entries.values():
+            if self.flow.is_tracked(entry):
+                self.flow.entries.add(entry)
+
+        self.mark_position(node)
+        # Function body block
+        self.flow.nextblock()
+
+        for arg in node.args:
+            self._visit(arg)
+        # *args / **kwargs are recorded as never-None tuple / dict arguments.
+        if node.star_arg:
+            self.flow.mark_argument(node.star_arg,
+                                    TypedExprNode(Builtin.tuple_type,
+                                                  may_be_none=False),
+                                    node.star_arg.entry)
+        if node.starstar_arg:
+            self.flow.mark_argument(node.starstar_arg,
+                                    TypedExprNode(Builtin.dict_type,
+                                                  may_be_none=False),
+                                    node.starstar_arg.entry)
+        self._visit(node.body)
+        # Workaround for generators
+        if node.is_generator:
+            self._visit(node.gbody.body)
+
+        # Exit point
+        if self.flow.block:
+            self.flow.block.add_child(self.flow.exit_point)
+
+        # Cleanup graph
+        self.flow.normalize()
+        check_definitions(self.flow, self.current_directives)
+        self.flow.blocks.add(self.flow.entry_point)
+
+        if self.gv_ctx is not None:
+            self.gv_ctx.add(GV(node.local_scope.name, self.flow))
+
+        # Back to the enclosing scope and flow.
+        self.env, self.flow = self.stack.pop()
+        return node
+
+    def visit_DefNode(self, node):
+        """Flag the def node as used, then handle it like any function."""
+        node.used = True
+        return self.visit_FuncDefNode(node)
+
+    def visit_GeneratorBodyDefNode(self, node):
+        """Return ``node`` without descending into it.
+
+        The generator body itself is visited from ``visit_FuncDefNode``
+        (via ``node.gbody.body``).
+        """
+        return node
+
+    def visit_CTypeDefNode(self, node):
+        """Return ``node`` unchanged; its children are not visited."""
+        return node
+
+    def mark_assignment(self, lhs, rhs=None, rhs_scope=None):
+        """Record an assignment of ``rhs`` to target ``lhs`` in the flow.
+
+        Does nothing when there is no current block.  While at least one
+        exception descriptor is active, the current block is linked to the
+        innermost descriptor's entry point and a new block is started,
+        both before and after the assignment is recorded.
+
+        ``rhs`` defaults to ``self.object_expr`` when falsy.  Name targets
+        are recorded via ``self.flow.mark_assignment``; sequence targets
+        are handled item by item through recursive calls; any other kind
+        of target is simply visited.
+        """
+        if not self.flow.block:
+            return
+        if self.flow.exceptions:
+            exc_descr = self.flow.exceptions[-1]
+            self.flow.block.add_child(exc_descr.entry_point)
+            self.flow.nextblock()
+
+        if not rhs:
+            rhs = self.object_expr
+        if lhs.is_name:
+            # Prefer the entry already attached to the node, else look it up.
+            if lhs.entry is not None:
+                entry = lhs.entry
+            else:
+                entry = self.env.lookup(lhs.name)
+            if entry is None:  # TODO: This shouldn't happen...
+                return
+            self.flow.mark_assignment(lhs, rhs, entry, rhs_scope=rhs_scope)
+        elif lhs.is_sequence_constructor:
+            for i, arg in enumerate(lhs.args):
+                if arg.is_starred:
+                    # "a, *b = x" assigns a list to "b"
+                    item_node = TypedExprNode(Builtin.list_type, may_be_none=False, pos=arg.pos)
+                elif rhs is self.object_expr:
+                    # Unknown right-hand side: every item is a generic object.
+                    item_node = rhs
+                else:
+                    item_node = rhs.inferable_item_node(i)
+                # Note: rhs_scope is not forwarded to the per-item calls.
+                self.mark_assignment(arg, item_node)
+        else:
+            self._visit(lhs)
+
+        if self.flow.exceptions:
+            exc_descr = self.flow.exceptions[-1]
+            self.flow.block.add_child(exc_descr.entry_point)
+            self.flow.nextblock()
+
+    def mark_position(self, node):
+        """Mark position if DOT output is enabled."""
+        if self.current_directives['control_flow.dot_output']:
+            self.flow.mark_position(node)
+
+    def visit_FromImportStatNode(self, node):
+        for name, target in node.items:
+            if name != "*":
+                self.mark_assignment(target)
+        self.visitchildren(node)
+        return node
+
+    def visit_AssignmentNode(self, node):
+        """Fallback for assignment node types that have no dedicated handler.
+
+        Always raises ``InternalError`` naming the node's type.
+        """
+        raise InternalError("Unhandled assignment node %s" % type(node))
+
+    def visit_SingleAssignmentNode(self, node):
+        """Visit the right-hand side, then record the assignment to ``lhs``."""
+        self._visit(node.rhs)
+        self.mark_assignment(node.lhs, node.rhs)
+        return node
+
+    def visit_CascadedAssignmentNode(self, node):
+        """Visit the shared right-hand side once, then record one
+        assignment per target in ``node.lhs_list``."""
+        self._visit(node.rhs)
+        for lhs in node.lhs_list:
+            self.mark_assignment(lhs, node.rhs)
+        return node
+
+    def visit_ParallelAssignmentNode(self, node):
+        collector = AssignmentCollector()
+        collector.visitchildren(node)
+        for lhs, rhs in collector.assignments:
+            self._visit(rhs)
+        for lhs, rhs in collector.assignments:
+            self.mark_assignment(lhs, rhs)
+        return node
+
+    def visit_InPlaceAssignmentNode(self, node):
+        """Handle an in-place assignment node.
+
+        The children are visited with ``in_inplace_assignment`` set, which
+        ``visit_NameNode`` uses to permit reading reduction variables.
+        The target is then recorded as assigned from the constant-folded
+        binary operation created by ``node.create_binop_node()``.
+        """
+        self.in_inplace_assignment = True
+        self.visitchildren(node)
+        self.in_inplace_assignment = False
+        self.mark_assignment(node.lhs, self.constant_folder(node.create_binop_node()))
+        return node
+
+    def visit_DelStatNode(self, node):
+        """Record deletions for the name arguments of a ``del`` statement.
+
+        For a name argument, an error is reported if its entry is shared
+        with a closure; unless ``node.ignore_nonexisting`` is set the name
+        is first visited (recording a reference), and then the deletion is
+        marked in the flow.  Non-name arguments are just visited.
+        """
+        for arg in node.args:
+            if arg.is_name:
+                # NOTE(review): other methods in this class guard against a
+                # falsy result from env.lookup(); this one dereferences the
+                # entry directly - confirm it cannot be None here.
+                entry = arg.entry or self.env.lookup(arg.name)
+                if entry.in_closure or entry.from_closure:
+                    error(arg.pos,
+                          "can not delete variable '%s' "
+                          "referenced in nested scope" % entry.name)
+                if not node.ignore_nonexisting:
+                    self._visit(arg)  # mark reference
+                self.flow.mark_deletion(arg, entry)
+            else:
+                self._visit(arg)
+        return node
+
+    def visit_CArgDeclNode(self, node):
+        entry = self.env.lookup(node.name)
+        if entry:
+            may_be_none = not node.not_none
+            self.flow.mark_argument(
+                node, TypedExprNode(entry.type, may_be_none), entry)
+        return node
+
+    def visit_NameNode(self, node):
+        if self.flow.block:
+            entry = node.entry or self.env.lookup(node.name)
+            if entry:
+                self.flow.mark_reference(node, entry)
+
+                if entry in self.reductions and not self.in_inplace_assignment:
+                    error(node.pos,
+                          "Cannot read reduction variable in loop body")
+
+        return node
+
+    def visit_StatListNode(self, node):
+        if self.flow.block:
+            for stat in node.stats:
+                self._visit(stat)
+                if not self.flow.block:
+                    stat.is_terminator = True
+                    break
+        return node
+
+    def visit_Node(self, node):
+        """Generic handler: visit the children, then mark the node's
+        position (only has an effect when DOT output is enabled)."""
+        self.visitchildren(node)
+        self.mark_position(node)
+        return node
+
+    def visit_SizeofVarNode(self, node):
+        """Return ``node`` without visiting its operand, so no reference
+        is recorded for it."""
+        return node
+
+    def visit_TypeidNode(self, node):
+        """Return ``node`` without visiting its operand, so no reference
+        is recorded for it."""
+        return node
+
+    def visit_IfStatNode(self, node):
+        """Build the flow graph for an if/elif/else statement.
+
+        Each clause gets a condition block chained off the previous
+        condition and a body block; every body that can fall through is
+        linked to a common ``next_block``.  Without an else clause the
+        last condition block is linked to ``next_block`` as well.  If
+        nothing reaches ``next_block``, the current block becomes None.
+        """
+        next_block = self.flow.newblock()
+        parent = self.flow.block
+        # If clauses
+        for clause in node.if_clauses:
+            # The condition block hangs off the previous condition (or the
+            # block preceding the statement for the first clause).
+            parent = self.flow.nextblock(parent)
+            self._visit(clause.condition)
+            self.flow.nextblock()
+            self._visit(clause.body)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+        # Else clause
+        if node.else_clause:
+            self.flow.nextblock(parent=parent)
+            self._visit(node.else_clause)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+        else:
+            parent.add_child(next_block)
+
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def visit_AssertStatNode(self, node):
+        """Essentially an if-condition that wraps a RaiseStatNode.
+
+        The condition is visited in its own block, ``node.exception`` in a
+        following block; both the condition block and (if it can fall
+        through) the exception block are linked to ``next_block``.
+        """
+        self.mark_position(node)
+        next_block = self.flow.newblock()
+        parent = self.flow.block
+        # failure case
+        parent = self.flow.nextblock(parent)
+        self._visit(node.condition)
+        self.flow.nextblock()
+        self._visit(node.exception)
+        if self.flow.block:
+            self.flow.block.add_child(next_block)
+        # The condition block always continues to next_block.
+        parent.add_child(next_block)
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def visit_WhileStatNode(self, node):
+        """Build the flow graph for a while loop.
+
+        A ``LoopDescr(next_block, condition_block)`` is pushed while the
+        condition and body are visited so that break/continue handlers
+        can find their targets.  A body that can fall through is linked
+        back to the condition block and also to ``next_block``.  The else
+        clause, if any, hangs off the condition block; otherwise the
+        condition block is linked to ``next_block`` directly.
+        """
+        condition_block = self.flow.nextblock()
+        next_block = self.flow.newblock()
+        # Condition block
+        self.flow.loops.append(LoopDescr(next_block, condition_block))
+        if node.condition:
+            self._visit(node.condition)
+        # Body block
+        self.flow.nextblock()
+        self._visit(node.body)
+        self.flow.loops.pop()
+        # Loop it
+        if self.flow.block:
+            self.flow.block.add_child(condition_block)
+            self.flow.block.add_child(next_block)
+        # Else clause
+        if node.else_clause:
+            self.flow.nextblock(parent=condition_block)
+            self._visit(node.else_clause)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+        else:
+            condition_block.add_child(next_block)
+
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def mark_forloop_target(self, node):
+        """Record the assignment(s) made to the target of a for-in loop.
+
+        Calls to names that are unresolved or builtin get special handling:
+
+        * ``reversed(x)`` with one argument is treated like iterating ``x``;
+        * ``enumerate(x)`` with one argument, a two-item target and a name
+          ``x`` of builtin type records a Py_ssize_t assignment to the
+          first target item and continues with the second item and ``x``;
+        * ``range``/``xrange`` record the first (up to) two arguments as
+          assignments to the target, plus ``args[0] + args[2]`` when a
+          third argument is present.
+
+        In every other case the target is assigned from ``node.item``.
+        """
+        # TODO: Remove redundancy with range optimization...
+        is_special = False
+        sequence = node.iterator.sequence
+        target = node.target
+        env = node.iterator.expr_scope or self.env
+        # First pass: unwrap reversed() / enumerate() around the sequence.
+        if isinstance(sequence, ExprNodes.SimpleCallNode):
+            function = sequence.function
+            if sequence.self is None and function.is_name:
+                entry = env.lookup(function.name)
+                if not entry or entry.is_builtin:
+                    if function.name == 'reversed' and len(sequence.args) == 1:
+                        sequence = sequence.args[0]
+                    elif function.name == 'enumerate' and len(sequence.args) == 1:
+                        if target.is_sequence_constructor and len(target.args) == 2:
+                            iterator = sequence.args[0]
+                            if iterator.is_name:
+                                iterator_type = iterator.infer_type(env)
+                                if iterator_type.is_builtin_type:
+                                    # assume that builtin types have a length within Py_ssize_t
+                                    self.mark_assignment(
+                                        target.args[0],
+                                        ExprNodes.IntNode(target.pos, value='PY_SSIZE_T_MAX',
+                                                          type=PyrexTypes.c_py_ssize_t_type),
+                                        rhs_scope=node.iterator.expr_scope)
+                                    target = target.args[1]
+                                    sequence = sequence.args[0]
+        # Second pass: the (possibly unwrapped) sequence may be a range() call.
+        if isinstance(sequence, ExprNodes.SimpleCallNode):
+            function = sequence.function
+            if sequence.self is None and function.is_name:
+                entry = env.lookup(function.name)
+                if not entry or entry.is_builtin:
+                    if function.name in ('range', 'xrange'):
+                        is_special = True
+                        for arg in sequence.args[:2]:
+                            self.mark_assignment(target, arg, rhs_scope=node.iterator.expr_scope)
+                        if len(sequence.args) > 2:
+                            self.mark_assignment(target, self.constant_folder(
+                                ExprNodes.binop_node(node.pos,
+                                                     '+',
+                                                     sequence.args[0],
+                                                     sequence.args[2])),
+                                                rhs_scope=node.iterator.expr_scope)
+
+        if not is_special:
+            # A for-loop basically translates to subsequent calls to
+            # __getitem__(), so using an IndexNode here allows us to
+            # naturally infer the base type of pointers, C arrays,
+            # Python strings, etc., while correctly falling back to an
+            # object type when the base type cannot be handled.
+
+            self.mark_assignment(target, node.item, rhs_scope=node.iterator.expr_scope)
+
+    def visit_AsyncForStatNode(self, node):
+        """Delegate to ``visit_ForInStatNode``, which has a dedicated
+        branch for async-for nodes."""
+        return self.visit_ForInStatNode(node)
+
+    def visit_ForInStatNode(self, node):
+        """Build the flow graph for a for-in style loop.
+
+        Also used for async-for nodes and (via ``visit_ParallelRangeNode``)
+        for parallel range nodes.  The iterator is visited in the
+        condition block, the target assignment gets its own block, and the
+        body follows.  A body that can fall through loops back to the
+        condition block; the else clause or, without one, the exit edge
+        hangs off the condition block.
+        """
+        condition_block = self.flow.nextblock()
+        next_block = self.flow.newblock()
+        # Condition with iterator
+        self.flow.loops.append(LoopDescr(next_block, condition_block))
+        self._visit(node.iterator)
+        # Target assignment
+        self.flow.nextblock()
+
+        if isinstance(node, Nodes.ForInStatNode):
+            self.mark_forloop_target(node)
+        elif isinstance(node, Nodes.AsyncForStatNode):
+            # not entirely correct, but good enough for now
+            self.mark_assignment(node.target, node.item)
+        else:  # Parallel
+            self.mark_assignment(node.target)
+
+        # Body block
+        if isinstance(node, Nodes.ParallelRangeNode):
+            # In case of an invalid
+            # (the original comment above is truncated) Every assigned
+            # private variable except the loop target is marked as deleted
+            # before the body is visited.
+            self._delete_privates(node, exclude=node.target.entry)
+
+        self.flow.nextblock()
+        self._visit(node.body)
+        self.flow.loops.pop()
+
+        # Loop it
+        if self.flow.block:
+            self.flow.block.add_child(condition_block)
+        # Else clause
+        if node.else_clause:
+            self.flow.nextblock(parent=condition_block)
+            self._visit(node.else_clause)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+        else:
+            condition_block.add_child(next_block)
+
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def _delete_privates(self, node, exclude=None):
+        for private_node in node.assigned_nodes:
+            if not exclude or private_node.entry is not exclude:
+                self.flow.mark_deletion(private_node, private_node.entry)
+
+    def visit_ParallelRangeNode(self, node):
+        """Analyse a parallel range loop with its reduction variables.
+
+        For the duration of the loop, ``self.reductions`` is replaced by a
+        copy extended with the entries that ``node.assignments`` marks as
+        reductions; every assigned private variable is flagged with
+        ``error_on_uninitialized``.  The loop itself is handled by
+        ``visit_ForInStatNode``.  The previous reduction set is restored
+        before returning.
+        """
+        reductions = self.reductions
+
+        # if node.target is None or not a NameNode, an error will have
+        # been previously issued
+        if hasattr(node.target, 'entry'):
+            self.reductions = set(reductions)
+
+            for private_node in node.assigned_nodes:
+                private_node.entry.error_on_uninitialized = True
+                pos, reduction = node.assignments[private_node.entry]
+                if reduction:
+                    self.reductions.add(private_node.entry)
+
+            node = self.visit_ForInStatNode(node)
+
+        self.reductions = reductions
+        return node
+
+    def visit_ParallelWithBlockNode(self, node):
+        """Analyse a parallel with-block.
+
+        Every assigned private variable is flagged with
+        ``error_on_uninitialized`` and marked as deleted both before and
+        after the block's children are visited.
+        """
+        for private_node in node.assigned_nodes:
+            private_node.entry.error_on_uninitialized = True
+
+        self._delete_privates(node)
+        self.visitchildren(node)
+        self._delete_privates(node)
+
+        return node
+
+    def visit_ForFromStatNode(self, node):
+        """Build the flow graph for a for-from loop.
+
+        Both bounds (and the step, if present) are visited in the
+        condition block.  The target is recorded as assigned from
+        ``bound1`` and, when a step is given, also from the
+        constant-folded ``bound1 + step``.  Body, loop-back edge and else
+        clause are wired the same way as in ``visit_ForInStatNode``.
+        """
+        condition_block = self.flow.nextblock()
+        next_block = self.flow.newblock()
+        # Condition with iterator
+        self.flow.loops.append(LoopDescr(next_block, condition_block))
+        self._visit(node.bound1)
+        self._visit(node.bound2)
+        if node.step is not None:
+            self._visit(node.step)
+        # Target assignment
+        self.flow.nextblock()
+        self.mark_assignment(node.target, node.bound1)
+        if node.step is not None:
+            self.mark_assignment(node.target, self.constant_folder(
+                ExprNodes.binop_node(node.pos, '+', node.bound1, node.step)))
+        # Body block
+        self.flow.nextblock()
+        self._visit(node.body)
+        self.flow.loops.pop()
+        # Loop it
+        if self.flow.block:
+            self.flow.block.add_child(condition_block)
+        # Else clause
+        if node.else_clause:
+            self.flow.nextblock(parent=condition_block)
+            self._visit(node.else_clause)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+        else:
+            condition_block.add_child(next_block)
+
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def visit_LoopNode(self, node):
+        """Fallback for loop node types without a dedicated handler.
+
+        Always raises ``InternalError``.
+        """
+        raise InternalError("Generic loops are not supported")
+
+    def visit_WithTargetAssignmentStatNode(self, node):
+        """Record the with-statement target (``node.lhs``) as assigned from
+        the enclosing with node's ``enter_call``."""
+        self.mark_assignment(node.lhs, node.with_node.enter_call)
+        return node
+
+    def visit_WithStatNode(self, node):
+        """Visit the manager expression, the enter call and the body, in
+        that order; no other children are visited."""
+        self._visit(node.manager)
+        self._visit(node.enter_call)
+        self._visit(node.body)
+        return node
+
+    def visit_TryExceptStatNode(self, node):
+        """Build the flow graph for a try/except(/else) statement.
+
+        An ``ExceptionDescr`` for a fresh entry point block is active
+        while the body is visited.  The else clause is only visited when
+        the body can fall through.  The except clauses are chained: each
+        clause's patterns are visited in the current entry point block and
+        a new entry point for the following clause is created from it.
+        If an outer exception descriptor exists, the last entry point is
+        linked to that descriptor's entry point.
+        """
+        # After exception handling
+        next_block = self.flow.newblock()
+        # Body block
+        self.flow.newblock()
+        # Exception entry point
+        entry_point = self.flow.newblock()
+        self.flow.exceptions.append(ExceptionDescr(entry_point))
+        self.flow.nextblock()
+        ## XXX: links to exception handling point should be added by
+        ## XXX: children nodes
+        self.flow.block.add_child(entry_point)
+        self.flow.nextblock()
+        # in_try_block is a counter; visit_RaiseStatNode reads it.
+        self.flow.in_try_block += 1
+        self._visit(node.body)
+        self.flow.in_try_block -= 1
+        self.flow.exceptions.pop()
+
+        # After exception
+        if self.flow.block:
+            if node.else_clause:
+                self.flow.nextblock()
+                self._visit(node.else_clause)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+
+        for clause in node.except_clauses:
+            self.flow.block = entry_point
+            if clause.pattern:
+                for pattern in clause.pattern:
+                    self._visit(pattern)
+            else:
+                # TODO: handle * pattern
+                pass
+            # Entry point for the next clause, reached when this one
+            # does not match.
+            entry_point = self.flow.newblock(parent=self.flow.block)
+            self.flow.nextblock()
+            if clause.target:
+                self.mark_assignment(clause.target)
+            self._visit(clause.body)
+            if self.flow.block:
+                self.flow.block.add_child(next_block)
+
+        if self.flow.exceptions:
+            entry_point.add_child(self.flow.exceptions[-1].entry_point)
+
+        if next_block.parents:
+            self.flow.block = next_block
+        else:
+            self.flow.block = None
+        return node
+
+    def visit_TryFinallyStatNode(self, node):
+        """Build the flow graph for a try/finally statement.
+
+        The finally code is visited twice before the body: once as
+        ``node.finally_except_clause`` (exception path, starting at
+        ``entry_point``) and once as ``node.finally_clause`` (normal path,
+        from ``finally_enter`` to ``finally_exit``).  The resulting
+        ``ExceptionDescr`` is active while the body is visited and is also
+        registered with the innermost loop, if any, so that
+        break/continue/return handlers can route through the finally
+        clause.
+        """
+        body_block = self.flow.nextblock()
+
+        # Exception entry point
+        entry_point = self.flow.newblock()
+        self.flow.block = entry_point
+        self._visit(node.finally_except_clause)
+
+        if self.flow.block and self.flow.exceptions:
+            self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+
+        # Normal execution
+        finally_enter = self.flow.newblock()
+        self.flow.block = finally_enter
+        self._visit(node.finally_clause)
+        # None here means the finally clause itself cannot fall through.
+        finally_exit = self.flow.block
+
+        descr = ExceptionDescr(entry_point, finally_enter, finally_exit)
+        self.flow.exceptions.append(descr)
+        if self.flow.loops:
+            self.flow.loops[-1].exceptions.append(descr)
+        self.flow.block = body_block
+        body_block.add_child(entry_point)
+        self.flow.nextblock()
+        self.flow.in_try_block += 1
+        self._visit(node.body)
+        self.flow.in_try_block -= 1
+        self.flow.exceptions.pop()
+        if self.flow.loops:
+            self.flow.loops[-1].exceptions.pop()
+
+        if self.flow.block:
+            self.flow.block.add_child(finally_enter)
+            if finally_exit:
+                self.flow.block = self.flow.nextblock(parent=finally_exit)
+            else:
+                self.flow.block = None
+        return node
+
+    def visit_RaiseStatNode(self, node):
+        """Handle a raise statement.
+
+        The children are visited, the current block is linked to the
+        innermost exception entry point (if any) and control flow ends
+        (current block becomes None).  ``node.in_try_block`` is set to
+        True when the raise occurs inside a try body.
+        """
+        self.mark_position(node)
+        self.visitchildren(node)
+        if self.flow.exceptions:
+            self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+        self.flow.block = None
+        if self.flow.in_try_block:
+            node.in_try_block = True
+        return node
+
+    def visit_ReraiseStatNode(self, node):
+        """Handle a re-raise statement: link the current block to the
+        innermost exception entry point (if any) and end control flow."""
+        self.mark_position(node)
+        if self.flow.exceptions:
+            self.flow.block.add_child(self.flow.exceptions[-1].entry_point)
+        self.flow.block = None
+        return node
+
+    def visit_ReturnStatNode(self, node):
+        """Handle a return statement.
+
+        Active exception descriptors are searched from innermost to
+        outermost for one with a finally clause.  If one is found, the
+        current block is linked to its ``finally_enter`` and its
+        ``finally_exit`` (if any) is linked to the next outer finally
+        clause or, failing that, to the function's exit point.  Without
+        any finally clause the current block is linked straight to the
+        exit point.  Control flow ends afterwards.
+        """
+        self.mark_position(node)
+        self.visitchildren(node)
+
+        # A single shared iterator: the inner loop below continues the
+        # search from where the outer loop stopped.
+        outer_exception_handlers = iter(self.flow.exceptions[::-1])
+        for handler in outer_exception_handlers:
+            if handler.finally_enter:
+                self.flow.block.add_child(handler.finally_enter)
+                if handler.finally_exit:
+                    # 'return' goes to function exit, or to the next outer 'finally' clause
+                    exit_point = self.flow.exit_point
+                    for next_handler in outer_exception_handlers:
+                        if next_handler.finally_enter:
+                            exit_point = next_handler.finally_enter
+                            break
+                    handler.finally_exit.add_child(exit_point)
+                break
+        else:
+            # No enclosing finally clause at all.
+            if self.flow.block:
+                self.flow.block.add_child(self.flow.exit_point)
+        self.flow.block = None
+        return node
+
+    def visit_BreakStatNode(self, node):
+        """Handle a break statement.
+
+        Outside of any loop the node is returned untouched.  Otherwise
+        the current block is linked to the innermost loop's ``next_block``,
+        routed through the innermost finally clause registered on that
+        loop when there is one.  Control flow ends afterwards.
+        """
+        if not self.flow.loops:
+            #error(node.pos, "break statement not inside loop")
+            return node
+        loop = self.flow.loops[-1]
+        self.mark_position(node)
+        for exception in loop.exceptions[::-1]:
+            if exception.finally_enter:
+                self.flow.block.add_child(exception.finally_enter)
+                if exception.finally_exit:
+                    exception.finally_exit.add_child(loop.next_block)
+                break
+        else:
+            self.flow.block.add_child(loop.next_block)
+        self.flow.block = None
+        return node
+
+    def visit_ContinueStatNode(self, node):
+        """Handle a continue statement.
+
+        Outside of any loop the node is returned untouched.  Otherwise
+        the current block is linked to the innermost loop's ``loop_block``,
+        routed through the innermost finally clause registered on that
+        loop when there is one.  Control flow ends afterwards.
+        """
+        if not self.flow.loops:
+            #error(node.pos, "continue statement not inside loop")
+            return node
+        loop = self.flow.loops[-1]
+        self.mark_position(node)
+        for exception in loop.exceptions[::-1]:
+            if exception.finally_enter:
+                self.flow.block.add_child(exception.finally_enter)
+                if exception.finally_exit:
+                    exception.finally_exit.add_child(loop.loop_block)
+                break
+        else:
+            self.flow.block.add_child(loop.loop_block)
+        self.flow.block = None
+        return node
+
+    def visit_ComprehensionNode(self, node):
+        """Visit the loop of a comprehension.
+
+        When the comprehension has its own ``expr_scope``, that scope
+        becomes ``self.env`` while the loop is visited; the flow graph is
+        not switched (the saved flow is discarded on pop).
+        """
+        if node.expr_scope:
+            self.stack.append((self.env, self.flow))
+            self.env = node.expr_scope
+        # Skip append node here
+        self._visit(node.loop)
+        if node.expr_scope:
+            self.env, _ = self.stack.pop()
+        return node
+
+    def visit_ScopedExprNode(self, node):
+        """Visit an iterator node inside the scope it is evaluated in.
+
+        Only ``IteratorNode`` and ``AsyncIteratorNode`` are accepted
+        (asserted).  When the node has an ``expr_scope``, both the
+        environment and the flow graph found for that scope via
+        ``find_in_stack`` are made current while the children are visited,
+        and the previous pair is restored afterwards.
+        """
+        # currently this is written to deal with these two types
+        # (with comprehensions covered in their own function)
+        assert isinstance(node, (ExprNodes.IteratorNode, ExprNodes.AsyncIteratorNode)), node
+        if node.expr_scope:
+            self.stack.append((self.env, self.flow))
+            self.flow = self.find_in_stack(node.expr_scope)
+            self.env = node.expr_scope
+        self.visitchildren(node)
+        if node.expr_scope:
+            self.env, self.flow = self.stack.pop()
+        return node
+
+    def visit_PyClassDefNode(self, node):
+        """Analyse a Python class definition.
+
+        The class-creation children are visited and the class name is
+        recorded as assigned in the enclosing scope.  The class body is
+        then visited with the class scope as ``self.env`` but within the
+        same flow graph, bracketed by new blocks.
+        """
+        self.visitchildren(node, ('dict', 'metaclass',
+                                  'mkw', 'bases', 'class_result'))
+        self.flow.mark_assignment(node.target, node.classobj,
+                                  self.env.lookup(node.target.name))
+        self.stack.append((self.env, self.flow))
+        self.env = node.scope
+        self.flow.nextblock()
+        if node.doc_node:
+            # fake_rhs_expr: check_definitions() does not add such
+            # assignments to the entry's cf_assignments list.
+            self.flow.mark_assignment(node.doc_node, fake_rhs_expr, node.doc_node.entry)
+        self.visitchildren(node, ('body',))
+        self.flow.nextblock()
+        # Only the environment is restored; the flow was never switched.
+        self.env, _ = self.stack.pop()
+        return node
+
+    def visit_CClassDefNode(self, node):
+        """Visit a cdef class with its scope registered on the stack.
+
+        Neither ``self.env`` nor ``self.flow`` is changed; the
+        ``(node.scope, self.flow)`` pair is only pushed so that
+        ``find_in_stack`` can resolve the class scope.
+        """
+        # just make sure the nodes scope is findable in-case there is a list comprehension in it
+        self.stack.append((node.scope, self.flow))
+        self.visitchildren(node)
+        self.stack.pop()
+        return node
+
+    def visit_AmpersandNode(self, node):
+        """Handle an address-of expression.
+
+        When the operand is a plain name, an assignment with
+        ``fake_rhs_expr`` is recorded for it before the children are
+        visited.
+        """
+        if node.operand.is_name:
+            # Fake assignment to silence warning
+            self.mark_assignment(node.operand, fake_rhs_expr)
+        self.visitchildren(node)
+        return node
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Main.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Main.py
new file mode 100644
index 0000000000000000000000000000000000000000..80946c0776719637d247baaa87a7af9c988f0f2f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Main.py
@@ -0,0 +1,789 @@
+#
+#   Cython Top Level
+#
+
+from __future__ import absolute_import, print_function
+
+import os
+import re
+import sys
+import io
+
+# Refuse to run on unsupported interpreters: anything older than 2.7, and
+# the 3.x releases before 3.3. The message goes to stderr, exit status is 1.
+if sys.version_info[:2] < (2, 7) or (3, 0) <= sys.version_info[:2] < (3, 3):
+    sys.stderr.write("Sorry, Cython requires Python 2.7 or 3.3+, found %d.%d\n" % tuple(sys.version_info[:2]))
+    sys.exit(1)
+
+# Use the 'basestring' type from the '__builtin__' module when it can be
+# imported; otherwise fall back to 'str'.
+try:
+    from __builtin__ import basestring
+except ImportError:
+    basestring = str
+
+# Do not import Parsing here, import it when needed, because Parsing imports
+# Nodes, which globally needs debug command line options initialized to set a
+# conditional metaclass. These options are processed by CmdLine called from
+# main() in this file.
+# import Parsing
+from . import Errors
+from .StringEncoding import EncodedString
+from .Scanning import PyrexScanner, FileSourceDescriptor
+from .Errors import PyrexError, CompileError, error, warning
+from .Symtab import ModuleScope
+from .. import Utils
+from . import Options
+from .Options import CompilationOptions, default_options
+from .CmdLine import parse_command_line
+from .Lexicon import (unicode_start_ch_any, unicode_continuation_ch_any,
+                      unicode_start_ch_range, unicode_continuation_ch_range)
+
+
+def _make_range_re(chrs):
+    # Consume 'chrs' two items at a time (range start, range end) and render
+    # every pair as "start-end"; the pieces are concatenated without any
+    # separator, e.g. u"azAZ" -> u"a-zA-Z".
+    return u"".join(
+        u"{0}-{1}".format(chrs[pos], chrs[pos + 1])
+        for pos in range(0, len(chrs), 2))
+
+# Pattern for a single module name component: one character from the "start"
+# tables followed by any number of characters from the start or continuation
+# tables (the tables come from the Lexicon module).
+# py2 version looked like r"[A-Za-z_][A-Za-z0-9_]*(\.[A-Za-z_][A-Za-z0-9_]*)*$"
+module_name_pattern = u"[{0}{1}][{0}{2}{1}{3}]*".format(
+    unicode_start_ch_any, _make_range_re(unicode_start_ch_range),
+    unicode_continuation_ch_any,
+    _make_range_re(unicode_continuation_ch_range))
+# Rebind the name to the compiled regex for a full, possibly dotted, module
+# name: one component followed by any number of ".component" groups, then "$".
+module_name_pattern = re.compile(u"{0}(\\.{0})*$".format(module_name_pattern))
+
+
+# Absolute path of the 'Includes' directory located in the parent directory
+# of the directory that contains this file.
+standard_include_path = os.path.abspath(
+    os.path.join(os.path.dirname(os.path.dirname(__file__)), 'Includes'))
+
+
+class Context(object):
+    #  This class encapsulates the context needed for compiling
+    #  one or more Cython implementation files along with their
+    #  associated and imported declaration files. It includes
+    #  the root of the module import namespace and the list
+    #  of directories to search for include files.
+    #
+    #  modules               {string : ModuleScope}
+    #  include_directories   [string]
+    #  future_directives     [object]
+    #  language_level        int     currently 2 or 3 for Python 2/3
+
+    cython_scope = None
+    language_level = None  # warn when not set but default to Py2
+
+    def __init__(self, include_directories, compiler_directives, cpp=False,
+                 language_level=None, options=None):
+        # cython_scope is a hack, set to False by subclasses, in order to break
+        # an infinite loop.
+        # Better code organization would fix it.
+
+        from . import Builtin, CythonScope
+        self.modules = {"__builtin__" : Builtin.builtin_scope}
+        self.cython_scope = CythonScope.create_cython_scope(self)
+        self.modules["cython"] = self.cython_scope
+        self.include_directories = include_directories
+        self.future_directives = set()
+        self.compiler_directives = compiler_directives
+        self.cpp = cpp
+        self.options = options
+
+        self.pxds = {}  # full name -> node tree
+        self._interned = {}  # (type(value), value, *key_args) -> interned_value
+
+        if language_level is not None:
+            self.set_language_level(language_level)
+
+        self.legacy_implicit_noexcept = self.compiler_directives.get('legacy_implicit_noexcept', False)
+
+        self.gdb_debug_outputwriter = None
+
+    @classmethod
+    def from_options(cls, options):
+        # Build a context from the include_path, compiler_directives, cplus
+        # and language_level attributes of 'options'; 'options' itself is
+        # kept on the new instance.
+        return cls(options.include_path, options.compiler_directives,
+                   options.cplus, options.language_level, options=options)
+
+    def set_language_level(self, level):
+        # Set self.language_level and the matching set of future directives.
+        # '3str' means level 3 without unicode_literals; any other value is
+        # converted with int(). From level 3 on, 'builtins' becomes an alias
+        # of the '__builtin__' module scope.
+        from .Future import print_function, unicode_literals, absolute_import, division, generator_stop
+        future_directives = set()
+        if level == '3str':
+            level = 3
+        else:
+            level = int(level)
+            if level >= 3:
+                future_directives.add(unicode_literals)
+        if level >= 3:
+            future_directives.update([print_function, absolute_import, division, generator_stop])
+        self.language_level = level
+        self.future_directives = future_directives
+        if level >= 3:
+            self.modules['builtins'] = self.modules['__builtin__']
+
+    def intern_ustring(self, value, encoding=None):
+        # Return an EncodedString for 'value' (carrying 'encoding' if one is
+        # given). The instance is cached, so an equal (value, encoding) pair
+        # yields the same object on later calls.
+        key = (EncodedString, value, encoding)
+        try:
+            return self._interned[key]
+        except KeyError:
+            pass
+        value = EncodedString(value)
+        if encoding:
+            value.encoding = encoding
+        self._interned[key] = value
+        return value
+
+    # pipeline creation functions can now be found in Pipeline.py
+
+    def process_pxd(self, source_desc, scope, module_name):
+        # Run the declaration pipeline for a cimported source and return the
+        # result of Pipeline.run_pipeline(). A FileSourceDescriptor whose
+        # _file_type is 'pyx' goes through the pyx-as-pxd pipeline; anything
+        # else goes through the regular pxd pipeline.
+        from . import Pipeline
+        if isinstance(source_desc, FileSourceDescriptor) and source_desc._file_type == 'pyx':
+            source = CompilationSource(source_desc, module_name, os.getcwd())
+            result_sink = create_default_resultobj(source, self.options)
+            pipeline = Pipeline.create_pyx_as_pxd_pipeline(self, result_sink)
+            result = Pipeline.run_pipeline(pipeline, source)
+        else:
+            pipeline = Pipeline.create_pxd_pipeline(self, scope, module_name)
+            result = Pipeline.run_pipeline(pipeline, source_desc)
+        return result
+
+    def nonfatal_error(self, exc):
+        # Hand 'exc' to Errors.report_error() and return whatever it returns.
+        return Errors.report_error(exc)
+
+    def _split_qualified_name(self, qualified_name, relative_import=False):
+        # Splits qualified_name into parts in form of 2-tuples: (PART_NAME, IS_PACKAGE).
+        qualified_name_parts = qualified_name.split('.')
+        last_part = qualified_name_parts.pop()
+        qualified_name_parts = [(p, True) for p in qualified_name_parts]
+        if last_part != '__init__':
+            # If Last part is __init__, then it is omitted. Otherwise, we need to check whether we can find
+            # __init__.pyx/__init__.py file to determine if last part is package or not.
+            is_package = False
+            for suffix in ('.py', '.pyx'):
+                path = self.search_include_directories(
+                    qualified_name, suffix=suffix, source_pos=None, source_file_path=None, sys_path=not relative_import)
+                if path:
+                    is_package = self._is_init_file(path)
+                    break
+
+            qualified_name_parts.append((last_part, is_package))
+        return qualified_name_parts
+
+    @staticmethod
+    def _is_init_file(path):
+        # True if the basename of 'path' is one of the __init__ files
+        # (.pyx, .py or .pxd); False otherwise, including for an empty path.
+        return os.path.basename(path) in ('__init__.pyx', '__init__.py', '__init__.pxd') if path else False
+
+    @staticmethod
+    def _check_pxd_filename(pos, pxd_pathname, qualified_name):
+        # Emit a deprecation warning when a dotted module name was resolved
+        # to a file whose own name (minus extension) is that dotted name.
+        if not pxd_pathname:
+            return
+        pxd_filename = os.path.basename(pxd_pathname)
+        if '.' in qualified_name and qualified_name == os.path.splitext(pxd_filename)[0]:
+            warning(pos, "Dotted filenames ('%s') are deprecated."
+                    " Please use the normal Python package directory layout." % pxd_filename, level=1)
+
+    def find_module(self, module_name, from_module=None, pos=None, need_pxd=1,
+                    absolute_fallback=True, relative_import=False):
+        # Finds and returns the module scope corresponding to
+        # the given relative or absolute module name. If this
+        # is the first time the module has been requested, finds
+        # the corresponding .pxd file and process it.
+        # If from_module is not None, it must be a module scope,
+        # and the module will first be searched for relative to
+        # that module, provided its name is not a dotted name.
+        debug_find_module = 0
+        if debug_find_module:
+            print("Context.find_module: module_name = %s, from_module = %s, pos = %s, need_pxd = %s" % (
+                module_name, from_module, pos, need_pxd))
+
+        scope = None
+        pxd_pathname = None
+        if from_module:
+            if module_name:
+                # from .module import ...
+                qualified_name = from_module.qualify_name(module_name)
+            else:
+                # from . import ...
+                qualified_name = from_module.qualified_name
+                scope = from_module
+                from_module = None
+        else:
+            qualified_name = module_name
+
+        if not module_name_pattern.match(qualified_name):
+            raise CompileError(pos or (module_name, 0, 0),
+                               u"'%s' is not a valid module name" % module_name)
+
+        if from_module:
+            if debug_find_module:
+                print("...trying relative import")
+            scope = from_module.lookup_submodule(module_name)
+            if not scope:
+                pxd_pathname = self.find_pxd_file(qualified_name, pos, sys_path=not relative_import)
+                self._check_pxd_filename(pos, pxd_pathname, qualified_name)
+                if pxd_pathname:
+                    is_package = self._is_init_file(pxd_pathname)
+                    scope = from_module.find_submodule(module_name, as_package=is_package)
+        if not scope:
+            if debug_find_module:
+                print("...trying absolute import")
+            if absolute_fallback:
+                qualified_name = module_name
+            # Walk down from this context, creating module scopes as needed.
+            scope = self
+            for name, is_package in self._split_qualified_name(qualified_name, relative_import=relative_import):
+                scope = scope.find_submodule(name, as_package=is_package)
+        if debug_find_module:
+            print("...scope = %s" % scope)
+        if not scope.pxd_file_loaded:
+            if debug_find_module:
+                print("...pxd not loaded")
+            if not pxd_pathname:
+                if debug_find_module:
+                    print("...looking for pxd file")
+                # Only look in sys.path if we are explicitly looking
+                # for a .pxd file.
+                pxd_pathname = self.find_pxd_file(qualified_name, pos, sys_path=need_pxd and not relative_import)
+                self._check_pxd_filename(pos, pxd_pathname, qualified_name)
+                if debug_find_module:
+                    print("......found %s" % pxd_pathname)
+                if not pxd_pathname and need_pxd:
+                    # Set pxd_file_loaded such that we don't need to
+                    # look for the non-existing pxd file next time.
+                    scope.pxd_file_loaded = True
+                    package_pathname = self.search_include_directories(
+                        qualified_name, suffix=".py", source_pos=pos, sys_path=not relative_import)
+                    if package_pathname and package_pathname.endswith(Utils.PACKAGE_FILES):
+                        pass
+                    else:
+                        error(pos, "'%s.pxd' not found" % qualified_name.replace('.', os.sep))
+            if pxd_pathname:
+                scope.pxd_file_loaded = True
+                try:
+                    if debug_find_module:
+                        print("Context.find_module: Parsing %s" % pxd_pathname)
+                    rel_path = module_name.replace('.', os.sep) + os.path.splitext(pxd_pathname)[1]
+                    if not pxd_pathname.endswith(rel_path):
+                        rel_path = pxd_pathname  # safety measure to prevent printing incorrect paths
+                    source_desc = FileSourceDescriptor(pxd_pathname, rel_path)
+                    err, result = self.process_pxd(source_desc, scope, qualified_name)
+                    if err:
+                        raise err
+                    (pxd_codenodes, pxd_scope) = result
+                    self.pxds[module_name] = (pxd_codenodes, pxd_scope)
+                except CompileError:
+                    # A CompileError raised while processing the pxd is
+                    # swallowed here; the scope is still returned.
+                    pass
+        return scope
+
+    def find_pxd_file(self, qualified_name, pos=None, sys_path=True, source_file_path=None):
+        # Search include path (and sys.path if sys_path is True) for
+        # the .pxd file corresponding to the given fully-qualified
+        # module name.
+        # Will find either a dotted filename or a file in a
+        # package directory. If a source file position is given,
+        # the directory containing the source file is searched first
+        # for a dotted filename, and its containing package root
+        # directory is searched first for a non-dotted filename.
+        pxd = self.search_include_directories(
+            qualified_name, suffix=".pxd", source_pos=pos, sys_path=sys_path, source_file_path=source_file_path)
+        if pxd is None and Options.cimport_from_pyx:
+            return self.find_pyx_file(qualified_name, pos, sys_path=sys_path)
+        return pxd
+
+    def find_pyx_file(self, qualified_name, pos=None, sys_path=True, source_file_path=None):
+        # Search include path for the .pyx file corresponding to the
+        # given fully-qualified module name, as for find_pxd_file().
+        return self.search_include_directories(
+            qualified_name, suffix=".pyx", source_pos=pos, sys_path=sys_path, source_file_path=source_file_path)
+
+    def find_include_file(self, filename, pos=None, source_file_path=None):
+        # Search list of include directories for filename.
+        # Reports an error and returns None if not found.
+        path = self.search_include_directories(
+            filename, source_pos=pos, include=True, source_file_path=source_file_path)
+        if not path:
+            error(pos, "'%s' not found" % filename)
+        return path
+
+    def search_include_directories(self, qualified_name,
+                                   suffix=None, source_pos=None, include=False, sys_path=False, source_file_path=None):
+        # Assemble the directory list (self.include_directories, then
+        # sys.path if 'sys_path' is true, then the standard include path)
+        # and delegate to the module-level search_include_directories().
+        include_dirs = self.include_directories
+        if sys_path:
+            include_dirs = include_dirs + sys.path
+        # include_dirs must be hashable for caching in @cached_function
+        include_dirs = tuple(include_dirs + [standard_include_path])
+        return search_include_directories(
+            include_dirs, qualified_name, suffix or "", source_pos, include, source_file_path)
+
+    def find_root_package_dir(self, file_path):
+        # Thin wrapper around Utils.find_root_package_dir().
+        return Utils.find_root_package_dir(file_path)
+
+    def check_package_dir(self, dir, package_names):
+        # Thin wrapper around Utils.check_package_dir(); 'package_names' is
+        # converted to a tuple before it is passed on.
+        return Utils.check_package_dir(dir, tuple(package_names))
+
+    def c_file_out_of_date(self, source_path, output_path):
+        # Return 1 if 'output_path' does not exist, or if the source file,
+        # its sibling .pxd file, or any "cimport"/"include" entry of the
+        # dependency file is reported newer than it by Utils.file_newer_than();
+        # return 0 otherwise.
+        if not os.path.exists(output_path):
+            return 1
+        c_time = Utils.modification_time(output_path)
+        if Utils.file_newer_than(source_path, c_time):
+            return 1
+        pxd_path = Utils.replace_suffix(source_path, ".pxd")
+        if os.path.exists(pxd_path) and Utils.file_newer_than(pxd_path, c_time):
+            return 1
+        for kind, name in self.read_dependency_file(source_path):
+            if kind == "cimport":
+                dep_path = self.find_pxd_file(name, source_file_path=source_path)
+            elif kind == "include":
+                dep_path = self.search_include_directories(name, source_file_path=source_path)
+            else:
+                continue
+            if dep_path and Utils.file_newer_than(dep_path, c_time):
+                return 1
+        return 0
+
+    def find_cimported_module_names(self, source_path):
+        # Names of all "cimport" entries in the dependency file of source_path.
+        return [ name for kind, name in self.read_dependency_file(source_path)
+                 if kind == "cimport" ]
+
+    def is_package_dir(self, dir_path):
+        # Thin wrapper around Utils.is_package_dir().
+        return Utils.is_package_dir(dir_path)
+
+    def read_dependency_file(self, source_path):
+        # Read the file obtained by replacing the suffix of 'source_path'
+        # with ".dep". Each line containing a space is split once at the
+        # first space into a [kind, name] list; other lines are ignored.
+        # Returns the list of those pairs, or an empty tuple when the file
+        # does not exist.
+        dep_path = Utils.replace_suffix(source_path, ".dep")
+        if os.path.exists(dep_path):
+            # Plain "r" mode: the "U" flag was removed in Python 3.11, where
+            # "rU" makes open() raise ValueError. Every line is stripped
+            # below, so a stray "\r" is dropped regardless of newline handling.
+            with open(dep_path, "r") as f:
+                chunks = [ line.split(" ", 1)
+                           for line in (l.strip() for l in f)
+                           if " " in line ]
+            return chunks
+        else:
+            return ()
+
+    def lookup_submodule(self, name):
+        # Look up a top-level module. Returns None if not found.
+        return self.modules.get(name, None)
+
+    def find_submodule(self, name, as_package=False):
+        # Find a top-level module, creating a new one if needed.
+        scope = self.lookup_submodule(name)
+        if not scope:
+            scope = ModuleScope(name,
+                parent_module = None, context = self, is_package=as_package)
+            self.modules[name] = scope
+        return scope
+
+    def parse(self, source_desc, scope, pxd, full_module_name):
+        # Raises RuntimeError for anything but a FileSourceDescriptor, and
+        # CompileError if the error count grew while parsing.
+        if not isinstance(source_desc, FileSourceDescriptor):
+            raise RuntimeError("Only file sources for code supported")
+        source_filename = source_desc.filename
+        scope.cpp = self.cpp
+        # Parse the given source file and return a parse tree.
+        num_errors = Errors.get_errors_count()
+        try:
+            with Utils.open_source_file(source_filename) as f:
+                from . import Parsing
+                s = PyrexScanner(f, source_desc, source_encoding = f.encoding,
+                                 scope = scope, context = self)
+                tree = Parsing.p_module(s, pxd, full_module_name)
+                if self.options.formal_grammar:
+                    try:
+                        from ..Parser import ConcreteSyntaxTree
+                    except ImportError:
+                        raise RuntimeError(
+                            "Formal grammar can only be used with compiled Cython with an available pgen.")
+                    ConcreteSyntaxTree.p_module(source_filename)
+        except UnicodeDecodeError as e:
+            #import traceback
+            #traceback.print_exc()
+            raise self._report_decode_error(source_desc, e)
+
+        if Errors.get_errors_count() > num_errors:
+            raise CompileError()
+        return tree
+
+    def _report_decode_error(self, source_desc, exc):
+        # Turn a UnicodeDecodeError into an error reported at the line and
+        # column of the offending offset, and return the result of error().
+        msg = exc.args[-1]
+        position = exc.args[2]
+        encoding = exc.args[0]
+
+        line = 1
+        column = idx = 0
+        # The file is re-read as iso8859-1 without newline translation and
+        # scanned line by line until the accumulated length reaches 'position'.
+        with io.open(source_desc.filename, "r", encoding='iso8859-1', newline='') as f:
+            for line, data in enumerate(f, 1):
+                idx += len(data)
+                if idx >= position:
+                    column = position - (idx - len(data)) + 1
+                    break
+
+        return error((source_desc, line, column),
+                     "Decoding error, missing or incorrect coding=<encoding-name> "
+                     "at top of source (cannot decode with encoding %r: %s)" % (encoding, msg))
+
+    def extract_module_name(self, path, options):
+        # Find fully_qualified module name from the full pathname
+        # of a source file.
+        dir, filename = os.path.split(path)
+        module_name, _ = os.path.splitext(filename)
+        if "." in module_name:
+            return module_name
+        names = [module_name]
+        # Climb through the enclosing package directories, collecting names.
+        while self.is_package_dir(dir):
+            parent, package_name = os.path.split(dir)
+            if parent == dir:
+                break
+            names.append(package_name)
+            dir = parent
+        names.reverse()
+        return ".".join(names)
+
+    def setup_errors(self, options, result):
+        # Initialise error reporting for the current thread and open the
+        # listing file. With options.use_listing_file set, the listing path
+        # is the main source file with its suffix replaced by ".lis" and is
+        # also stored in result.listing_file; otherwise no path is used.
+        Errors.init_thread()
+        if options.use_listing_file:
+            path = result.listing_file = Utils.replace_suffix(result.main_source_file, ".lis")
+        else:
+            path = None
+        Errors.open_listing_file(path=path, echo_to_stderr=options.errors_to_stderr)
+
+    def teardown_errors(self, err, options, result):
+        # Close the listing file and store the error count in
+        # result.num_errors. If 'err' is set or errors were counted, and a C
+        # file was recorded, Utils.castrate_file() is applied to it (failures
+        # of that step are ignored) and result.c_file is reset to None.
+        source_desc = result.compilation_source.source_desc
+        if not isinstance(source_desc, FileSourceDescriptor):
+            raise RuntimeError("Only file sources for code supported")
+        Errors.close_listing_file()
+        result.num_errors = Errors.get_errors_count()
+        if result.num_errors > 0:
+            err = True
+        if err and result.c_file:
+            try:
+                Utils.castrate_file(result.c_file, os.stat(source_desc.filename))
+            except EnvironmentError:
+                pass
+            result.c_file = None
+
+
+def get_output_filename(source_filename, cwd, options):
+    """
+    Work out the path of the generated C/C++ file.
+
+    Without options.output_file the source path with its suffix replaced
+    (".cpp" if options.cplus, else ".c") is returned.  Otherwise
+    options.output_file is joined onto cwd: if that is an existing directory,
+    the generated file's base name is placed inside it, else the joined
+    path itself is returned.
+    """
+    c_suffix = ".cpp" if options.cplus else ".c"
+    default_name = Utils.replace_suffix(source_filename, c_suffix)
+    if not options.output_file:
+        return default_name
+    target = os.path.join(cwd, options.output_file)
+    if not os.path.isdir(target):
+        return target
+    return os.path.join(target, os.path.basename(default_name))
+
+
+def create_default_resultobj(compilation_source, options):
+    """
+    Create a CompilationResult pre-filled from 'compilation_source':
+    main source file, the compilation source itself, the output C file
+    path (see get_output_filename) and options.embedded_metadata.
+    """
+    result = CompilationResult()
+    desc = compilation_source.source_desc
+    result.main_source_file = desc.filename
+    result.compilation_source = compilation_source
+    result.c_file = get_output_filename(
+        desc.filename, compilation_source.cwd, options)
+    result.embedded_metadata = options.embedded_metadata
+    return result
+
+
+def run_pipeline(source, options, full_module_name=None, context=None):
+    # Compile the single source file 'source' and return its
+    # CompilationResult. A Context is created from 'options' unless one is
+    # passed in; the module name is derived from the path when
+    # 'full_module_name' is not given.
+    from . import Pipeline
+
+    # ensure that the inputs are unicode (for Python 2)
+    if sys.version_info[0] == 2:
+        source = Utils.decode_filename(source)
+        if full_module_name:
+            full_module_name = Utils.decode_filename(full_module_name)
+
+    source_ext = os.path.splitext(source)[1]
+    options.configure_language_defaults(source_ext[1:])  # py/pyx
+    if context is None:
+        context = Context.from_options(options)
+
+    # Set up source object
+    cwd = os.getcwd()
+    abs_path = os.path.abspath(source)
+    full_module_name = full_module_name or context.extract_module_name(source, options)
+    full_module_name = EncodedString(full_module_name)
+
+    Utils.raise_error_if_module_name_forbidden(full_module_name)
+
+    if options.relative_path_in_code_position_comments:
+        rel_path = full_module_name.replace('.', os.sep) + source_ext
+        if not abs_path.endswith(rel_path):
+            rel_path = source  # safety measure to prevent printing incorrect paths
+    else:
+        rel_path = abs_path
+    source_desc = FileSourceDescriptor(abs_path, rel_path)
+    source = CompilationSource(source_desc, full_module_name, cwd)
+
+    # Set up result object
+    result = create_default_resultobj(source, options)
+
+    if options.annotate is None:
+        # By default, decide based on whether an html file already exists.
+        html_filename = os.path.splitext(result.c_file)[0] + ".html"
+        if os.path.exists(html_filename):
+            with io.open(html_filename, "r", encoding="UTF-8") as html_file:
+                # Only the first 100 characters are inspected for the marker.
+                if u'<!-- Generated by Cython' in html_file.read(100):
+                    options.annotate = True
+
+    # Get pipeline
+    # ('.py' sources and sources without an extension use the py pipeline)
+    if source_ext.lower() == '.py' or not source_ext:
+        pipeline = Pipeline.create_py_pipeline(context, options, result)
+    else:
+        pipeline = Pipeline.create_pyx_pipeline(context, options, result)
+
+    context.setup_errors(options, result)
+
+    # Warn about a dotted module name that comes from a dotted file name.
+    if '.' in full_module_name and '.' in os.path.splitext(os.path.basename(abs_path))[0]:
+        warning((source_desc, 1, 0),
+                "Dotted filenames ('%s') are deprecated."
+                " Please use the normal Python package directory layout." % os.path.basename(abs_path), level=1)
+
+    err, enddata = Pipeline.run_pipeline(pipeline, source)
+    context.teardown_errors(err, options, result)
+    # A depfile is only written when the pipeline reported no error.
+    if err is None and options.depfile:
+        from ..Build.Dependencies import create_dependency_tree
+        dependencies = create_dependency_tree(context).all_dependencies(result.main_source_file)
+        Utils.write_depfile(result.c_file, result.main_source_file, dependencies)
+    return result
+
+
+# ------------------------------------------------------------------------
+#
+#  Main Python entry points
+#
+# ------------------------------------------------------------------------
+
+class CompilationSource(object):
+    """
+    Plain record of what is needed to launch the compilation pipeline for
+    one compilation unit: the source descriptor, the full module name and
+    the working directory.
+    """
+    def __init__(self, source_desc, full_module_name, cwd):
+        (self.source_desc,
+         self.full_module_name,
+         self.cwd) = (source_desc, full_module_name, cwd)
+
+
+class CompilationResult(object):
+    """
+    Results from the Cython compiler:
+
+    c_file           string or None   The generated C source file
+    h_file           string or None   The generated C header file
+    i_file           string or None   The generated .pxi file
+    api_file         string or None   The generated C API .h file
+    listing_file     string or None   File of error messages
+    object_file      string or None   Result of compiling the C file
+    extension_file   string or None   Result of linking the object file
+    main_source_file string or None   The main source file that was compiled
+    num_errors       integer          Number of compilation errors
+    compilation_source CompilationSource or None
+    """
+
+    def __init__(self):
+        self.c_file = None
+        self.h_file = None
+        self.i_file = None
+        self.api_file = None
+        self.listing_file = None
+        self.object_file = None
+        self.extension_file = None
+        self.main_source_file = None
+        # Both attributes are documented above and read by other code in this
+        # module (e.g. CompilationResultSet.add() reads num_errors), so give
+        # them defaults instead of relying on a later assignment.
+        self.num_errors = 0
+        self.compilation_source = None
+
+class CompilationResultSet(dict):
+    """
+    Dictionary of compilation results for several Pyrex source files,
+    keyed by source file path with CompilationResult values.
+
+    Extra attribute:
+
+    num_errors   integer   Sum of the error counts of all added results
+    """
+
+    num_errors = 0
+
+    def add(self, source, result):
+        # Store the result first, then fold its error count into the total.
+        self[source] = result
+        self.num_errors = self.num_errors + result.num_errors
+
+
+def compile_single(source, options, full_module_name = None):
+    """
+    compile_single(source, options, full_module_name)
+
+    Compile exactly one Pyrex implementation file by handing it straight to
+    run_pipeline() and return the resulting CompilationResult.  No timestamp
+    checking and no recursion take place.
+    """
+    return run_pipeline(source, options, full_module_name=full_module_name)
+
+
+def compile_multiple(sources, options):
+    """
+    compile_multiple(sources, options)
+
+    Compile a sequence of Pyrex implementation files and collect the
+    outcomes in a CompilationResultSet keyed by absolute source path.
+    With options.timestamps set, a source is only compiled when its output
+    file is reported out of date.  Raises RuntimeError if options.module_name
+    is combined with more than one source.
+    """
+    if len(sources) > 1 and options.module_name:
+        raise RuntimeError('Full module name can only be set '
+                           'for single source compilation')
+    # run_pipeline creates the context
+    # context = Context.from_options(options)
+    abs_sources = [os.path.abspath(path) for path in sources]
+    seen = set()
+    results = CompilationResultSet()
+    use_timestamps = options.timestamps
+    be_verbose = options.verbose
+    context = None
+    cwd = os.getcwd()
+    for source in abs_sources:
+        if source in seen:
+            continue
+        if context is None:
+            context = Context.from_options(options)
+        output_filename = get_output_filename(source, cwd, options)
+        out_of_date = context.c_file_out_of_date(source, output_filename)
+        if out_of_date or not use_timestamps:
+            if be_verbose:
+                sys.stderr.write("Compiling %s\n" % source)
+            results.add(source, run_pipeline(source, options,
+                                             full_module_name=options.module_name,
+                                             context=context))
+            # Compiling multiple sources in one context doesn't quite
+            # work properly yet.
+            context = None
+        seen.add(source)
+    return results
+
+
+def compile(source, options = None, full_module_name = None, **kwds):
+    """
+    compile(source [, options], [, <option> = <value>]...)
+
+    Compile one or more Pyrex implementation files.  'source' is either a
+    single file name or a sequence of file names; keyword arguments override
+    the given options.  A single file name without timestamp checking
+    yields a CompilationResult; every other case goes through
+    compile_multiple() and yields a CompilationResultSet.
+    """
+    options = CompilationOptions(defaults = options, **kwds)
+    if not isinstance(source, basestring):
+        return compile_multiple(source, options)
+    if options.timestamps:
+        return compile_multiple([source], options)
+    return compile_single(source, options, full_module_name)
+
+
+@Utils.cached_function
+def search_include_directories(dirs, qualified_name, suffix="", pos=None, include=False, source_file_path=None):
+    """
+    Search the list of include directories for the given file name.
+
+    If a source file path or position is given, first searches the directory
+    containing that file.  Returns None if not found, but does not report an error.
+
+    The 'include' option will disable package dereferencing.
+    """
+    # Derive the source file path from the position when it was not passed in.
+    if pos and not source_file_path:
+        file_desc = pos[0]
+        if not isinstance(file_desc, FileSourceDescriptor):
+            raise RuntimeError("Only file sources for code supported")
+        source_file_path = file_desc.filename
+    if source_file_path:
+        # Prepend the directory of the source file (for includes) or the
+        # result of Utils.find_root_package_dir() (otherwise) to the search list.
+        if include:
+            dirs = (os.path.dirname(source_file_path),) + dirs
+        else:
+            dirs = (Utils.find_root_package_dir(source_file_path),) + dirs
+
+    # search for dotted filename e.g. <dir>/foo.bar.pxd
+    dotted_filename = qualified_name
+    if suffix:
+        dotted_filename += suffix
+
+    for dirname in dirs:
+        path = os.path.join(dirname, dotted_filename)
+        if os.path.exists(path):
+            return path
+
+    # search for filename in package structure e.g. <dir>/foo/bar.pxd or <dir>/foo/bar/__init__.pxd
+    if not include:
+
+        names = qualified_name.split('.')
+        package_names = tuple(names[:-1])
+        module_name = names[-1]
+
+        # search for standard packages first - PEP420
+        namespace_dirs = []
+        for dirname in dirs:
+            package_dir, is_namespace = Utils.check_package_dir(dirname, package_names)
+            if package_dir is not None:
+                if is_namespace:
+                    # Remember namespace packages for the second pass below.
+                    namespace_dirs.append(package_dir)
+                    continue
+                path = search_module_in_dir(package_dir, module_name, suffix)
+                if path:
+                    return path
+
+        # search for namespaces second - PEP420
+        for package_dir in namespace_dirs:
+            path = search_module_in_dir(package_dir, module_name, suffix)
+            if path:
+                return path
+
+    return None
+
+
+@Utils.cached_function
+def search_module_in_dir(package_dir, module_name, suffix):
+    # First try the plain module form: <dir>/foo/bar.pxd
+    found = Utils.find_versioned_file(package_dir, module_name, suffix)
+    if found or not suffix:
+        return found
+
+    # Otherwise try the package form: <dir>/foo/bar/__init__.pxd
+    return Utils.find_versioned_file(
+        os.path.join(package_dir, module_name), "__init__", suffix)
+
+
+# ------------------------------------------------------------------------
+#
+#  Main command-line entry point
+#
+# ------------------------------------------------------------------------
+
+def setuptools_main():
+    # Run main() in command-line mode and pass its return value through.
+    return main(command_line=1)
+
+
+def main(command_line = 0):
+    # Compile the sources named in sys.argv[1:].  In command-line mode the
+    # arguments are parsed into options and sources; otherwise the default
+    # options are used and every argument is a source.  Exits with status 1
+    # if a compilation error is counted or an EnvironmentError/PyrexError
+    # is raised.
+    cli_args = sys.argv[1:]
+    if not command_line:
+        options = CompilationOptions(default_options)
+        sources = cli_args
+    else:
+        try:
+            options, sources = parse_command_line(cli_args)
+        except IOError as exc:
+            # TODO: IOError can be replaced with FileNotFoundError in Cython 3.1
+            import errno
+            if exc.errno != errno.ENOENT:
+                # Raised IOError is not caused by missing file.
+                raise
+            print("{}: No such file or directory: '{}'".format(sys.argv[0], exc.filename), file=sys.stderr)
+            sys.exit(1)
+
+    if options.show_version:
+        Utils.print_version()
+
+    if options.working_path != "":
+        os.chdir(options.working_path)
+
+    failed = False
+    try:
+        if compile(sources, options).num_errors > 0:
+            failed = True
+    except (EnvironmentError, PyrexError) as exc:
+        sys.stderr.write(str(exc) + '\n')
+        failed = True
+    if failed:
+        sys.exit(1)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Naming.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Naming.py
new file mode 100644
index 0000000000000000000000000000000000000000..65b6e1a7fb3e0eb5754c364450831e469053d653
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Naming.py
@@ -0,0 +1,198 @@
+#
+#   C naming conventions
+#
+#
+#   Prefixes for generating C names.
+#   Collected here to facilitate ensuring uniqueness.
+#
+from .. import __version__
+
+pyrex_prefix    = "__pyx_"  # common stem that most of the names defined below are built on
+cyversion = __version__.replace('.', '_')  # package version string with every '.' turned into '_'
+
+
+codewriter_temp_prefix = pyrex_prefix + "t_"
+
+temp_prefix       = u"__cyt_"
+
+pyunicode_identifier_prefix = pyrex_prefix + 'U'
+
+builtin_prefix    = pyrex_prefix + "builtin_"
+arg_prefix        = pyrex_prefix + "arg_"
+genexpr_arg_prefix = pyrex_prefix + "genexpr_arg_"
+funcdoc_prefix    = pyrex_prefix + "doc_"
+enum_prefix       = pyrex_prefix + "e_"
+func_prefix       = pyrex_prefix + "f_"
+func_prefix_api   = pyrex_prefix + "api_f_"
+pyfunc_prefix     = pyrex_prefix + "pf_"
+pywrap_prefix     = pyrex_prefix + "pw_"
+genbody_prefix    = pyrex_prefix + "gb_"
+gstab_prefix      = pyrex_prefix + "getsets_"
+prop_get_prefix   = pyrex_prefix + "getprop_"
+const_prefix      = pyrex_prefix + "k_"
+py_const_prefix   = pyrex_prefix + "kp_"
+label_prefix      = pyrex_prefix + "L"
+pymethdef_prefix  = pyrex_prefix + "mdef_"
+method_wrapper_prefix = pyrex_prefix + "specialmethod_"
+methtab_prefix    = pyrex_prefix + "methods_"
+memtab_prefix     = pyrex_prefix + "members_"
+objstruct_prefix  = pyrex_prefix + "obj_"
+typeptr_prefix    = pyrex_prefix + "ptype_"
+prop_set_prefix   = pyrex_prefix + "setprop_"
+type_prefix       = pyrex_prefix + "t_"
+typeobj_prefix    = pyrex_prefix + "type_"
+var_prefix        = pyrex_prefix + "v_"
+varptr_prefix     = pyrex_prefix + "vp_"
+varptr_prefix_api = pyrex_prefix + "api_vp_"
+wrapperbase_prefix= pyrex_prefix + "wrapperbase_"
+pybuffernd_prefix   = pyrex_prefix + "pybuffernd_"
+pybufferstruct_prefix  = pyrex_prefix + "pybuffer_"
+vtable_prefix     = pyrex_prefix + "vtable_"
+vtabptr_prefix    = pyrex_prefix + "vtabptr_"
+vtabstruct_prefix = pyrex_prefix + "vtabstruct_"
+unicode_vtabentry_prefix  = pyrex_prefix + "Uvtabentry_"
+# vtab entries aren't normally mangled,
+# but punycode names sometimes start with numbers leading to a C syntax error
+unicode_structmember_prefix = pyrex_prefix + "Umember_"
+# as above -
+# not normally mangled but punycode names cause specific problems
+opt_arg_prefix    = pyrex_prefix + "opt_args_"
+convert_func_prefix = pyrex_prefix + "convert_"
+closure_scope_prefix = pyrex_prefix + "scope_"
+closure_class_prefix = pyrex_prefix + "scope_struct_"
+lambda_func_prefix = pyrex_prefix + "lambda_"
+module_is_main   = pyrex_prefix + "module_is_main"
+defaults_struct_prefix = pyrex_prefix + "defaults"
+dynamic_args_cname = pyrex_prefix + "dynamic_args"
+
+interned_prefixes = {
+    'str': pyrex_prefix + "n_",
+    'int': pyrex_prefix + "int_",
+    'float': pyrex_prefix + "float_",
+    'tuple': pyrex_prefix + "tuple_",
+    'codeobj': pyrex_prefix + "codeobj_",
+    'slice': pyrex_prefix + "slice_",
+    'ustring': pyrex_prefix + "ustring_",
+    'umethod': pyrex_prefix + "umethod_",
+}
+
+ctuple_type_prefix = pyrex_prefix + "ctuple_"
+args_cname       = pyrex_prefix + "args"
+nargs_cname      = pyrex_prefix + "nargs"
+kwvalues_cname   = pyrex_prefix + "kwvalues"
+generator_cname  = pyrex_prefix + "generator"
+sent_value_cname = pyrex_prefix + "sent_value"
+pykwdlist_cname  = pyrex_prefix + "pyargnames"
+obj_base_cname   = pyrex_prefix + "base"
+builtins_cname   = pyrex_prefix + "b"
+preimport_cname  = pyrex_prefix + "i"
+moddict_cname    = pyrex_prefix + "d"
+dummy_cname      = pyrex_prefix + "dummy"
+filename_cname   = pyrex_prefix + "filename"
+modulename_cname = pyrex_prefix + "modulename"
+filetable_cname  = pyrex_prefix + "f"
+intern_tab_cname = pyrex_prefix + "intern_tab"
+kwds_cname       = pyrex_prefix + "kwds"
+lineno_cname     = pyrex_prefix + "lineno"
+clineno_cname    = pyrex_prefix + "clineno"
+cfilenm_cname    = pyrex_prefix + "cfilenm"
+local_tstate_cname = pyrex_prefix + "tstate"
+module_cname     = pyrex_prefix + "m"
+modulestate_cname = pyrex_prefix + "mstate"
+modulestateglobal_cname = pyrex_prefix + "mstate_global"
+moddoc_cname     = pyrex_prefix + "mdoc"
+methtable_cname  = pyrex_prefix + "methods"
+retval_cname     = pyrex_prefix + "r"
+reqd_kwds_cname  = pyrex_prefix + "reqd_kwds"
+self_cname       = pyrex_prefix + "self"
+stringtab_cname  = pyrex_prefix + "string_tab"
+vtabslot_cname   = pyrex_prefix + "vtab"
+c_api_tab_cname  = pyrex_prefix + "c_api_tab"
+gilstate_cname   = pyrex_prefix + "state"
+skip_dispatch_cname = pyrex_prefix + "skip_dispatch"
+empty_tuple      = pyrex_prefix + "empty_tuple"
+empty_bytes      = pyrex_prefix + "empty_bytes"
+empty_unicode    = pyrex_prefix + "empty_unicode"
+print_function   = pyrex_prefix + "print"
+print_function_kwargs   = pyrex_prefix + "print_kwargs"
+cleanup_cname    = pyrex_prefix + "module_cleanup"
+pymoduledef_cname = pyrex_prefix + "moduledef"
+pymoduledef_slots_cname = pyrex_prefix + "moduledef_slots"
+pymodinit_module_arg = pyrex_prefix + "pyinit_module"
+pymodule_create_func_cname = pyrex_prefix + "pymod_create"
+pymodule_exec_func_cname = pyrex_prefix + "pymod_exec"
+optional_args_cname = pyrex_prefix + "optional_args"
+import_star      = pyrex_prefix + "import_star"
+import_star_set  = pyrex_prefix + "import_star_set"
+outer_scope_cname= pyrex_prefix + "outer_scope"
+cur_scope_cname  = pyrex_prefix + "cur_scope"
+enc_scope_cname  = pyrex_prefix + "enc_scope"
+frame_cname      = pyrex_prefix + "frame"
+frame_code_cname = pyrex_prefix + "frame_code"
+error_without_exception_cname = pyrex_prefix + "error_without_exception"
+binding_cfunc    = pyrex_prefix + "binding_PyCFunctionType"
+fused_func_prefix = pyrex_prefix + 'fuse_'
+fused_dtype_prefix = pyrex_prefix + 'fused_dtype_'
+quick_temp_cname = pyrex_prefix + "temp"  # temp variable for quick'n'dirty temping
+tp_dict_version_temp = pyrex_prefix + "tp_dict_version"
+obj_dict_version_temp = pyrex_prefix + "obj_dict_version"
+type_dict_guard_temp = pyrex_prefix + "typedict_guard"
+cython_runtime_cname   = pyrex_prefix + "cython_runtime"
+cyfunction_type_cname = pyrex_prefix + "CyFunctionType"
+fusedfunction_type_cname = pyrex_prefix + "FusedFunctionType"
+# the name "dflt" was picked by analogy with the CPython dataclass module which stores
+# the default values in variables named f"_dflt_{field.name}" in a hidden scope that's
+# passed to the __init__ function. (The name is unimportant to the exact workings though)
+dataclass_field_default_cname = pyrex_prefix + "dataclass_dflt"
+
+global_code_object_cache_find = pyrex_prefix + 'find_code_object'
+global_code_object_cache_insert = pyrex_prefix + 'insert_code_object'
+
+genexpr_id_ref = 'genexpr'
+freelist_name  = 'freelist'
+freecount_name = 'freecount'
+
+line_c_macro = "__LINE__"
+
+file_c_macro = "__FILE__"
+
+extern_c_macro  = pyrex_prefix.upper() + "EXTERN_C"
+
+exc_type_name   = pyrex_prefix + "exc_type"
+exc_value_name  = pyrex_prefix + "exc_value"
+exc_tb_name     = pyrex_prefix + "exc_tb"
+exc_lineno_name = pyrex_prefix + "exc_lineno"
+
+parallel_exc_type = pyrex_prefix + "parallel_exc_type"
+parallel_exc_value = pyrex_prefix + "parallel_exc_value"
+parallel_exc_tb = pyrex_prefix + "parallel_exc_tb"
+parallel_filename = pyrex_prefix + "parallel_filename"
+parallel_lineno = pyrex_prefix + "parallel_lineno"
+parallel_clineno = pyrex_prefix + "parallel_clineno"
+parallel_why = pyrex_prefix + "parallel_why"
+
+exc_vars = (exc_type_name, exc_value_name, exc_tb_name)
+
+api_name        = pyrex_prefix + "capi__"
+
+# the h and api guards get changed to:
+#  __PYX_HAVE__FILENAME (for ascii filenames)
+#  __PYX_HAVE_U_PUNYCODEFILENAME (for non-ascii filenames)
+h_guard_prefix   = "__PYX_HAVE_"
+api_guard_prefix = "__PYX_HAVE_API_"
+api_func_guard   = "__PYX_HAVE_API_FUNC_"
+
+PYX_NAN          = "__PYX_NAN()"
+
+def py_version_hex(major, minor=0, micro=0, release_level=0, release_serial=0):
+    return release_serial | release_level << 4 | micro << 8 | minor << 16 | major << 24  # one packed int
+
+# there are a few places where it's useful to iterate over all of these
+used_types_and_macros = [
+    (cyfunction_type_cname, '__Pyx_CyFunction_USED'),
+    (fusedfunction_type_cname, '__Pyx_FusedFunction_USED'),
+    ('__pyx_GeneratorType', '__Pyx_Generator_USED'),
+    ('__pyx_IterableCoroutineType', '__Pyx_IterableCoroutine_USED'),
+    ('__pyx_CoroutineAwaitType', '__Pyx_Coroutine_USED'),
+    ('__pyx_CoroutineType', '__Pyx_Coroutine_USED'),
+]
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Optimize.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Optimize.py
new file mode 100644
index 0000000000000000000000000000000000000000..8dd48e951acf3218328851e029e67c5b2f71e8ae
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Optimize.py
@@ -0,0 +1,5213 @@
+from __future__ import absolute_import
+
+import re
+import sys
+import copy
+import codecs
+import itertools
+
+from . import TypeSlots
+from .ExprNodes import not_a_constant
+import cython
+cython.declare(UtilityCode=object, EncodedString=object, bytes_literal=object, encoded_string=object,
+               Nodes=object, ExprNodes=object, PyrexTypes=object, Builtin=object,
+               UtilNodes=object, _py_int_types=object)
+
+if sys.version_info[0] >= 3:
+    _py_int_types = int
+    _py_string_types = (bytes, str)
+else:
+    _py_int_types = (int, long)
+    _py_string_types = (bytes, unicode)
+
+from . import Nodes
+from . import ExprNodes
+from . import PyrexTypes
+from . import Visitor
+from . import Builtin
+from . import UtilNodes
+from . import Options
+
+from .Code import UtilityCode, TempitaUtilityCode
+from .StringEncoding import EncodedString, bytes_literal, encoded_string
+from .Errors import error, warning
+from .ParseTreeTransforms import SkipDeclarations
+from .. import Utils
+
+try:
+    from __builtin__ import reduce
+except ImportError:
+    from functools import reduce
+
+try:
+    from __builtin__ import basestring
+except ImportError:
+    basestring = str  # Python 3
+
+
+def load_c_utility(name):
+    return UtilityCode.load_cached(name, "Optimize.c")  # cached lookup of 'name' in Optimize.c
+
+
+def unwrap_coerced_node(node, coercion_nodes=(ExprNodes.CoerceToPyTypeNode, ExprNodes.CoerceFromPyTypeNode)):
+    """Return 'node.arg' if 'node' is an instance of 'coercion_nodes', otherwise 'node' itself."""
+    is_coercion = isinstance(node, coercion_nodes)
+    return node.arg if is_coercion else node
+
+
+def unwrap_node(node):
+    while isinstance(node, UtilNodes.ResultRefNode):  # peel off nested ResultRefNode wrappers
+        node = node.expression
+    return node  # first node in the chain that is not a ResultRefNode
+
+
+def is_common_value(a, b):
+    """Compare two unwrapped nodes: equal names, or equal attribute (of a non-py-attr 'a') on a common object."""
+    lhs, rhs = unwrap_node(a), unwrap_node(b)
+    if isinstance(lhs, ExprNodes.NameNode) and isinstance(rhs, ExprNodes.NameNode):
+        return lhs.name == rhs.name
+    if not (isinstance(lhs, ExprNodes.AttributeNode) and isinstance(rhs, ExprNodes.AttributeNode)):
+        return False
+    return not lhs.is_py_attr and is_common_value(lhs.obj, rhs.obj) and lhs.attribute == rhs.attribute
+
+
+def filter_none_node(node):
+    """Map a node whose constant_result is None to None; pass anything else (None included) through."""
+    is_none_constant = node is not None and node.constant_result is None
+    return None if is_none_constant else node
+
+
+class _YieldNodeCollector(Visitor.TreeVisitor):
+    """
+    YieldExprNode finder for generator expressions; also records the ExprStatNode holding each yield.
+    """
+    def __init__(self):
+        Visitor.TreeVisitor.__init__(self)
+        self.yield_stat_nodes = {}  # YieldExprNode -> the ExprStatNode whose .expr it is
+        self.yield_nodes = []  # every YieldExprNode visited, in visiting order
+
+    visit_Node = Visitor.TreeVisitor.visitchildren  # presumably the fallback for nodes without a visit_* here
+
+    def visit_YieldExprNode(self, node):
+        self.yield_nodes.append(node)
+        self.visitchildren(node)
+
+    def visit_ExprStatNode(self, node):
+        self.visitchildren(node)  # children first, so a yield in node.expr is already recorded
+        if node.expr in self.yield_nodes:
+            self.yield_stat_nodes[node.expr] = node
+
+    # everything below these nodes is out of scope:
+
+    def visit_GeneratorExpressionNode(self, node):
+        pass
+
+    def visit_LambdaNode(self, node):
+        pass
+
+    def visit_FuncDefNode(self, node):
+        pass
+
+
+def _find_single_yield_expression(node):
+    """Return the only (yield argument, yield statement) pair found under 'node', else (None, None)."""
+    found = _find_yield_statements(node)
+    exactly_one = len(found) == 1
+    return found[0] if exactly_one else (None, None)
+
+
+def _find_yield_statements(node):
+    """Collect (yield argument, ExprStatNode) pairs for the yields found below 'node'.
+
+    Returns an empty list as soon as one yield has no recorded ExprStatNode.
+    """
+    finder = _YieldNodeCollector()
+    finder.visitchildren(node)
+    stat_of = finder.yield_stat_nodes
+    try:
+        return [(yield_expr.arg, stat_of[yield_expr]) for yield_expr in finder.yield_nodes]
+    except KeyError:  # found YieldExprNode without ExprStatNode (i.e. a non-statement usage of 'yield')
+        return []
+
+
+class IterationTransform(Visitor.EnvTransform):
+    """Transform some common for-in loop patterns into efficient C loops:
+
+    - for-in-dict loop becomes a while loop calling PyDict_Next()
+    - for-in-enumerate is replaced by an external counter variable
+    - for-in-range loop becomes a plain C for loop
+    """
+    def visit_PrimaryCmpNode(self, node):
+        if node.is_ptr_contains():
+            # The comparison is replaced by a tree equivalent to this loop:
+            # for t in operand2:
+            #     if operand1 == t:
+            #         res = True
+            #         break
+            # else:
+            #     res = False
+
+            pos = node.pos
+            result_ref = UtilNodes.ResultRefNode(node)  # plays the role of 'res' above
+            if node.operand2.is_subscript:
+                base_type = node.operand2.base.type.base_type
+            else:
+                base_type = node.operand2.type.base_type
+            target_handle = UtilNodes.TempHandle(base_type)  # temp for the loop variable 't'
+            target = target_handle.ref(pos)
+            cmp_node = ExprNodes.PrimaryCmpNode(
+                pos, operator=u'==', operand1=node.operand1, operand2=target)
+            if_body = Nodes.StatListNode(
+                pos,
+                stats = [Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=1)),
+                         Nodes.BreakStatNode(pos)])
+            if_node = Nodes.IfStatNode(
+                pos,
+                if_clauses=[Nodes.IfClauseNode(pos, condition=cmp_node, body=if_body)],
+                else_clause=None)
+            for_loop = UtilNodes.TempsBlockNode(
+                pos,
+                temps = [target_handle],
+                body = Nodes.ForInStatNode(
+                    pos,
+                    target=target,
+                    iterator=ExprNodes.IteratorNode(node.operand2.pos, sequence=node.operand2),
+                    body=if_node,
+                    else_clause=Nodes.SingleAssignmentNode(pos, lhs=result_ref, rhs=ExprNodes.BoolNode(pos, value=0))))
+            for_loop = for_loop.analyse_expressions(self.current_env())
+            for_loop = self.visit(for_loop)  # run this transform over the freshly built loop too
+            new_node = UtilNodes.TempResultFromStatNode(result_ref, for_loop)
+
+            if node.operator == 'not_in':  # 'not in' is the negation of the loop result
+                new_node = ExprNodes.NotNode(pos, operand=new_node)
+            return new_node
+
+        else:
+            self.visitchildren(node)  # any other comparison: only transform its children
+            return node
+
+    def visit_ForInStatNode(self, node):
+        self.visitchildren(node)  # transform nested nodes before looking at this loop
+        return self._optimise_for_loop(node, node.iterator.sequence)  # replacement node, or 'node' itself
+
+    def _optimise_for_loop(self, node, iterable, reversed=False):  # returns 'node' when nothing applies
+        annotation_type = None
+        if (iterable.is_name or iterable.is_attribute) and iterable.entry and iterable.entry.annotation:
+            annotation = iterable.entry.annotation.expr
+            if annotation.is_subscript:
+                annotation = annotation.base  # container base type
+        # NOTE(review): 'annotation' is not used after this point and 'annotation_type' stays None in this method.
+        if Builtin.dict_type in (iterable.type, annotation_type):
+            # like iterating over dict.keys()
+            if reversed:
+                # CPython raises an error here: not a sequence
+                return node
+            return self._transform_dict_iteration(
+                node, dict_obj=iterable, method=None, keys=True, values=False)
+
+        if (Builtin.set_type in (iterable.type, annotation_type) or
+                Builtin.frozenset_type in (iterable.type, annotation_type)):
+            if reversed:
+                # CPython raises an error here: not a sequence
+                return node
+            return self._transform_set_iteration(node, iterable)
+
+        # C array (slice) iteration?
+        if iterable.type.is_ptr or iterable.type.is_array:
+            return self._transform_carray_iteration(node, iterable, reversed=reversed)
+        if iterable.type is Builtin.bytes_type:
+            return self._transform_bytes_iteration(node, iterable, reversed=reversed)
+        if iterable.type is Builtin.unicode_type:
+            return self._transform_unicode_iteration(node, iterable, reversed=reversed)
+        # in principle _transform_indexable_iteration would work on most of the above, and
+        # also tuple and list. However, it probably isn't quite as optimized
+        if iterable.type is Builtin.bytearray_type:
+            return self._transform_indexable_iteration(node, iterable, is_mutable=True, reversed=reversed)
+        if isinstance(iterable, ExprNodes.CoerceToPyTypeNode) and iterable.arg.type.is_memoryviewslice:
+            return self._transform_indexable_iteration(node, iterable.arg, is_mutable=False, reversed=reversed)
+
+        # the rest is based on function calls
+        if not isinstance(iterable, ExprNodes.SimpleCallNode):
+            return node
+
+        if iterable.args is None:
+            arg_count = iterable.arg_tuple and len(iterable.arg_tuple.args) or 0
+        else:
+            arg_count = len(iterable.args)
+            if arg_count and iterable.self is not None:
+                arg_count -= 1  # do not count the 'self' argument
+
+        function = iterable.function
+        # dict iteration?
+        if function.is_attribute and not reversed and not arg_count:
+            base_obj = iterable.self or function.obj
+            method = function.attribute
+            # in Py3, items() is equivalent to Py2's iteritems()
+            is_safe_iter = self.global_scope().context.language_level >= 3
+
+            if not is_safe_iter and method in ('keys', 'values', 'items'):
+                # try to reduce this to the corresponding .iter*() methods
+                if isinstance(base_obj, ExprNodes.CallNode):
+                    inner_function = base_obj.function
+                    if (inner_function.is_name and inner_function.name == 'dict'
+                            and inner_function.entry
+                            and inner_function.entry.is_builtin):
+                        # e.g. dict(something).items() => safe to use .iter*()
+                        is_safe_iter = True
+
+            keys = values = False
+            if method == 'iterkeys' or (is_safe_iter and method == 'keys'):
+                keys = True
+            elif method == 'itervalues' or (is_safe_iter and method == 'values'):
+                values = True
+            elif method == 'iteritems' or (is_safe_iter and method == 'items'):
+                keys = values = True
+
+            if keys or values:  # one of the recognised dict iteration methods matched
+                return self._transform_dict_iteration(
+                    node, base_obj, method, keys, values)
+
+        # enumerate/reversed ?
+        if iterable.self is None and function.is_name and \
+               function.entry and function.entry.is_builtin:
+            if function.name == 'enumerate':
+                if reversed:
+                    # CPython raises an error here: not a sequence
+                    return node
+                return self._transform_enumerate_iteration(node, iterable)
+            elif function.name == 'reversed':
+                if reversed:
+                    # CPython raises an error here: not a sequence
+                    return node
+                return self._transform_reversed_iteration(node, iterable)
+
+        # range() iteration?
+        if Options.convert_range and 1 <= arg_count <= 3 and (
+                iterable.self is None and
+                function.is_name and function.name in ('range', 'xrange') and
+                function.entry and function.entry.is_builtin):
+            if node.target.type.is_int or node.target.type.is_enum:
+                return self._transform_range_iteration(node, iterable, reversed=reversed)
+            if node.target.type.is_pyobject:
+                # Assume that small integer ranges (C long >= 32bit) are best handled in C as well.
+                for arg in (iterable.arg_tuple.args if iterable.args is None else iterable.args):
+                    if isinstance(arg, ExprNodes.IntNode):
+                        if arg.has_constant_result() and -2**30 <= arg.constant_result < 2**30:
+                            continue
+                    break  # this argument is not a small constant IntNode: skip the for-else below
+                else:
+                    return self._transform_range_iteration(node, iterable, reversed=reversed)
+
+        return node
+
+    def _transform_reversed_iteration(self, node, reversed_function):  # handles 'for ... in reversed(x)'
+        args = reversed_function.arg_tuple.args
+        if len(args) == 0:
+            error(reversed_function.pos,
+                  "reversed() requires an iterable argument")
+            return node  # error reported; loop left untouched
+        elif len(args) > 1:
+            error(reversed_function.pos,
+                  "reversed() takes exactly 1 argument")
+            return node  # error reported; loop left untouched
+        arg = args[0]
+
+        # reversed(list/tuple) ?
+        if arg.type in (Builtin.tuple_type, Builtin.list_type):
+            node.iterator.sequence = arg.as_none_safe_node("'NoneType' object is not iterable")
+            node.iterator.reversed = True  # iterate 'arg' directly, flagged as reversed
+            return node
+
+        return self._optimise_for_loop(node, arg, reversed=True)  # retry on the inner iterable
+
+    def _transform_indexable_iteration(self, node, slice_node, is_mutable, reversed=False):
+        """In principle can handle any iterable that Cython has a len() for and knows how to index"""
+        unpack_temp_node = UtilNodes.LetRefNode(
+            slice_node.as_none_safe_node("'NoneType' is not iterable"),
+            may_hold_none=False, is_temp=True
+            )
+
+        start_node = ExprNodes.IntNode(
+            node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type)
+        def make_length_call():
+            # helper function since we need to create this node for a couple of places
+            builtin_len = ExprNodes.NameNode(node.pos, name="len",
+                                             entry=Builtin.builtin_scope.lookup("len"))
+            return ExprNodes.SimpleCallNode(node.pos,
+                                    function=builtin_len,
+                                    args=[unpack_temp_node]
+                                    )
+        length_temp = UtilNodes.LetRefNode(make_length_call(), type=PyrexTypes.c_py_ssize_t_type, is_temp=True)
+        end_node = length_temp
+
+        if reversed:
+            relation1, relation2 = '>', '>='  # loop header becomes: length > counter >= 0
+            start_node, end_node = end_node, start_node
+        else:
+            relation1, relation2 = '<=', '<'  # loop header becomes: 0 <= counter < length
+
+        counter_ref = UtilNodes.LetRefNode(pos=node.pos, type=PyrexTypes.c_py_ssize_t_type)
+
+        target_value = ExprNodes.IndexNode(slice_node.pos, base=unpack_temp_node,
+                                           index=counter_ref)
+
+        target_assign = Nodes.SingleAssignmentNode(
+            pos = node.target.pos,
+            lhs = node.target,
+            rhs = target_value)
+
+        # analyse with boundscheck and wraparound
+        # off (because we're confident we know the size)
+        env = self.current_env()
+        new_directives = Options.copy_inherited_directives(env.directives, boundscheck=False, wraparound=False)
+        target_assign = Nodes.CompilerDirectivesNode(
+            target_assign.pos,
+            directives=new_directives,
+            body=target_assign,
+        )
+
+        body = Nodes.StatListNode(
+            node.pos,
+            stats = [target_assign])  # exclude node.body for now to not reanalyse it
+        if is_mutable:
+            # We need to be slightly careful here that we are actually modifying the loop
+            # bounds and not a temp copy of it. Setting is_temp=True on length_temp seems
+            # to ensure this.
+            # If this starts to fail then we could insert an "if out_of_bounds: break" instead
+            loop_length_reassign = Nodes.SingleAssignmentNode(node.pos,
+                                                        lhs = length_temp,
+                                                        rhs = make_length_call())
+            body.stats.append(loop_length_reassign)  # length is re-read at the end of each iteration
+
+        loop_node = Nodes.ForFromStatNode(
+            node.pos,
+            bound1=start_node, relation1=relation1,
+            target=counter_ref,
+            relation2=relation2, bound2=end_node,
+            step=None, body=body,
+            else_clause=node.else_clause,
+            from_range=True)
+
+        ret = UtilNodes.LetNode(
+                    unpack_temp_node,
+                    UtilNodes.LetNode(
+                        length_temp,
+                        # TempResultFromStatNode provides the framework where the "counter_ref"
+                        # temp is set up and can be assigned to. However, we don't need the
+                        # result it returns so wrap it in an ExprStatNode.
+                        Nodes.ExprStatNode(node.pos,
+                            expr=UtilNodes.TempResultFromStatNode(
+                                    counter_ref,
+                                    loop_node
+                            )
+                        )
+                    )
+                ).analyse_expressions(env)
+        body.stats.insert(1, node.body)  # original body goes right after the target assignment, post-analysis
+        return ret
+
+    PyBytes_AS_STRING_func_type = PyrexTypes.CFuncType(  # passed with the "PyBytes_AS_STRING" call node
+        PyrexTypes.c_char_ptr_type, [
+            PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
+            ])
+
+    PyBytes_GET_SIZE_func_type = PyrexTypes.CFuncType(  # passed with the "PyBytes_GET_SIZE" call node
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("s", Builtin.bytes_type, None)
+            ])
+
+    def _transform_bytes_iteration(self, node, slice_node, reversed=False):
+        target_type = node.target.type
+        if not target_type.is_int and target_type is not Builtin.bytes_type:
+            # bytes iteration returns bytes objects in Py2, but
+            # integers in Py3
+            return node
+
+        unpack_temp_node = UtilNodes.LetRefNode(
+            slice_node.as_none_safe_node("'NoneType' is not iterable"))
+
+        slice_base_node = ExprNodes.PythonCapiCallNode(  # char* view of the bytes object
+            slice_node.pos, "PyBytes_AS_STRING",
+            self.PyBytes_AS_STRING_func_type,
+            args = [unpack_temp_node],
+            is_temp = 0,
+            )
+        len_node = ExprNodes.PythonCapiCallNode(  # its length, used as the slice stop below
+            slice_node.pos, "PyBytes_GET_SIZE",
+            self.PyBytes_GET_SIZE_func_type,
+            args = [unpack_temp_node],
+            is_temp = 0,
+            )
+
+        return UtilNodes.LetNode(  # hand the loop over to the C array path as ptr[:len]
+            unpack_temp_node,
+            self._transform_carray_iteration(
+                node,
+                ExprNodes.SliceIndexNode(
+                    slice_node.pos,
+                    base = slice_base_node,
+                    start = None,
+                    step = None,
+                    stop = len_node,
+                    type = slice_base_node.type,
+                    is_temp = 1,
+                    ),
+                reversed = reversed))
+
+    PyUnicode_READ_func_type = PyrexTypes.CFuncType(  # passed with the "__Pyx_PyUnicode_READ" call node
+        PyrexTypes.c_py_ucs4_type, [
+            PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_type, None),
+            PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None)
+        ])
+
+    init_unicode_iteration_func_type = PyrexTypes.CFuncType(  # for "__Pyx_init_unicode_iteration"
+        PyrexTypes.c_int_type, [
+            PyrexTypes.CFuncTypeArg("s", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("length", PyrexTypes.c_py_ssize_t_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("data", PyrexTypes.c_void_ptr_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("kind", PyrexTypes.c_int_ptr_type, None)
+        ],
+        exception_value = '-1')
+
+    def _transform_unicode_iteration(self, node, slice_node, reversed=False):
+        if slice_node.is_literal:
+            # try to reduce to byte iteration for plain Latin-1 strings
+            try:
+                bytes_value = bytes_literal(slice_node.value.encode('latin1'), 'iso8859-1')
+            except UnicodeEncodeError:
+                pass  # not encodable as Latin-1: fall through to the general path below
+            else:
+                bytes_slice = ExprNodes.SliceIndexNode(
+                    slice_node.pos,
+                    base=ExprNodes.BytesNode(
+                        slice_node.pos, value=bytes_value,
+                        constant_result=bytes_value,
+                        type=PyrexTypes.c_const_char_ptr_type).coerce_to(
+                            PyrexTypes.c_const_uchar_ptr_type, self.current_env()),
+                    start=None,
+                    stop=ExprNodes.IntNode(
+                        slice_node.pos, value=str(len(bytes_value)),
+                        constant_result=len(bytes_value),
+                        type=PyrexTypes.c_py_ssize_t_type),
+                    type=Builtin.unicode_type,  # hint for Python conversion
+                )
+                return self._transform_carray_iteration(node, bytes_slice, reversed)
+
+        unpack_temp_node = UtilNodes.LetRefNode(
+            slice_node.as_none_safe_node("'NoneType' is not iterable"))
+
+        start_node = ExprNodes.IntNode(
+            node.pos, value='0', constant_result=0, type=PyrexTypes.c_py_ssize_t_type)
+        length_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+        end_node = length_temp.ref(node.pos)
+        if reversed:
+            relation1, relation2 = '>', '>='  # loop header becomes: length > counter >= 0
+            start_node, end_node = end_node, start_node
+        else:
+            relation1, relation2 = '<=', '<'  # loop header becomes: 0 <= counter < length
+
+        kind_temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
+        data_temp = UtilNodes.TempHandle(PyrexTypes.c_void_ptr_type)
+        counter_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+
+        target_value = ExprNodes.PythonCapiCallNode(  # reads the character at the counter position
+            slice_node.pos, "__Pyx_PyUnicode_READ",
+            self.PyUnicode_READ_func_type,
+            args = [kind_temp.ref(slice_node.pos),
+                    data_temp.ref(slice_node.pos),
+                    counter_temp.ref(node.target.pos)],
+            is_temp = False,
+            )
+        if target_value.type != node.target.type:
+            target_value = target_value.coerce_to(node.target.type,
+                                                  self.current_env())
+        target_assign = Nodes.SingleAssignmentNode(
+            pos = node.target.pos,
+            lhs = node.target,
+            rhs = target_value)
+        body = Nodes.StatListNode(
+            node.pos,
+            stats = [target_assign, node.body])  # assign the loop target, then run the original body
+
+        loop_node = Nodes.ForFromStatNode(
+            node.pos,
+            bound1=start_node, relation1=relation1,
+            target=counter_temp.ref(node.target.pos),
+            relation2=relation2, bound2=end_node,
+            step=None, body=body,
+            else_clause=node.else_clause,
+            from_range=True)
+
+        setup_node = Nodes.ExprStatNode(  # fills the length/data/kind temps through their addresses
+            node.pos,
+            expr = ExprNodes.PythonCapiCallNode(
+                slice_node.pos, "__Pyx_init_unicode_iteration",
+                self.init_unicode_iteration_func_type,
+                args = [unpack_temp_node,
+                        ExprNodes.AmpersandNode(slice_node.pos, operand=length_temp.ref(slice_node.pos),
+                                                type=PyrexTypes.c_py_ssize_t_ptr_type),
+                        ExprNodes.AmpersandNode(slice_node.pos, operand=data_temp.ref(slice_node.pos),
+                                                type=PyrexTypes.c_void_ptr_ptr_type),
+                        ExprNodes.AmpersandNode(slice_node.pos, operand=kind_temp.ref(slice_node.pos),
+                                                type=PyrexTypes.c_int_ptr_type),
+                        ],
+                is_temp = True,
+                result_is_used = False,
+                utility_code=UtilityCode.load_cached("unicode_iter", "Optimize.c"),
+                ))
+        return UtilNodes.LetNode(  # setup call first, then the counting loop, inside the temps block
+            unpack_temp_node,
+            UtilNodes.TempsBlockNode(
+                node.pos, temps=[counter_temp, length_temp, data_temp, kind_temp],
+                body=Nodes.StatListNode(node.pos, stats=[setup_node, loop_node])))
+
+    def _transform_carray_iteration(self, node, slice_node, reversed=False):
+        """Rewrite a for-in loop over a C array or pointer slice as a pointer loop.
+
+        ``slice_node`` is the iterated expression.  Three shapes are handled:
+        a ``SliceIndexNode`` (``base[start:stop]``), a subscript whose index is
+        a ``SliceNode`` (``base[start:stop:step]``), and an expression of C
+        array type with a known size.
+
+        The loop is rebuilt as a ``ForFromStatNode`` whose counter is a
+        temporary pointer running between ``base + start`` and ``base + stop``.
+        At the top of each iteration a value derived from that pointer is
+        assigned to ``node.target``: the indexed element, a one-byte slice or
+        a Py_UCS4 cast for ``char*`` with a Python object target, or the
+        pointer itself for certain pointer targets.
+
+        With ``reversed=True`` the start/stop bounds are swapped and a constant
+        step is negated.
+
+        Returns the replacement ``TempsBlockNode``, or ``node`` unchanged when
+        the required bounds or step are not known.  Several of those bail-out
+        paths also report a compile error; most of them only do so when the
+        iterated base is not a Python object.
+        """
+        neg_step = False
+        if isinstance(slice_node, ExprNodes.SliceIndexNode):
+            slice_base = slice_node.base
+            start = filter_none_node(slice_node.start)
+            stop = filter_none_node(slice_node.stop)
+            step = None
+            if not stop:
+                if not slice_base.type.is_pyobject:
+                    error(slice_node.pos, "C array iteration requires known end index")
+                return node
+
+        elif slice_node.is_subscript:
+            assert isinstance(slice_node.index, ExprNodes.SliceNode)
+            slice_base = slice_node.base
+            index = slice_node.index
+            start = filter_none_node(index.start)
+            stop = filter_none_node(index.stop)
+            step = filter_none_node(index.step)
+            if step:
+                # Give up unless the step is a non-zero compile time integer and
+                # the bound that the iteration runs towards is given explicitly.
+                if not isinstance(step.constant_result, _py_int_types) \
+                       or step.constant_result == 0 \
+                       or step.constant_result > 0 and not stop \
+                       or step.constant_result < 0 and not start:
+                    if not slice_base.type.is_pyobject:
+                        error(step.pos, "C array iteration requires known step size and end index")
+                    return node
+                else:
+                    # step sign is handled internally by ForFromStatNode
+                    step_value = step.constant_result
+                    if reversed:
+                        step_value = -step_value
+                    neg_step = step_value < 0
+                    step = ExprNodes.IntNode(step.pos, type=PyrexTypes.c_py_ssize_t_type,
+                                             value=str(abs(step_value)),
+                                             constant_result=abs(step_value))
+
+        elif slice_node.type.is_array:
+            if slice_node.type.size is None:
+                error(slice_node.pos, "C array iteration requires known end index")
+                return node
+            # Iterate the whole array: from its start up to its declared size.
+            slice_base = slice_node
+            start = None
+            stop = ExprNodes.IntNode(
+                slice_node.pos, value=str(slice_node.type.size),
+                type=PyrexTypes.c_py_ssize_t_type, constant_result=slice_node.type.size)
+            step = None
+
+        else:
+            if not slice_node.type.is_pyobject:
+                error(slice_node.pos, "C array iteration requires known end index")
+            return node
+
+        # Normalise the explicit bounds to Py_ssize_t.
+        if start:
+            start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+        if stop:
+            stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+        if stop is None:
+            if neg_step:
+                # Walking downwards without an explicit stop: use index -1 as the bound.
+                stop = ExprNodes.IntNode(
+                    slice_node.pos, value='-1', type=PyrexTypes.c_py_ssize_t_type, constant_result=-1)
+            else:
+                error(slice_node.pos, "C array iteration requires known step size and end index")
+                return node
+
+        if reversed:
+            if not start:
+                start = ExprNodes.IntNode(slice_node.pos, value="0",  constant_result=0,
+                                          type=PyrexTypes.c_py_ssize_t_type)
+            # if step was provided, it was already negated above
+            start, stop = stop, start
+
+        # Pointer type shared by the loop counter and both bound expressions.
+        ptr_type = slice_base.type
+        if ptr_type.is_array:
+            ptr_type = ptr_type.element_ptr_type()
+        carray_ptr = slice_base.coerce_to_simple(self.current_env())
+
+        # Pointer arithmetic is only generated for bounds that are not the constant 0.
+        if start and start.constant_result != 0:
+            start_ptr_node = ExprNodes.AddNode(
+                start.pos,
+                operand1=carray_ptr,
+                operator='+',
+                operand2=start,
+                type=ptr_type)
+        else:
+            start_ptr_node = carray_ptr
+
+        if stop and stop.constant_result != 0:
+            stop_ptr_node = ExprNodes.AddNode(
+                stop.pos,
+                operand1=ExprNodes.CloneNode(carray_ptr),
+                operator='+',
+                operand2=stop,
+                type=ptr_type
+                ).coerce_to_simple(self.current_env())
+        else:
+            stop_ptr_node = ExprNodes.CloneNode(carray_ptr)
+
+        # The loop counter is a temporary pointer into the array.
+        counter = UtilNodes.TempHandle(ptr_type)
+        counter_temp = counter.ref(node.target.pos)
+
+        # Build the expression that is assigned to the loop target on each iteration.
+        if slice_base.type.is_string and node.target.type.is_pyobject:
+            # special case: char* -> bytes/unicode
+            if slice_node.type is Builtin.unicode_type:
+                target_value = ExprNodes.CastNode(
+                    ExprNodes.DereferenceNode(
+                        node.target.pos, operand=counter_temp,
+                        type=ptr_type.base_type),
+                    PyrexTypes.c_py_ucs4_type).coerce_to(
+                        node.target.type, self.current_env())
+            else:
+                # char* -> bytes coercion requires slicing, not indexing
+                target_value = ExprNodes.SliceIndexNode(
+                    node.target.pos,
+                    start=ExprNodes.IntNode(node.target.pos, value='0',
+                                            constant_result=0,
+                                            type=PyrexTypes.c_int_type),
+                    stop=ExprNodes.IntNode(node.target.pos, value='1',
+                                           constant_result=1,
+                                           type=PyrexTypes.c_int_type),
+                    base=counter_temp,
+                    type=Builtin.bytes_type,
+                    is_temp=1)
+        elif node.target.type.is_ptr and not node.target.type.assignable_from(ptr_type.base_type):
+            # Allow iteration with pointer target to avoid copy.
+            target_value = counter_temp
+        else:
+            # TODO: can this safely be replaced with DereferenceNode() as above?
+            target_value = ExprNodes.IndexNode(
+                node.target.pos,
+                index=ExprNodes.IntNode(node.target.pos, value='0',
+                                        constant_result=0,
+                                        type=PyrexTypes.c_int_type),
+                base=counter_temp,
+                type=ptr_type.base_type)
+
+        if target_value.type != node.target.type:
+            target_value = target_value.coerce_to(node.target.type,
+                                                  self.current_env())
+
+        target_assign = Nodes.SingleAssignmentNode(
+            pos = node.target.pos,
+            lhs = node.target,
+            rhs = target_value)
+
+        # The target assignment runs first, then the original loop body.
+        body = Nodes.StatListNode(
+            node.pos,
+            stats = [target_assign, node.body])
+
+        relation1, relation2 = self._find_for_from_node_relations(neg_step, reversed)
+
+        for_node = Nodes.ForFromStatNode(
+            node.pos,
+            bound1=start_ptr_node, relation1=relation1,
+            target=counter_temp,
+            relation2=relation2, bound2=stop_ptr_node,
+            step=step, body=body,
+            else_clause=node.else_clause,
+            from_range=True)
+
+        return UtilNodes.TempsBlockNode(
+            node.pos, temps=[counter],
+            body=for_node)
+
+    def _transform_enumerate_iteration(self, node, enumerate_function):
+        """Rewrite ``for i, x in enumerate(seq[, start])`` to iterate ``seq`` directly.
+
+        The counter is kept in a ``LetRefNode`` temp.  Two assignments are put
+        in front of the loop body: one copies the temp into the counter target,
+        the other increments the temp.  The loop target becomes the second
+        element of the original target tuple and the iterated sequence becomes
+        the first ``enumerate()`` argument.  The rewritten loop is then passed
+        to ``self._optimise_for_loop()`` again and wrapped in a ``LetNode``.
+
+        Reports an error and returns ``node`` unchanged if ``enumerate()`` got
+        no argument or more than two.  Returns ``node`` unchanged, without an
+        error, if the loop target is not a two-element sequence or the counter
+        target is neither a Python object nor a C integer.
+        """
+        args = enumerate_function.arg_tuple.args
+        if len(args) == 0:
+            error(enumerate_function.pos,
+                  "enumerate() requires an iterable argument")
+            return node
+        elif len(args) > 2:
+            error(enumerate_function.pos,
+                  "enumerate() takes at most 2 arguments")
+            return node
+
+        if not node.target.is_sequence_constructor:
+            # leave this untouched for now
+            return node
+        targets = node.target.args
+        if len(targets) != 2:
+            # leave this untouched for now
+            return node
+
+        enumerate_target, iterable_target = targets
+        counter_type = enumerate_target.type
+
+        if not counter_type.is_pyobject and not counter_type.is_int:
+            # nothing we can do here, I guess
+            return node
+
+        # Initial counter value: the explicit start argument, or a literal 0.
+        if len(args) == 2:
+            start = unwrap_coerced_node(args[1]).coerce_to(counter_type, self.current_env())
+        else:
+            start = ExprNodes.IntNode(enumerate_function.pos,
+                                      value='0',
+                                      type=counter_type,
+                                      constant_result=0)
+        temp = UtilNodes.LetRefNode(start)
+
+        # Expression "temp + 1" in the counter's type.
+        inc_expression = ExprNodes.AddNode(
+            enumerate_function.pos,
+            operand1 = temp,
+            operand2 = ExprNodes.IntNode(node.pos, value='1',
+                                         type=counter_type,
+                                         constant_result=1),
+            operator = '+',
+            type = counter_type,
+            #inplace = True,   # not worth using in-place operation for Py ints
+            is_temp = counter_type.is_pyobject
+            )
+
+        # Statements put in front of the body: "target = temp; temp = temp + 1".
+        loop_body = [
+            Nodes.SingleAssignmentNode(
+                pos = enumerate_target.pos,
+                lhs = enumerate_target,
+                rhs = temp),
+            Nodes.SingleAssignmentNode(
+                pos = enumerate_target.pos,
+                lhs = temp,
+                rhs = inc_expression)
+            ]
+
+        if isinstance(node.body, Nodes.StatListNode):
+            node.body.stats = loop_body + node.body.stats
+        else:
+            loop_body.append(node.body)
+            node.body = Nodes.StatListNode(
+                node.body.pos,
+                stats = loop_body)
+
+        # From here on the loop iterates over the enumerate() argument itself.
+        node.target = iterable_target
+        node.item = node.item.coerce_to(iterable_target.type, self.current_env())
+        node.iterator.sequence = args[0]
+
+        # recurse into loop to check for further optimisations
+        return UtilNodes.LetNode(temp, self._optimise_for_loop(node, node.iterator.sequence))
+
+    def _find_for_from_node_relations(self, neg_step_value, reversed):
+        """Return the ``(relation1, relation2)`` operator pair for a for-from loop.
+
+        The pair is selected by the truth values of ``reversed`` and
+        ``neg_step_value``.
+        """
+        # Keyed by (reversed, negative step).
+        relations_by_direction = {
+            (True, True): ('<', '<='),
+            (True, False): ('>', '>='),
+            (False, True): ('>=', '>'),
+            (False, False): ('<=', '<'),
+        }
+        return relations_by_direction[bool(reversed), bool(neg_step_value)]
+
+    def _transform_range_iteration(self, node, range_function, reversed=False):
+        """Rewrite a for-in loop over ``range(...)`` as a ``ForFromStatNode``.
+
+        ``range_function`` is the ``range()`` call node; its one to three
+        arguments supply the bounds and the step.  The step must be a non-zero
+        compile time integer, otherwise ``node`` is returned unchanged.
+
+        With ``reversed=True`` the bounds are swapped.  If additionally the
+        absolute step is not 1, the new first bound is computed from both
+        bounds and the step: immediately when both bounds are constant,
+        otherwise through ``_build_range_step_calculation()``.
+
+        Returns the new loop node.  It is wrapped in a ``LetNode`` when the
+        stop bound is not a literal and therefore kept in a temp.
+        """
+        args = range_function.arg_tuple.args
+        if len(args) < 3:
+            step_pos = range_function.pos
+            step_value = 1
+            step = ExprNodes.IntNode(step_pos, value='1', constant_result=1)
+        else:
+            step = args[2]
+            step_pos = step.pos
+            if not isinstance(step.constant_result, _py_int_types):
+                # cannot determine step direction
+                return node
+            step_value = step.constant_result
+            if step_value == 0:
+                # will lead to an error elsewhere
+                return node
+            step = ExprNodes.IntNode(step_pos, value=str(step_value),
+                                     constant_result=step_value)
+
+        if len(args) == 1:
+            bound1 = ExprNodes.IntNode(range_function.pos, value='0',
+                                       constant_result=0)
+            bound2 = args[0].coerce_to_integer(self.current_env())
+        else:
+            bound1 = args[0].coerce_to_integer(self.current_env())
+            bound2 = args[1].coerce_to_integer(self.current_env())
+
+        # Uses the sign of the original step; step_value is made positive further down.
+        relation1, relation2 = self._find_for_from_node_relations(step_value < 0, reversed)
+
+        bound2_ref_node = None
+        if reversed:
+            bound1, bound2 = bound2, bound1
+            abs_step = abs(step_value)
+            if abs_step != 1:
+                if (isinstance(bound1.constant_result, _py_int_types) and
+                        isinstance(bound2.constant_result, _py_int_types)):
+                    # calculate final bounds now
+                    if step_value < 0:
+                        begin_value = bound2.constant_result
+                        end_value = bound1.constant_result
+                        bound1_value = begin_value - abs_step * ((begin_value - end_value - 1) // abs_step) - 1
+                    else:
+                        begin_value = bound1.constant_result
+                        end_value = bound2.constant_result
+                        bound1_value = end_value + abs_step * ((begin_value - end_value - 1) // abs_step) + 1
+
+                    bound1 = ExprNodes.IntNode(
+                        bound1.pos, value=str(bound1_value), constant_result=bound1_value,
+                        type=PyrexTypes.spanning_type(bound1.type, bound2.type))
+                else:
+                    # evaluate the same expression as above at runtime
+                    bound2_ref_node = UtilNodes.LetRefNode(bound2)
+                    bound1 = self._build_range_step_calculation(
+                        bound1, bound2_ref_node, step, step_value)
+
+        # The step node handed to ForFromStatNode always carries the absolute value.
+        if step_value < 0:
+            step_value = -step_value
+        step.value = str(step_value)
+        step.constant_result = step_value
+        step = step.coerce_to_integer(self.current_env())
+
+        if not bound2.is_literal:
+            # stop bound must be immutable => keep it in a temp var
+            bound2_is_temp = True
+            # Reuse the LetRefNode that the runtime bound calculation already refers to.
+            bound2 = bound2_ref_node or UtilNodes.LetRefNode(bound2)
+        else:
+            bound2_is_temp = False
+
+        for_node = Nodes.ForFromStatNode(
+            node.pos,
+            target=node.target,
+            bound1=bound1, relation1=relation1,
+            relation2=relation2, bound2=bound2,
+            step=step, body=node.body,
+            else_clause=node.else_clause,
+            from_range=True)
+        for_node.set_up_loop(self.current_env())
+
+        if bound2_is_temp:
+            for_node = UtilNodes.LetNode(bound2, for_node)
+
+        return for_node
+
+    def _build_range_step_calculation(self, bound1, bound2_ref_node, step, step_value):
+        """Build the expression tree for the first bound of a reversed, stepped range.
+
+        The resulting expression is::
+
+            bound2 OP abs_step * ((begin - end - 1) // abs_step) OP 1
+
+        For a negative ``step_value``, OP is ``-``, ``begin`` is
+        ``bound2_ref_node`` and ``end`` is ``bound1``.  Otherwise OP is ``+``,
+        ``begin`` is ``bound1`` and ``end`` is ``bound2_ref_node``.
+        """
+        pos = bound1.pos
+        abs_step = abs(step_value)
+        spanning_type = PyrexTypes.spanning_type(bound1.type, bound2_ref_node.type)
+
+        # Avoid loss of integer precision warnings.
+        if step.type.is_int and abs_step < 0x7FFF:
+            step_base_type = PyrexTypes.c_int_type
+        else:
+            step_base_type = step.type
+        spanning_step_type = PyrexTypes.spanning_type(spanning_type, step_base_type)
+
+        if step_value < 0:
+            begin_value, end_value, final_op = bound2_ref_node, bound1, '-'
+        else:
+            begin_value, end_value, final_op = bound1, bound2_ref_node, '+'
+
+        def int_literal(number, **kwargs):
+            # A fresh literal node for every use.
+            return ExprNodes.IntNode(
+                pos, value=str(number), constant_result=number, **kwargs)
+
+        # begin - end
+        span = ExprNodes.SubNode(
+            pos, operand1=begin_value, operator='-', operand2=end_value,
+            type=spanning_type)
+        # (begin - end) - 1
+        span_less_one = ExprNodes.SubNode(
+            pos, operand1=span, operator='-', operand2=int_literal(1),
+            type=spanning_step_type)
+        # ((begin - end) - 1) // abs_step
+        whole_steps = ExprNodes.DivNode(
+            pos, operand1=span_less_one, operator='//',
+            operand2=int_literal(abs_step, type=spanning_step_type),
+            type=spanning_step_type)
+        # abs_step * whole_steps
+        offset = ExprNodes.MulNode(
+            pos, operand1=int_literal(abs_step, type=spanning_step_type),
+            operator='*', operand2=whole_steps,
+            type=spanning_step_type)
+        # bound2 +/- offset
+        shifted_bound = ExprNodes.binop_node(
+            pos, operand1=bound2_ref_node, operator=final_op, operand2=offset,
+            type=spanning_step_type)
+        # (bound2 +/- offset) +/- 1
+        return ExprNodes.binop_node(
+            pos, operand1=shifted_bound, operator=final_op,
+            operand2=int_literal(1),
+            type=spanning_type)
+
+    def _transform_dict_iteration(self, node, dict_obj, method, keys, values):
+        """Rewrite a for-in loop over a dict-like object as a while loop.
+
+        ``dict_obj`` is the iterated object.  ``method`` is a method name that
+        is passed on to ``__Pyx_dict_iterator`` and used in the None-check
+        error message; when it is falsy a NULL node is passed instead.
+        ``keys`` and ``values`` select what the loop target receives: both set
+        means key/value pairs (unpacked into a two-element target, or assigned
+        as a tuple), only ``keys`` means keys, otherwise values.
+
+        The replacement sets a position temp to 0, stores the result of
+        ``__Pyx_dict_iterator`` in an object temp and runs a condition-less
+        ``WhileStatNode`` whose body starts with a ``DictIterationNextNode``.
+
+        Returns the replacement ``TempsBlockNode``, or ``node`` unchanged when
+        both keys and values are requested and the target is a sequence of a
+        length other than 2.
+        """
+        temps = []
+        # Object temp that receives the result of __Pyx_dict_iterator.
+        temp = UtilNodes.TempHandle(PyrexTypes.py_object_type)
+        temps.append(temp)
+        dict_temp = temp.ref(dict_obj.pos)
+        # Py_ssize_t temp holding the iteration position (initialised to 0 below).
+        temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+        temps.append(temp)
+        pos_temp = temp.ref(node.pos)
+
+        key_target = value_target = tuple_target = None
+        if keys and values:
+            if node.target.is_sequence_constructor:
+                if len(node.target.args) == 2:
+                    key_target, value_target = node.target.args
+                else:
+                    # unusual case that may or may not lead to an error
+                    return node
+            else:
+                tuple_target = node.target
+        elif keys:
+            key_target = node.target
+        else:
+            value_target = node.target
+
+        if isinstance(node.body, Nodes.StatListNode):
+            body = node.body
+        else:
+            body = Nodes.StatListNode(pos = node.body.pos,
+                                      stats = [node.body])
+
+        # keep original length to guard against dict modification
+        dict_len_temp = UtilNodes.TempHandle(PyrexTypes.c_py_ssize_t_type)
+        temps.append(dict_len_temp)
+        dict_len_temp_addr = ExprNodes.AmpersandNode(
+            node.pos, operand=dict_len_temp.ref(dict_obj.pos),
+            type=PyrexTypes.c_ptr_type(dict_len_temp.type))
+        # C int temp, passed by address as the "p_is_dict" argument of the iterator call.
+        temp = UtilNodes.TempHandle(PyrexTypes.c_int_type)
+        temps.append(temp)
+        is_dict_temp = temp.ref(node.pos)
+        is_dict_temp_addr = ExprNodes.AmpersandNode(
+            node.pos, operand=is_dict_temp,
+            type=PyrexTypes.c_ptr_type(temp.type))
+
+        # The "next item" step becomes the first statement of the loop body.
+        iter_next_node = Nodes.DictIterationNextNode(
+            dict_temp, dict_len_temp.ref(dict_obj.pos), pos_temp,
+            key_target, value_target, tuple_target,
+            is_dict_temp)
+        iter_next_node = iter_next_node.analyse_expressions(self.current_env())
+        body.stats[0:0] = [iter_next_node]
+
+        # Wrap the iterated object in a None check with a matching error message.
+        if method:
+            method_node = ExprNodes.StringNode(
+                dict_obj.pos, is_identifier=True, value=method)
+            dict_obj = dict_obj.as_none_safe_node(
+                "'NoneType' object has no attribute '%{0}s'".format('.30' if len(method) <= 30 else ''),
+                error = "PyExc_AttributeError",
+                format_args = [method])
+        else:
+            method_node = ExprNodes.NullNode(dict_obj.pos)
+            dict_obj = dict_obj.as_none_safe_node("'NoneType' object is not iterable")
+
+        def flag_node(value):
+            # Integer literal node: 1 for a true value, 0 otherwise.
+            value = value and 1 or 0
+            return ExprNodes.IntNode(node.pos, value=str(value), constant_result=value)
+
+        # Statement sequence: pos = 0; dict_temp = __Pyx_dict_iterator(...); while loop.
+        result_code = [
+            Nodes.SingleAssignmentNode(
+                node.pos,
+                lhs = pos_temp,
+                rhs = ExprNodes.IntNode(node.pos, value='0',
+                                        constant_result=0)),
+            Nodes.SingleAssignmentNode(
+                dict_obj.pos,
+                lhs = dict_temp,
+                rhs = ExprNodes.PythonCapiCallNode(
+                    dict_obj.pos,
+                    "__Pyx_dict_iterator",
+                    self.PyDict_Iterator_func_type,
+                    utility_code = UtilityCode.load_cached("dict_iter", "Optimize.c"),
+                    args = [dict_obj, flag_node(dict_obj.type is Builtin.dict_type),
+                            method_node, dict_len_temp_addr, is_dict_temp_addr,
+                            ],
+                    is_temp=True,
+                )),
+            # No loop condition here; presumably the DictIterationNextNode at the
+            # top of the body ends the loop -- TODO confirm in Nodes.py.
+            Nodes.WhileStatNode(
+                node.pos,
+                condition = None,
+                body = body,
+                else_clause = node.else_clause
+                )
+            ]
+
+        return UtilNodes.TempsBlockNode(
+            node.pos, temps=temps,
+            body=Nodes.StatListNode(
+                node.pos,
+                stats = result_code
+                ))
+
+    # C signature used for the "__Pyx_dict_iterator" call built in
+    # _transform_dict_iteration(): returns a Python object and takes the
+    # iterated object, an "is_dict" int flag, the method name object and two
+    # pointer arguments (Py_ssize_t* "p_orig_length", int* "p_is_dict").
+    PyDict_Iterator_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("dict",  PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("is_dict",  PyrexTypes.c_int_type, None),
+            PyrexTypes.CFuncTypeArg("method_name",  PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("p_orig_length",  PyrexTypes.c_py_ssize_t_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("p_is_dict",  PyrexTypes.c_int_ptr_type, None),
+            ])
+
+    # C signature used for the "__Pyx_set_iterator" call built in
+    # _transform_set_iteration(): returns a Python object and takes the
+    # iterated object, an "is_set" int flag and two pointer arguments
+    # (Py_ssize_t* "p_orig_length", int* "p_is_set").
+    PySet_Iterator_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("set",  PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("is_set",  PyrexTypes.c_int_type, None),
+            PyrexTypes.CFuncTypeArg("p_orig_length",  PyrexTypes.c_py_ssize_t_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("p_is_set",  PyrexTypes.c_int_ptr_type, None),
+            ])
+
+    def _transform_set_iteration(self, node, set_obj):
+        """Rewrite a for-in loop over a set-like object as a while loop.
+
+        The replacement sets a position temp to 0, stores the result of
+        ``__Pyx_set_iterator`` in an object temp and runs a condition-less
+        ``WhileStatNode`` whose body starts with a ``SetIterationNextNode``
+        that assigns to the original loop target.  Everything is wrapped in
+        a ``TempsBlockNode`` that owns the four temps.
+        """
+        loop_pos = node.pos
+        set_pos = set_obj.pos
+        ssize_t_type = PyrexTypes.c_py_ssize_t_type
+
+        iterator_handle = UtilNodes.TempHandle(PyrexTypes.py_object_type)
+        position_handle = UtilNodes.TempHandle(ssize_t_type)
+        # keep original length to guard against set modification
+        length_handle = UtilNodes.TempHandle(ssize_t_type)
+        is_set_handle = UtilNodes.TempHandle(PyrexTypes.c_int_type)
+        temps = [iterator_handle, position_handle, length_handle, is_set_handle]
+
+        set_temp = iterator_handle.ref(set_pos)
+        pos_temp = position_handle.ref(loop_pos)
+        is_set_temp = is_set_handle.ref(loop_pos)
+
+        body = node.body
+        if not isinstance(body, Nodes.StatListNode):
+            body = Nodes.StatListNode(pos=body.pos, stats=[body])
+
+        # Addresses of the length and flag temps, passed to the iterator call.
+        length_addr = ExprNodes.AmpersandNode(
+            loop_pos,
+            operand=length_handle.ref(set_pos),
+            type=PyrexTypes.c_ptr_type(length_handle.type))
+        is_set_addr = ExprNodes.AmpersandNode(
+            loop_pos,
+            operand=is_set_temp,
+            type=PyrexTypes.c_ptr_type(is_set_handle.type))
+
+        # The "next item" step becomes the first statement of the loop body.
+        next_item = Nodes.SetIterationNextNode(
+            set_temp, length_handle.ref(set_pos), pos_temp, node.target, is_set_temp)
+        next_item = next_item.analyse_expressions(self.current_env())
+        body.stats.insert(0, next_item)
+
+        def flag_node(value):
+            flag = 1 if value else 0
+            return ExprNodes.IntNode(loop_pos, value=str(flag), constant_result=flag)
+
+        reset_position = Nodes.SingleAssignmentNode(
+            loop_pos,
+            lhs=pos_temp,
+            rhs=ExprNodes.IntNode(loop_pos, value='0', constant_result=0))
+
+        create_iterator = Nodes.SingleAssignmentNode(
+            set_pos,
+            lhs=set_temp,
+            rhs=ExprNodes.PythonCapiCallNode(
+                set_pos,
+                "__Pyx_set_iterator",
+                self.PySet_Iterator_func_type,
+                utility_code=UtilityCode.load_cached("set_iter", "Optimize.c"),
+                args=[
+                    set_obj,
+                    flag_node(set_obj.type is Builtin.set_type),
+                    length_addr,
+                    is_set_addr,
+                ],
+                is_temp=True,
+            ))
+
+        iteration_loop = Nodes.WhileStatNode(
+            loop_pos,
+            condition=None,
+            body=body,
+            else_clause=node.else_clause,
+        )
+
+        return UtilNodes.TempsBlockNode(
+            loop_pos,
+            temps=temps,
+            body=Nodes.StatListNode(
+                loop_pos,
+                stats=[reset_position, create_iterator, iteration_loop]))
+
+
+class SwitchTransform(Visitor.EnvTransform):
+    """
+    This transformation tries to turn long if statements into C switch statements.
+    The requirement is that every clause be an (or of) var == value, where the var
+    is common among all clauses and both var and value are ints.
+    """
+    NO_MATCH = (None, None, None)
+
+    def extract_conditions(self, cond, allow_not_in):
+        """Split a condition into the tested operand and the values it is compared to.
+
+        Returns a tuple ``(not_in, operand, values)``: ``not_in`` tells whether
+        the test is a negated one (``!=`` / ``not in``), ``operand`` is the
+        common tested expression and ``values`` is a list of value nodes.
+        Returns ``self.NO_MATCH`` when the condition does not have a
+        supported form.
+
+        Negated tests are only accepted when ``allow_not_in`` is true.
+        """
+        # Strip wrapper nodes until the actual comparison or boolean operator is reached.
+        while True:
+            if isinstance(cond, (ExprNodes.CoerceToTempNode,
+                                 ExprNodes.CoerceToBooleanNode)):
+                cond = cond.arg
+            elif isinstance(cond, ExprNodes.BoolBinopResultNode):
+                cond = cond.arg.arg
+            elif isinstance(cond, UtilNodes.EvalWithTempExprNode):
+                # this is what we get from the FlattenInListTransform
+                cond = cond.subexpression
+            elif isinstance(cond, ExprNodes.TypecastNode):
+                cond = cond.operand
+            else:
+                break
+
+        if isinstance(cond, ExprNodes.PrimaryCmpNode):
+            if cond.cascade is not None:
+                return self.NO_MATCH
+            elif cond.is_c_string_contains() and \
+                   isinstance(cond.operand2, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
+                # Containment test against a string literal: one value per distinct character.
+                not_in = cond.operator == 'not_in'
+                if not_in and not allow_not_in:
+                    return self.NO_MATCH
+                if isinstance(cond.operand2, ExprNodes.UnicodeNode) and \
+                       cond.operand2.contains_surrogates():
+                    # dealing with surrogates leads to different
+                    # behaviour on wide and narrow Unicode
+                    # platforms => refuse to optimise this case
+                    return self.NO_MATCH
+                return not_in, cond.operand1, self.extract_in_string_conditions(cond.operand2)
+            elif not cond.is_python_comparison():
+                if cond.operator == '==':
+                    not_in = False
+                elif allow_not_in and cond.operator == '!=':
+                    not_in = True
+                else:
+                    return self.NO_MATCH
+                # this looks somewhat silly, but it does the right
+                # checks for NameNode and AttributeNode
+                if is_common_value(cond.operand1, cond.operand1):
+                    if cond.operand2.is_literal:
+                        return not_in, cond.operand1, [cond.operand2]
+                    elif getattr(cond.operand2, 'entry', None) \
+                             and cond.operand2.entry.is_const:
+                        return not_in, cond.operand1, [cond.operand2]
+                # Same check with the operands swapped ("value == var").
+                if is_common_value(cond.operand2, cond.operand2):
+                    if cond.operand1.is_literal:
+                        return not_in, cond.operand2, [cond.operand1]
+                    elif getattr(cond.operand1, 'entry', None) \
+                             and cond.operand1.entry.is_const:
+                        return not_in, cond.operand2, [cond.operand1]
+        elif isinstance(cond, ExprNodes.BoolBinopNode):
+            if cond.operator == 'or' or (allow_not_in and cond.operator == 'and'):
+                # Negated tests are only allowed below an 'and', never below an 'or'.
+                allow_not_in = (cond.operator == 'and')
+                not_in_1, t1, c1 = self.extract_conditions(cond.operand1, allow_not_in)
+                not_in_2, t2, c2 = self.extract_conditions(cond.operand2, allow_not_in)
+                # Both sides must test the same operand with the same polarity.
+                if t1 is not None and not_in_1 == not_in_2 and is_common_value(t1, t2):
+                    if (not not_in_1) or allow_not_in:
+                        return not_in_1, t1, c1+c2
+        return self.NO_MATCH
+
+    def extract_in_string_conditions(self, string_literal):
+        """Return one value node per distinct character of ``string_literal``.
+
+        For a ``UnicodeNode`` the result is a list of ``IntNode`` objects
+        holding the code points in ascending order.  For any other literal it
+        is a list of ``CharNode`` objects built from the sorted distinct
+        one-character slices of the value.
+        """
+        pos = string_literal.pos
+        if isinstance(string_literal, ExprNodes.UnicodeNode):
+            code_points = sorted(ord(char) for char in set(string_literal.value))
+            return [
+                ExprNodes.IntNode(pos, value=str(code_point), constant_result=code_point)
+                for code_point in code_points
+            ]
+
+        # this is a bit tricky as Py3's bytes type returns
+        # integers on iteration, whereas Py2 returns 1-char byte
+        # strings
+        data = string_literal.value
+        single_chars = sorted({data[i:i+1] for i in range(len(data))})
+        return [
+            ExprNodes.CharNode(pos, value=single_char, constant_result=single_char)
+            for single_char in single_chars
+        ]
+
+    def extract_common_conditions(self, common_var, condition, allow_not_in):
+        """Extract the conditions of one clause and check them against ``common_var``.
+
+        Returns ``(not_in, var, conditions)`` as produced by
+        ``extract_conditions()``.  Returns ``self.NO_MATCH`` when nothing could
+        be extracted, when the tested operand is not the same value as a
+        given ``common_var``, or when the operand or any of the compared
+        values is neither of integer nor of enum type.
+        """
+        def has_switchable_type(expr):
+            return expr.type.is_int or expr.type.is_enum
+
+        not_in, var, conditions = self.extract_conditions(condition, allow_not_in)
+        if var is None:
+            return self.NO_MATCH
+        if common_var is not None and not is_common_value(var, common_var):
+            return self.NO_MATCH
+        if not has_switchable_type(var):
+            return self.NO_MATCH
+        # Evaluated eagerly as a list, so every value's type gets inspected.
+        if not all([has_switchable_type(cond) for cond in conditions]):
+            return self.NO_MATCH
+        return not_in, var, conditions
+
+    def has_duplicate_values(self, condition_values):
+        """Return True if two of the given value nodes map to the same key.
+
+        Nodes with a constant result are keyed by that result.  Other nodes
+        are keyed through their ``entry``: by its ``enum_int_value`` for
+        (C++) enum entries that have one, otherwise by its ``cname``.
+        A node without usable entry information counts as a duplicate.
+        """
+        # duplicated values don't work in a switch statement
+        seen_keys = set()
+        for value in condition_values:
+            if value.has_constant_result():
+                key = value.constant_result
+            else:
+                # this isn't completely safe as we don't know the
+                # final C value, but this is about the best we can do
+                try:
+                    entry = value.entry
+                    entry_type = entry.type
+                    is_enum_entry = entry_type.is_enum or entry_type.is_cpp_enum
+                    if is_enum_entry and entry.enum_int_value is not None:
+                        key = entry.enum_int_value
+                    else:
+                        key = entry.cname
+                except AttributeError:
+                    return True  # play safe
+            if key in seen_keys:
+                return True
+            seen_keys.add(key)
+        return False
+
+    def visit_IfStatNode(self, node):
+        """Turn an if-statement into a SwitchStatNode where possible.
+
+        Only applies when the 'optimize.use_switch' directive is set, every
+        clause yields conditions on one common variable, and there are at
+        least two condition values without duplicates.  Otherwise the node
+        is returned after visiting its children.
+        """
+        if not self.current_directives.get('optimize.use_switch'):
+            self.visitchildren(node)
+            return node
+
+        switch_var = None
+        switch_cases = []
+        for clause in node.if_clauses:
+            _, switch_var, clause_values = self.extract_common_conditions(
+                switch_var, clause.condition, False)
+            if switch_var is None:
+                # this clause cannot become a switch case => keep the if-statement
+                self.visitchildren(node)
+                return node
+            switch_cases.append(Nodes.SwitchCaseNode(
+                pos=clause.pos, conditions=clause_values, body=clause.body))
+
+        all_values = []
+        for case in switch_cases:
+            all_values.extend(case.conditions)
+        if len(all_values) < 2 or self.has_duplicate_values(all_values):
+            self.visitchildren(node)
+            return node
+
+        # Recurse into body subtrees that we left untouched so far.
+        self.visitchildren(node, 'else_clause')
+        for case in switch_cases:
+            self.visitchildren(case, 'body')
+
+        switch_var = unwrap_node(switch_var)
+        return Nodes.SwitchStatNode(
+            pos=node.pos,
+            test=switch_var,
+            cases=switch_cases,
+            else_clause=node.else_clause)
+
+    def visit_CondExprNode(self, node):
+        """Replace a conditional expression by a switch if its test allows it.
+
+        Requires the 'optimize.use_switch' directive and at least two
+        duplicate-free condition values on a common variable.
+        """
+        if not self.current_directives.get('optimize.use_switch'):
+            self.visitchildren(node)
+            return node
+
+        not_in, common_var, conditions = self.extract_common_conditions(
+            None, node.test, True)
+        switchable = (common_var is not None
+                      and len(conditions) >= 2
+                      and not self.has_duplicate_values(conditions))
+        if not switchable:
+            self.visitchildren(node)
+            return node
+
+        return self.build_simple_switch_statement(
+            node, common_var, conditions, not_in,
+            node.true_val, node.false_val)
+
+    def visit_BoolBinopNode(self, node):
+        """Replace a boolean and/or expression by a switch where possible.
+
+        The switch assigns a True or False BoolNode to the result.
+        """
+        if not self.current_directives.get('optimize.use_switch'):
+            self.visitchildren(node)
+            return node
+
+        not_in, common_var, conditions = self.extract_common_conditions(
+            None, node, True)
+        switchable = (common_var is not None
+                      and len(conditions) >= 2
+                      and not self.has_duplicate_values(conditions))
+        if not switchable:
+            self.visitchildren(node)
+            node.wrap_operands(self.current_env())  # in case we changed the operands
+            return node
+
+        true_node = ExprNodes.BoolNode(node.pos, value=True, constant_result=True)
+        false_node = ExprNodes.BoolNode(node.pos, value=False, constant_result=False)
+        return self.build_simple_switch_statement(
+            node, common_var, conditions, not_in, true_node, false_node)
+
+    def visit_PrimaryCmpNode(self, node):
+        """Replace a comparison by a switch where possible.
+
+        The switch assigns a True or False BoolNode to the result.
+        """
+        if not self.current_directives.get('optimize.use_switch'):
+            self.visitchildren(node)
+            return node
+
+        not_in, common_var, conditions = self.extract_common_conditions(
+            None, node, True)
+        switchable = (common_var is not None
+                      and len(conditions) >= 2
+                      and not self.has_duplicate_values(conditions))
+        if not switchable:
+            self.visitchildren(node)
+            return node
+
+        true_node = ExprNodes.BoolNode(node.pos, value=True, constant_result=True)
+        false_node = ExprNodes.BoolNode(node.pos, value=False, constant_result=False)
+        return self.build_simple_switch_statement(
+            node, common_var, conditions, not_in, true_node, false_node)
+
+    def build_simple_switch_statement(self, node, common_var, conditions,
+                                      not_in, true_val, false_val):
+        """Build a single-case switch that computes the value of ``node``.
+
+        The one case holding ``conditions`` assigns ``true_val`` to a result
+        reference and the else clause assigns ``false_val``; the two are
+        swapped when ``not_in`` is set.  Both values are coerced to
+        ``node.type``.  Returns a TempResultFromStatNode wrapping the switch.
+        """
+        result_ref = UtilNodes.ResultRefNode(node)
+
+        def assign_result(value_node):
+            return Nodes.SingleAssignmentNode(
+                node.pos,
+                lhs=result_ref,
+                rhs=value_node.coerce_to(node.type, self.current_env()),
+                first=True)
+
+        match_body = assign_result(true_val)
+        default_body = assign_result(false_val)
+        if not_in:
+            match_body, default_body = default_body, match_body
+
+        single_case = Nodes.SwitchCaseNode(
+            pos=node.pos, conditions=conditions, body=match_body)
+
+        common_var = unwrap_node(common_var)
+        switch_node = Nodes.SwitchStatNode(
+            pos=node.pos,
+            test=common_var,
+            cases=[single_case],
+            else_clause=default_body)
+        return UtilNodes.TempResultFromStatNode(result_ref, switch_node)
+
+    def visit_EvalWithTempExprNode(self, node):
+        """Drop the temp wrapper if its subexpression was replaced and no
+        longer uses the temp.
+        """
+        if not self.current_directives.get('optimize.use_switch'):
+            self.visitchildren(node)
+            return node
+
+        # drop unused expression temp from FlattenInListTransform
+        original_expr = node.subexpression
+        temp_ref = node.lazy_temp
+        self.visitchildren(node)
+        # node was restructured => check if temp is still used
+        restructured = node.subexpression is not original_expr
+        if restructured and not Visitor.tree_contains(node.subexpression, temp_ref):
+            return node.subexpression
+        return node
+
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
+class FlattenInListTransform(Visitor.VisitorTransform, SkipDeclarations):
+    """
+    Rewrites membership tests against a literal tuple, list or set, i.e.
+    "x in [val1, ..., valn]" and "x not in [...]", as a sequence of '=='
+    comparisons joined by 'or' ('!=' joined by 'and' for "not in").
+    """
+
+    def visit_PrimaryCmpNode(self, node):
+        self.visitchildren(node)
+        if node.cascade is not None:
+            return node
+
+        if node.operator == 'in':
+            joiner, comparison = 'or', '=='
+        elif node.operator == 'not_in':
+            joiner, comparison = 'and', '!='
+        else:
+            return node
+
+        container = node.operand2
+        literal_types = (ExprNodes.TupleNode, ExprNodes.ListNode, ExprNodes.SetNode)
+        if not isinstance(container, literal_types):
+            return node
+
+        items = container.args
+        if len(items) == 0:
+            # note: lhs may have side effects
+            return node
+
+        starred_flags = [item.is_starred for item in items]
+        if any(starred_flags):
+            # Starred arguments do not directly translate to comparisons or "in" tests.
+            return node
+
+        lhs = UtilNodes.ResultRefNode(node.operand1)
+
+        comparisons = []
+        rhs_temps = []
+        for item in items:
+            # Trial optimisation to avoid redundant temp assignments.
+            # is_simple() is meant to be called after type analysis, so
+            # any error is ignored and the item is treated as non-simple.
+            try:
+                item_is_simple = item.is_simple()
+            except Exception:
+                item_is_simple = False
+            if not item_is_simple:
+                # must evaluate all non-simple RHS before doing the comparisons
+                item = UtilNodes.LetRefNode(item)
+                rhs_temps.append(item)
+            comparison_node = ExprNodes.PrimaryCmpNode(
+                pos=node.pos,
+                operand1=lhs,
+                operator=comparison,
+                operand2=item,
+                cascade=None)
+            comparisons.append(ExprNodes.TypecastNode(
+                pos=node.pos,
+                operand=comparison_node,
+                type=PyrexTypes.c_bint_type))
+
+        # left fold of all comparisons into one boolean expression
+        condition = comparisons[0]
+        for next_comparison in comparisons[1:]:
+            condition = ExprNodes.BoolBinopNode(
+                pos=node.pos,
+                operator=joiner,
+                operand1=condition,
+                operand2=next_comparison)
+
+        result = UtilNodes.EvalWithTempExprNode(lhs, condition)
+        for temp in reversed(rhs_temps):
+            result = UtilNodes.EvalWithTempExprNode(temp, result)
+        return result
+
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
+class DropRefcountingTransform(Visitor.VisitorTransform):
+    """Switch off managed references on nodes where that is safe.
+    """
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+    def visit_ParallelAssignmentNode(self, node):
+        """
+        Parallel swap assignments like 'a,b = b,a' are safe.
+        """
+        left_names, right_names = [], []
+        left_indices, right_indices = [], []
+        temps = []
+
+        for stat in node.stats:
+            if not isinstance(stat, Nodes.SingleAssignmentNode):
+                # also covers CascadedAssignmentNode (FIXME)
+                return node
+            if not (self._extract_operand(stat.lhs, left_names,
+                                          left_indices, temps)
+                    and self._extract_operand(stat.rhs, right_names,
+                                              right_indices, temps)):
+                return node
+
+        if left_names or right_names:
+            # lhs/rhs names must be a non-redundant permutation
+            lname_set = set([path for path, _ in left_names])
+            rname_set = set([path for path, _ in right_names])
+            if lname_set != rname_set or len(lname_set) != len(right_names):
+                return node
+
+        if left_indices or right_indices:
+            # base name and index of index nodes must be a
+            # non-redundant permutation
+            index_id_sets = []
+            for index_nodes in (left_indices, right_indices):
+                index_ids = []
+                for index_node in index_nodes:
+                    index_id = self._extract_index_id(index_node)
+                    if not index_id:
+                        return node
+                    index_ids.append(index_id)
+                index_id_sets.append(set(index_ids))
+
+            lindex_set, rindex_set = index_id_sets
+            if lindex_set != rindex_set or len(lindex_set) != len(right_indices):
+                return node
+
+            # really supporting IndexNode requires support in
+            # __Pyx_GetItemInt(), so let's stop short for now
+            return node
+
+        temp_args = [temp.arg for temp in temps]
+        for temp in temps:
+            temp.use_managed_ref = False
+
+        for _, name_node in left_names + right_names:
+            if name_node not in temp_args:
+                name_node.use_managed_ref = False
+
+        for index_node in left_indices + right_indices:
+            index_node.use_managed_ref = False
+
+        return node
+
+    def _extract_operand(self, node, names, indices, temps):
+        """Classify one assignment operand, recording it in the given lists.
+
+        Appends a (dotted name path, node) pair to ``names`` or the node to
+        ``indices``, and any CoerceToTempNode wrapper to ``temps``.
+        Returns False if the operand is of no supported kind.
+        """
+        node = unwrap_node(node)
+        if not node.type.is_pyobject:
+            return False
+        if isinstance(node, ExprNodes.CoerceToTempNode):
+            temps.append(node)
+            node = node.arg
+
+        # walk down the attribute chain, collecting member names
+        path_parts = []
+        target = node
+        while target.is_attribute:
+            if target.is_py_attr:
+                return False
+            path_parts.append(target.member)
+            target = target.obj
+
+        if target.is_name:
+            path_parts.append(target.name)
+            names.append( ('.'.join(reversed(path_parts)), node) )
+            return True
+
+        if not node.is_subscript:
+            return False
+        if (node.base.type != Builtin.list_type
+                or not node.index.type.is_int
+                or not node.base.is_name):
+            return False
+        indices.append(node)
+        return True
+
+    def _extract_index_id(self, index_node):
+        """Return (base name, index name) for a name-indexed node, else None.
+        """
+        base = index_node.base
+        index = index_node.index
+        if not isinstance(index, ExprNodes.NameNode):
+            # FIXME: constant indices (ExprNodes.ConstNode) are not handled
+            return None
+        return (base.name, index.name)
+
+
+class EarlyReplaceBuiltinCalls(Visitor.EnvTransform):
+    """Optimize some common calls to builtin types *before* the type
+    analysis phase and *after* the declarations analysis phase.
+
+    This transform cannot make use of any argument types, but it can
+    restructure the tree in a way that the type analysis phase can
+    respond to.
+
+    Introducing C function calls here may not be a good idea.  Move
+    them to the OptimizeBuiltinCalls transform instead, which runs
+    after type analysis.
+    """
+    # only intercept on call nodes
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+    def visit_SimpleCallNode(self, node):
+        self.visitchildren(node)
+        function = node.function
+        if not self._function_is_builtin_name(function):
+            return node
+        return self._dispatch_to_handler(node, function, node.args)
+
+    def visit_GeneralCallNode(self, node):
+        self.visitchildren(node)
+        function = node.function
+        if not self._function_is_builtin_name(function):
+            return node
+        arg_tuple = node.positional_args
+        if not isinstance(arg_tuple, ExprNodes.TupleNode):
+            return node
+        args = arg_tuple.args
+        return self._dispatch_to_handler(
+            node, function, args, node.keyword_args)
+
+    def _function_is_builtin_name(self, function):
+        if not function.is_name:
+            return False
+        env = self.current_env()
+        entry = env.lookup(function.name)
+        if entry is not env.builtin_scope().lookup_here(function.name):
+            return False
+        # if entry is None, it's at least an undeclared name, so likely builtin
+        return True
+
+    def _dispatch_to_handler(self, node, function, args, kwargs=None):
+        if kwargs is None:
+            handler_name = '_handle_simple_function_%s' % function.name
+        else:
+            handler_name = '_handle_general_function_%s' % function.name
+        handle_call = getattr(self, handler_name, None)
+        if handle_call is not None:
+            if kwargs is None:
+                return handle_call(node, args)
+            else:
+                return handle_call(node, args, kwargs)
+        return node
+
+    def _inject_capi_function(self, node, cname, func_type, utility_code=None):
+        node.function = ExprNodes.PythonCapiFunctionNode(
+            node.function.pos, node.function.name, cname, func_type,
+            utility_code = utility_code)
+
+    def _error_wrong_arg_count(self, function_name, node, args, expected=None):
+        if not expected:  # None or 0
+            arg_str = ''
+        elif isinstance(expected, basestring) or expected > 1:
+            arg_str = '...'
+        elif expected == 1:
+            arg_str = 'x'
+        else:
+            arg_str = ''
+        if expected is not None:
+            expected_str = 'expected %s, ' % expected
+        else:
+            expected_str = ''
+        error(node.pos, "%s(%s) called with wrong number of args, %sfound %d" % (
+            function_name, arg_str, expected_str, len(args)))
+
+    # specific handlers for simple call nodes
+
+    def _handle_simple_function_float(self, node, pos_args):
+        if not pos_args:
+            return ExprNodes.FloatNode(node.pos, value='0.0')
+        if len(pos_args) > 1:
+            self._error_wrong_arg_count('float', node, pos_args, 1)
+        arg_type = getattr(pos_args[0], 'type', None)
+        if arg_type in (PyrexTypes.c_double_type, Builtin.float_type):
+            return pos_args[0]
+        return node
+
+    def _handle_simple_function_slice(self, node, pos_args):
+        arg_count = len(pos_args)
+        start = step = None
+        if arg_count == 1:
+            stop, = pos_args
+        elif arg_count == 2:
+            start, stop = pos_args
+        elif arg_count == 3:
+            start, stop, step = pos_args
+        else:
+            self._error_wrong_arg_count('slice', node, pos_args)
+            return node
+        return ExprNodes.SliceNode(
+            node.pos,
+            start=start or ExprNodes.NoneNode(node.pos),
+            stop=stop,
+            step=step or ExprNodes.NoneNode(node.pos))
+
+    def _handle_simple_function_ord(self, node, pos_args):
+        """Unpack ord('X').
+        """
+        if len(pos_args) != 1:
+            return node
+        arg = pos_args[0]
+        if isinstance(arg, (ExprNodes.UnicodeNode, ExprNodes.BytesNode)):
+            if len(arg.value) == 1:
+                return ExprNodes.IntNode(
+                    arg.pos, type=PyrexTypes.c_long_type,
+                    value=str(ord(arg.value)),
+                    constant_result=ord(arg.value)
+                )
+        elif isinstance(arg, ExprNodes.StringNode):
+            if arg.unicode_value and len(arg.unicode_value) == 1 \
+                    and ord(arg.unicode_value) <= 255:  # Py2/3 portability
+                return ExprNodes.IntNode(
+                    arg.pos, type=PyrexTypes.c_int_type,
+                    value=str(ord(arg.unicode_value)),
+                    constant_result=ord(arg.unicode_value)
+                )
+        return node
+
+    # sequence processing
+
+    def _handle_simple_function_all(self, node, pos_args):
+        """Transform
+
+        _result = all(p(x) for L in LL for x in L)
+
+        into
+
+        for L in LL:
+            for x in L:
+                if not p(x):
+                    return False
+        else:
+            return True
+        """
+        return self._transform_any_all(node, pos_args, False)
+
+    def _handle_simple_function_any(self, node, pos_args):
+        """Transform
+
+        _result = any(p(x) for L in LL for x in L)
+
+        into
+
+        for L in LL:
+            for x in L:
+                if p(x):
+                    return True
+        else:
+            return False
+        """
+        return self._transform_any_all(node, pos_args, True)
+
+    def _transform_any_all(self, node, pos_args, is_any):
+        if len(pos_args) != 1:
+            return node
+        if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+            return node
+        gen_expr_node = pos_args[0]
+        generator_body = gen_expr_node.def_node.gbody
+        loop_node = generator_body.body
+        yield_expression, yield_stat_node = _find_single_yield_expression(loop_node)
+        if yield_expression is None:
+            return node
+
+        if is_any:
+            condition = yield_expression
+        else:
+            condition = ExprNodes.NotNode(yield_expression.pos, operand=yield_expression)
+
+        test_node = Nodes.IfStatNode(
+            yield_expression.pos, else_clause=None, if_clauses=[
+                Nodes.IfClauseNode(
+                    yield_expression.pos,
+                    condition=condition,
+                    body=Nodes.ReturnStatNode(
+                        node.pos,
+                        value=ExprNodes.BoolNode(yield_expression.pos, value=is_any, constant_result=is_any))
+                )]
+        )
+        loop_node.else_clause = Nodes.ReturnStatNode(
+            node.pos,
+            value=ExprNodes.BoolNode(yield_expression.pos, value=not is_any, constant_result=not is_any))
+
+        Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, test_node)
+
+        return ExprNodes.InlinedGeneratorExpressionNode(
+            gen_expr_node.pos, gen=gen_expr_node, orig_func='any' if is_any else 'all')
+
+    PySequence_List_func_type = PyrexTypes.CFuncType(
+        Builtin.list_type,
+        [PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)])
+
+    def _handle_simple_function_sorted(self, node, pos_args):
+        """Transform sorted(genexpr) and sorted([listcomp]) into
+        [listcomp].sort().  CPython just reads the iterable into a
+        list and calls .sort() on it.  Expanding the iterable in a
+        listcomp is still faster and the result can be sorted in
+        place.
+        """
+        if len(pos_args) != 1:
+            return node
+
+        arg = pos_args[0]
+        if isinstance(arg, ExprNodes.ComprehensionNode) and arg.type is Builtin.list_type:
+            list_node = arg
+            loop_node = list_node.loop
+
+        elif isinstance(arg, ExprNodes.GeneratorExpressionNode):
+            gen_expr_node = arg
+            loop_node = gen_expr_node.loop
+            yield_statements = _find_yield_statements(loop_node)
+            if not yield_statements:
+                return node
+
+            list_node = ExprNodes.InlinedGeneratorExpressionNode(
+                node.pos, gen_expr_node, orig_func='sorted',
+                comprehension_type=Builtin.list_type)
+
+            for yield_expression, yield_stat_node in yield_statements:
+                append_node = ExprNodes.ComprehensionAppendNode(
+                    yield_expression.pos,
+                    expr=yield_expression,
+                    target=list_node.target)
+                Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
+
+        elif arg.is_sequence_constructor:
+            # sorted([a, b, c]) or sorted((a, b, c)).  The result is always a list,
+            # so starting off with a fresh one is more efficient.
+            list_node = loop_node = arg.as_list()
+
+        else:
+            # Interestingly, PySequence_List works on a lot of non-sequence
+            # things as well.
+            list_node = loop_node = ExprNodes.PythonCapiCallNode(
+                node.pos,
+                "__Pyx_PySequence_ListKeepNew"
+                    if arg.is_temp and arg.type in (PyrexTypes.py_object_type, Builtin.list_type)
+                    else "PySequence_List",
+                self.PySequence_List_func_type,
+                args=pos_args, is_temp=True)
+
+        result_node = UtilNodes.ResultRefNode(
+            pos=loop_node.pos, type=Builtin.list_type, may_hold_none=False)
+        list_assign_node = Nodes.SingleAssignmentNode(
+            node.pos, lhs=result_node, rhs=list_node, first=True)
+
+        sort_method = ExprNodes.AttributeNode(
+            node.pos, obj=result_node, attribute=EncodedString('sort'),
+            # entry ? type ?
+            needs_none_check=False)
+        sort_node = Nodes.ExprStatNode(
+            node.pos, expr=ExprNodes.SimpleCallNode(
+                node.pos, function=sort_method, args=[]))
+
+        sort_node.analyse_declarations(self.current_env())
+
+        return UtilNodes.TempResultFromStatNode(
+            result_node,
+            Nodes.StatListNode(node.pos, stats=[list_assign_node, sort_node]))
+
+    def __handle_simple_function_sum(self, node, pos_args):
+        """Transform sum(genexpr) into an equivalent inlined aggregation loop.
+        """
+        if len(pos_args) not in (1,2):
+            return node
+        if not isinstance(pos_args[0], (ExprNodes.GeneratorExpressionNode,
+                                        ExprNodes.ComprehensionNode)):
+            return node
+        gen_expr_node = pos_args[0]
+        loop_node = gen_expr_node.loop
+
+        if isinstance(gen_expr_node, ExprNodes.GeneratorExpressionNode):
+            yield_expression, yield_stat_node = _find_single_yield_expression(loop_node)
+            # FIXME: currently nonfunctional
+            yield_expression = None
+            if yield_expression is None:
+                return node
+        else:  # ComprehensionNode
+            yield_stat_node = gen_expr_node.append
+            yield_expression = yield_stat_node.expr
+            try:
+                if not yield_expression.is_literal or not yield_expression.type.is_int:
+                    return node
+            except AttributeError:
+                return node  # in case we don't have a type yet
+            # special case: old Py2 backwards compatible "sum([int_const for ...])"
+            # can safely be unpacked into a genexpr
+
+        if len(pos_args) == 1:
+            start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
+        else:
+            start = pos_args[1]
+
+        result_ref = UtilNodes.ResultRefNode(pos=node.pos, type=PyrexTypes.py_object_type)
+        add_node = Nodes.SingleAssignmentNode(
+            yield_expression.pos,
+            lhs = result_ref,
+            rhs = ExprNodes.binop_node(node.pos, '+', result_ref, yield_expression)
+            )
+
+        Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, add_node)
+
+        exec_code = Nodes.StatListNode(
+            node.pos,
+            stats = [
+                Nodes.SingleAssignmentNode(
+                    start.pos,
+                    lhs = UtilNodes.ResultRefNode(pos=node.pos, expression=result_ref),
+                    rhs = start,
+                    first = True),
+                loop_node
+                ])
+
+        return ExprNodes.InlinedGeneratorExpressionNode(
+            gen_expr_node.pos, loop = exec_code, result_node = result_ref,
+            expr_scope = gen_expr_node.expr_scope, orig_func = 'sum',
+            has_local_scope = gen_expr_node.has_local_scope)
+
+    def _handle_simple_function_min(self, node, pos_args):
+        return self._optimise_min_max(node, pos_args, '<')
+
+    def _handle_simple_function_max(self, node, pos_args):
+        return self._optimise_min_max(node, pos_args, '>')
+
+    def _optimise_min_max(self, node, args, operator):
+        """Replace min(a,b,...) and max(a,b,...) by explicit comparison code.
+        """
+        if len(args) <= 1:
+            if len(args) == 1 and args[0].is_sequence_constructor:
+                args = args[0].args
+            if len(args) <= 1:
+                # leave this to Python
+                return node
+
+        cascaded_nodes = list(map(UtilNodes.ResultRefNode, args[1:]))
+
+        last_result = args[0]
+        for arg_node in cascaded_nodes:
+            result_ref = UtilNodes.ResultRefNode(last_result)
+            last_result = ExprNodes.CondExprNode(
+                arg_node.pos,
+                true_val = arg_node,
+                false_val = result_ref,
+                test = ExprNodes.PrimaryCmpNode(
+                    arg_node.pos,
+                    operand1 = arg_node,
+                    operator = operator,
+                    operand2 = result_ref,
+                    )
+                )
+            last_result = UtilNodes.EvalWithTempExprNode(result_ref, last_result)
+
+        for ref_node in cascaded_nodes[::-1]:
+            last_result = UtilNodes.EvalWithTempExprNode(ref_node, last_result)
+
+        return last_result
+
+    # builtin type creation
+
+    def _DISABLED_handle_simple_function_tuple(self, node, pos_args):
+        if not pos_args:
+            return ExprNodes.TupleNode(node.pos, args=[], constant_result=())
+        # This is a bit special - for iterables (including genexps),
+        # Python actually overallocates and resizes a newly created
+        # tuple incrementally while reading items, which we can't
+        # easily do without explicit node support. Instead, we read
+        # the items into a list and then copy them into a tuple of the
+        # final size.  This takes up to twice as much memory, but will
+        # have to do until we have real support for genexps.
+        result = self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
+        if result is not node:
+            return ExprNodes.AsTupleNode(node.pos, arg=result)
+        return node
+
+    def _handle_simple_function_frozenset(self, node, pos_args):
+        """Replace frozenset([...]) by frozenset((...)) as tuples are more efficient.
+        """
+        if len(pos_args) != 1:
+            return node
+        if pos_args[0].is_sequence_constructor and not pos_args[0].args:
+            del pos_args[0]
+        elif isinstance(pos_args[0], ExprNodes.ListNode):
+            pos_args[0] = pos_args[0].as_tuple()
+        return node
+
+    def _handle_simple_function_list(self, node, pos_args):
+        if not pos_args:
+            return ExprNodes.ListNode(node.pos, args=[], constant_result=[])
+        return self._transform_list_set_genexpr(node, pos_args, Builtin.list_type)
+
+    def _handle_simple_function_set(self, node, pos_args):
+        if not pos_args:
+            return ExprNodes.SetNode(node.pos, args=[], constant_result=set())
+        return self._transform_list_set_genexpr(node, pos_args, Builtin.set_type)
+
+    def _transform_list_set_genexpr(self, node, pos_args, target_type):
+        """Replace set(genexpr) and list(genexpr) by an inlined comprehension.
+        """
+        if len(pos_args) > 1:
+            return node
+        if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+            return node
+        gen_expr_node = pos_args[0]
+        loop_node = gen_expr_node.loop
+
+        yield_statements = _find_yield_statements(loop_node)
+        if not yield_statements:
+            return node
+
+        result_node = ExprNodes.InlinedGeneratorExpressionNode(
+            node.pos, gen_expr_node,
+            orig_func='set' if target_type is Builtin.set_type else 'list',
+            comprehension_type=target_type)
+
+        for yield_expression, yield_stat_node in yield_statements:
+            append_node = ExprNodes.ComprehensionAppendNode(
+                yield_expression.pos,
+                expr=yield_expression,
+                target=result_node.target)
+            Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
+
+        return result_node
+
+    def _handle_simple_function_dict(self, node, pos_args):
+        """Replace dict( (a,b) for ... ) by an inlined { a:b for ... }
+        """
+        if len(pos_args) == 0:
+            return ExprNodes.DictNode(node.pos, key_value_pairs=[], constant_result={})
+        if len(pos_args) > 1:
+            return node
+        if not isinstance(pos_args[0], ExprNodes.GeneratorExpressionNode):
+            return node
+        gen_expr_node = pos_args[0]
+        loop_node = gen_expr_node.loop
+
+        yield_statements = _find_yield_statements(loop_node)
+        if not yield_statements:
+            return node
+
+        for yield_expression, _ in yield_statements:
+            if not isinstance(yield_expression, ExprNodes.TupleNode):
+                return node
+            if len(yield_expression.args) != 2:
+                return node
+
+        result_node = ExprNodes.InlinedGeneratorExpressionNode(
+            node.pos, gen_expr_node, orig_func='dict',
+            comprehension_type=Builtin.dict_type)
+
+        for yield_expression, yield_stat_node in yield_statements:
+            append_node = ExprNodes.DictComprehensionAppendNode(
+                yield_expression.pos,
+                key_expr=yield_expression.args[0],
+                value_expr=yield_expression.args[1],
+                target=result_node.target)
+            Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
+
+        return result_node
+
+    # specific handlers for general call nodes
+
+    def _handle_general_function_dict(self, node, pos_args, kwargs):
+        """Collapse ``dict(a=b, c=d, ...)`` into the keyword dict node that
+        gets built for the call anyway.
+
+        The call is returned unchanged when positional arguments are present
+        or when ``kwargs`` is not a DictNode.
+        """
+        keyword_only_call = not pos_args and isinstance(kwargs, ExprNodes.DictNode)
+        return kwargs if keyword_only_call else node
+
+
+class InlineDefNodeCalls(Visitor.NodeRefCleanupMixin, Visitor.EnvTransform):
+    """Replace simple calls through a name by an InlinedDefNodeCallNode when
+    the name has a single assignment whose value is a PyCFunctionNode and the
+    'optimize.inline_defnode_calls' directive is set.
+    """
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+    def get_constant_value_node(self, name_node):
+        """Return the right-hand side of the only assignment to the name.
+
+        Returns None when the name has no control flow state, when that
+        state is flagged as null, or when the looked-up entry does not have
+        exactly one recorded assignment.
+        """
+        state = name_node.cf_state
+        if state is None or state.cf_is_null:
+            return None
+        entry = self.current_env().lookup(name_node.name)
+        assignments = entry.cf_assignments if entry else None
+        if not assignments or len(assignments) != 1:
+            # not just a single assignment in all closures
+            return None
+        return assignments[0].rhs
+
+    def visit_SimpleCallNode(self, node):
+        """Swap the call for an inlined def-node call where possible."""
+        self.visitchildren(node)
+        if not self.current_directives.get('optimize.inline_defnode_calls'):
+            return node
+        callee = node.function
+        if not callee.is_name:
+            return node
+        assigned_value = self.get_constant_value_node(callee)
+        if not isinstance(assigned_value, ExprNodes.PyCFunctionNode):
+            return node
+        candidate = ExprNodes.InlinedDefNodeCallNode(
+            node.pos, function_name=callee,
+            function=assigned_value, args=node.args,
+            generator_arg_tag=node.generator_arg_tag)
+        if not candidate.can_be_inlined():
+            return node
+        return self.replace(node, candidate)
+
+
+class OptimizeBuiltinCalls(Visitor.NodeRefCleanupMixin,
+                           Visitor.MethodDispatcherTransform):
+    """Optimize some common method calls and instantiation patterns
+    for builtin types *after* the type analysis phase.
+
+    Running after type analysis, this transform can only perform
+    function replacements that do not alter the function return type
+    in a way that was not anticipated by the type analysis.
+    """
+    ### cleanup to avoid redundant coercions to/from Python types
+
+    def visit_PyTypeTestNode(self, node):
+        """Re-analyse a type test after its children were visited, so that
+        checks made redundant by tree changes get flattened.
+        """
+        self.visitchildren(node)
+        reanalysed = node.reanalyse()
+        return reanalysed
+
+    def _visit_TypecastNode(self, node):
+        # disabled - the user may have had a reason to put a type
+        # cast, even if it looks redundant to Cython
+        """
+        Drop redundant type casts.
+        """
+        self.visitchildren(node)
+        operand = node.operand
+        # A cast to the operand's own type is replaced by the operand.
+        return operand if node.type == operand.type else node
+
+    def visit_ExprStatNode(self, node):
+        """
+        Remove expression statements that have no effect, after stripping
+        a useless coercion to a Python object from the expression.
+        """
+        self.visitchildren(node)
+        stat_expr = node.expr
+        if isinstance(stat_expr, ExprNodes.CoerceToPyTypeNode):
+            # Unwrap the coercion and store the inner expression on the node.
+            stat_expr = node.expr = stat_expr.arg
+        if stat_expr is None:
+            # Expression was removed => remove ExprStatNode as well.
+            return None
+        if stat_expr.is_none or stat_expr.is_literal:
+            # Expression is dead code => remove ExprStatNode as well.
+            return None
+        if stat_expr.is_name:
+            entry = stat_expr.entry
+            if entry and (entry.is_local or entry.is_arg):
+                # Ignore dead references to local variables etc.
+                return None
+        return node
+
+    def visit_CoerceToBooleanNode(self, node):
+        """Skip a C->Python coercion that only feeds a truth test.
+
+        If the argument (optionally wrapped in a type test) is a coercion to
+        a generic Python object or to bool, the original value is coerced to
+        a boolean directly instead.
+        """
+        self.visitchildren(node)
+        inner = node.arg
+        if isinstance(inner, ExprNodes.PyTypeTestNode):
+            inner = inner.arg
+        if not isinstance(inner, ExprNodes.CoerceToPyTypeNode):
+            return node
+        if inner.type not in (PyrexTypes.py_object_type, Builtin.bool_type):
+            return node
+        return inner.arg.coerce_to_boolean(self.current_env())
+
+    # C signature used for __Pyx_PyNumber_Float(): object -> object.
+    PyNumber_Float_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None)
+            ])
+
+    def visit_CoerceToPyTypeNode(self, node):
+        """Drop redundant conversion nodes after tree changes.
+
+        Only single-argument C-API calls named 'float' are rewritten; every
+        other node is returned unchanged.
+        """
+        self.visitchildren(node)
+        call = node.arg
+        if isinstance(call, ExprNodes.CoerceFromPyTypeNode):
+            call = call.arg
+        if not isinstance(call, ExprNodes.PythonCapiCallNode):
+            return node
+        if call.function.name != 'float' or len(call.args) != 1:
+            return node
+
+        # undo redundant Py->C->Py coercion
+        func_arg = call.args[0]
+        if func_arg.type is Builtin.float_type:
+            return func_arg.as_none_safe_node("float() argument must be a string or a number, not 'NoneType'")
+        if func_arg.type.is_pyobject and call.function.cname == "__Pyx_PyObject_AsDouble":
+            float_call = ExprNodes.PythonCapiCallNode(
+                node.pos, '__Pyx_PyNumber_Float', self.PyNumber_Float_func_type,
+                args=[func_arg],
+                py_name='float',
+                is_temp=node.is_temp,
+                utility_code = UtilityCode.load_cached("pynumber_float", "TypeConversion.c"),
+                result_is_used=node.result_is_used,
+            )
+            return float_call.coerce_to(node.type, self.current_env())
+        return node
+
+    def visit_CoerceFromPyTypeNode(self, node):
+        """Drop redundant conversion nodes after tree changes.
+
+        Also, optimise away calls to Python's builtin int() and
+        float() if the result is going to be coerced back into a C
+        type anyway.
+
+        Returns the replacement node, or ``node`` itself when none of the
+        cases below applies.
+        """
+        self.visitchildren(node)
+        arg = node.arg
+        if not arg.type.is_pyobject:
+            # no Python conversion left at all, just do a C coercion instead
+            if node.type != arg.type:
+                arg = arg.coerce_to(node.type, self.current_env())
+            return arg
+        # Look through a type test wrapped around the actual value.
+        if isinstance(arg, ExprNodes.PyTypeTestNode):
+            arg = arg.arg
+        if arg.is_literal:
+            # Int/bool literals going to a C integer and float literals going
+            # to a C float are coerced directly.
+            if (node.type.is_int and isinstance(arg, ExprNodes.IntNode) or
+                    node.type.is_float and isinstance(arg, ExprNodes.FloatNode) or
+                    node.type.is_int and isinstance(arg, ExprNodes.BoolNode)):
+                return arg.coerce_to(node.type, self.current_env())
+        elif isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+            if arg.type is PyrexTypes.py_object_type:
+                if node.type.assignable_from(arg.arg.type):
+                    # completely redundant C->Py->C coercion
+                    return arg.arg.coerce_to(node.type, self.current_env())
+            elif arg.type is Builtin.unicode_type:
+                # Unicode character -> unicode object -> unicode character.
+                if arg.arg.type.is_unicode_char and node.type.is_unicode_char:
+                    return arg.arg.coerce_to(node.type, self.current_env())
+        elif isinstance(arg, ExprNodes.SimpleCallNode):
+            # A call whose result is coerced to a C int/float may be an
+            # int()/float() call that can be optimised.
+            if node.type.is_int or node.type.is_float:
+                return self._optimise_numeric_cast_call(node, arg)
+        elif arg.is_subscript:
+            # Indexing with a C integer (possibly wrapped in a coercion to a
+            # Python object) is handed to the integer indexing optimisation.
+            index_node = arg.index
+            if isinstance(index_node, ExprNodes.CoerceToPyTypeNode):
+                index_node = index_node.arg
+            if index_node.type.is_int:
+                return self._optimise_int_indexing(node, arg, index_node)
+        return node
+
+    # C signature of __Pyx_PyBytes_GetItemInt(bytes, index, check_bounds) -> char.
+    PyBytes_GetItemInt_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_char_type, [
+            PyrexTypes.CFuncTypeArg("bytes", Builtin.bytes_type, None),
+            PyrexTypes.CFuncTypeArg("index", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("check_bounds", PyrexTypes.c_int_type, None),
+            ],
+        exception_value = "((char)-1)",
+        exception_check = True)
+
+    def _optimise_int_indexing(self, coerce_node, arg, index_node):
+        """Turn ``bytes[int_index]`` coerced to a C char type into a call to
+        __Pyx_PyBytes_GetItemInt().
+
+        ``coerce_node`` is returned unchanged for any other base type or
+        coercion target.
+        """
+        env = self.current_env()
+        check_bounds = 1 if env.directives['boundscheck'] else 0
+        char_types = (PyrexTypes.c_char_type, PyrexTypes.c_uchar_type)
+        if arg.base.type is not Builtin.bytes_type or coerce_node.type not in char_types:
+            return coerce_node
+
+        # bytes[index] -> char
+        check_bounds_node = ExprNodes.IntNode(
+            coerce_node.pos, value=str(check_bounds),
+            constant_result=check_bounds)
+        getitem_call = ExprNodes.PythonCapiCallNode(
+            coerce_node.pos, "__Pyx_PyBytes_GetItemInt",
+            self.PyBytes_GetItemInt_func_type,
+            args=[
+                arg.base.as_none_safe_node("'NoneType' object is not subscriptable"),
+                index_node.coerce_to(PyrexTypes.c_py_ssize_t_type, env),
+                check_bounds_node,
+                ],
+            is_temp=True,
+            utility_code=UtilityCode.load_cached(
+                'bytes_index', 'StringTools.c'))
+        if coerce_node.type is PyrexTypes.c_char_type:
+            return getitem_call
+        # The C function returns 'char'; convert for other targets.
+        return getitem_call.coerce_to(coerce_node.type, env)
+
+    # Maps each C floating point type to the signature of a C function that
+    # takes one argument of that type and returns the same type.
+    float_float_func_types = dict(
+        (float_type, PyrexTypes.CFuncType(
+            float_type, [
+                PyrexTypes.CFuncTypeArg("arg", float_type, None)
+            ]))
+        for float_type in (PyrexTypes.c_float_type, PyrexTypes.c_double_type, PyrexTypes.c_longdouble_type))
+
+    def _optimise_numeric_cast_call(self, node, arg):
+        """Try to bypass an int()/float() call whose result is coerced to C.
+
+        ``node`` is the coercion node and ``arg`` the call node it wraps.
+        Returns a replacement expression, or ``node`` unchanged when the
+        call does not have exactly one usable argument or no rule applies.
+        """
+        function = arg.function
+        args = None
+        # Collect the call arguments, either from a C-API call node or from
+        # a call to a builtin type through a plain name.
+        if isinstance(arg, ExprNodes.PythonCapiCallNode):
+            args = arg.args
+        elif isinstance(function, ExprNodes.NameNode):
+            if function.type.is_builtin_type and isinstance(arg.arg_tuple, ExprNodes.TupleNode):
+                args = arg.arg_tuple.args
+
+        if args is None or len(args) != 1:
+            return node
+        func_arg = args[0]
+        # Work on the C value underneath a coercion to a Python object.
+        if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+            func_arg = func_arg.arg
+        elif func_arg.type.is_pyobject:
+            # play it safe: Python conversion might work on all sorts of things
+            return node
+
+        if function.name == 'int':
+            if func_arg.type.is_int or node.type.is_int:
+                if func_arg.type == node.type:
+                    # Same C type on both sides: the call is a no-op.
+                    return func_arg
+                elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type):
+                    # need to parse (<Py_UCS4>'1') as digit 1
+                    return self._pyucs4_to_number(node, function.name, func_arg)
+                elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
+                    return ExprNodes.TypecastNode(node.pos, operand=func_arg, type=node.type)
+            elif func_arg.type.is_float and node.type.is_numeric:
+                # int(<C float>) coerced to a non-integer numeric type:
+                # call the matching trunc*() function instead.
+                if func_arg.type.math_h_modifier == 'l':
+                    # Work around missing Cygwin definition.
+                    truncl = '__Pyx_truncl'
+                else:
+                    truncl = 'trunc' + func_arg.type.math_h_modifier
+                return ExprNodes.PythonCapiCallNode(
+                    node.pos, truncl,
+                    func_type=self.float_float_func_types[func_arg.type],
+                    args=[func_arg],
+                    py_name='int',
+                    is_temp=node.is_temp,
+                    result_is_used=node.result_is_used,
+                ).coerce_to(node.type, self.current_env())
+        elif function.name == 'float':
+            if func_arg.type.is_float or node.type.is_float:
+                if func_arg.type == node.type:
+                    # Same C type on both sides: the call is a no-op.
+                    return func_arg
+                elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type):
+                    # need to parse (<Py_UCS4>'1') as digit 1
+                    return self._pyucs4_to_number(node, function.name, func_arg)
+                elif node.type.assignable_from(func_arg.type) or func_arg.type.is_float:
+                    return ExprNodes.TypecastNode(
+                        node.pos, operand=func_arg, type=node.type)
+        return node
+
+    # C signature used for __Pyx_int_from_UCS4(): Py_UCS4 -> int.
+    pyucs4_int_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_int_type, [
+            PyrexTypes.CFuncTypeArg("arg", PyrexTypes.c_py_ucs4_type, None)
+        ],
+        exception_value="-1")
+
+    # C signature used for __Pyx_double_from_UCS4(): Py_UCS4 -> double.
+    pyucs4_double_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_double_type, [
+            PyrexTypes.CFuncTypeArg("arg", PyrexTypes.c_py_ucs4_type, None)
+        ],
+        exception_value="-1.0")
+
+    def _pyucs4_to_number(self, node, py_type_name, func_arg):
+        """Build the C helper call that converts a unicode character to a
+        number, coerced to the type of ``node``.
+
+        ``py_type_name`` must be "int" or "float" and selects the helper
+        function, its signature and its utility code.
+        """
+        assert py_type_name in ("int", "float")
+        if py_type_name == "int":
+            cname = "__Pyx_int_from_UCS4"
+            func_type = self.pyucs4_int_func_type
+            utility_name = "int_pyucs4"
+        else:
+            cname = "__Pyx_double_from_UCS4"
+            func_type = self.pyucs4_double_func_type
+            utility_name = "float_pyucs4"
+        conversion = ExprNodes.PythonCapiCallNode(
+            node.pos, cname,
+            func_type=func_type,
+            args=[func_arg],
+            py_name=py_type_name,
+            is_temp=node.is_temp,
+            result_is_used=node.result_is_used,
+            utility_code=UtilityCode.load_cached(utility_name, "Builtins.c"),
+        )
+        return conversion.coerce_to(node.type, self.current_env())
+
+    def _error_wrong_arg_count(self, function_name, node, args, expected=None):
+        """Report a call with the wrong number of arguments at ``node.pos``.
+
+        ``expected`` may be None, a number or a descriptive string.  It
+        selects the argument placeholder shown in the message and, unless it
+        is None, is quoted as the expected count.
+        """
+        if expected and (isinstance(expected, basestring) or expected > 1):
+            arg_str = '...'
+        elif expected == 1:
+            arg_str = 'x'
+        else:
+            # None, 0 or anything else
+            arg_str = ''
+        expected_str = '' if expected is None else 'expected %s, ' % expected
+        message = "%s(%s) called with wrong number of args, %sfound %d" % (
+            function_name, arg_str, expected_str, len(args))
+        error(node.pos, message)
+
+    ### generic fallbacks
+
+    def _handle_function(self, node, function_name, function, arg_list, kwargs):
+        """Generic fallback for function calls: return the call unchanged."""
+        return node
+
+    def _handle_method(self, node, type_name, attr_name, function,
+                       arg_list, is_unbound_method, kwargs):
+        """
+        Try to inject C-API calls for unbound method calls to builtin types.
+        While the method declarations in Builtin.py already handle this, we
+        can additionally resolve bound and unbound methods here that were
+        assigned to variables ahead of time.
+
+        Returns ``node`` unchanged for calls with keyword arguments, for
+        functions that are not a plain ``name.attribute`` lookup, and when
+        ``type_name`` cannot be looked up in the current scope.
+        """
+        if kwargs:
+            return node
+        if not function or not function.is_attribute or not function.obj.is_name:
+            # cannot track unbound method calls over more than one indirection as
+            # the names might have been reassigned in the meantime
+            return node
+        type_entry = self.current_env().lookup(type_name)
+        if not type_entry:
+            return node
+        # Build "<type_name>.<attr_name>" and try to resolve it as an
+        # attribute of the type.
+        method = ExprNodes.AttributeNode(
+            node.function.pos,
+            obj=ExprNodes.NameNode(
+                function.pos,
+                name=type_name,
+                entry=type_entry,
+                type=type_entry.type),
+            attribute=attr_name,
+            is_called=True).analyse_as_type_attribute(self.current_env())
+        if method is None:
+            # Not resolvable as a type attribute: fall back to the generic
+            # builtin method call optimisation.
+            return self._optimise_generic_builtin_method_call(
+                node, attr_name, function, arg_list, is_unbound_method)
+        # Take the arguments from whichever attribute the call node uses.
+        args = node.args
+        if args is None and node.arg_tuple:
+            args = node.arg_tuple.args
+        call_node = ExprNodes.SimpleCallNode(
+            node.pos,
+            function=method,
+            args=args)
+        if not is_unbound_method:
+            # Bound call: the object the method was looked up on is 'self'.
+            call_node.self = function.obj
+        call_node.analyse_c_function_call(self.current_env())
+        call_node.analysed = True
+        return call_node.coerce_to(node.type, self.current_env())
+
+    ### builtin types
+
+    def _optimise_generic_builtin_method_call(self, node, attr_name, function, arg_list, is_unbound_method):
+        """
+        Wrap a bound method call on an object of a known builtin type in a
+        CachedBuiltinMethodCallNode.
+        This enables caching the underlying C function of the method at runtime.
+
+        Calls are left alone when they are unbound, pass three or more
+        arguments, or do not go through a Python attribute lookup.
+        """
+        arg_count = len(arg_list)
+        if is_unbound_method or arg_count >= 3:
+            return node
+        if not function.is_attribute or not function.is_py_attr:
+            return node
+        obj_type = function.obj.type
+        if not obj_type.is_builtin_type:
+            return node
+        if obj_type.name in ('basestring', 'type'):
+            # these allow different actual types => unsafe
+            return node
+        return ExprNodes.CachedBuiltinMethodCallNode(
+            node, function.obj, attr_name, arg_list)
+
+    # C signature shared by __Pyx_PyStr_Str() and __Pyx_PyObject_Str().
+    PyObject_String_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [  # Change this to Builtin.str_type when removing Py2 support.
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
+            ])
+
+    def _handle_simple_function_str(self, node, function, pos_args):
+        """Optimise calls to str() that pass at most one argument.
+
+        Calls already typed as unicode are delegated to the unicode()
+        handler.  str() without arguments becomes an empty string literal,
+        and calls with more than one argument are left unchanged.
+        """
+        if node.type is Builtin.unicode_type:
+            # type already deduced as unicode (language_level=3)
+            return self._handle_simple_function_unicode(node, function, pos_args)
+        arg_count = len(pos_args)
+        if arg_count == 0:
+            return ExprNodes.StringNode(node.pos, value=EncodedString(), constant_result='')
+        if arg_count != 1:
+            return node
+
+        arg = pos_args[0]
+        if arg.type is not Builtin.str_type:
+            cname, utility_name = '__Pyx_PyObject_Str', 'PyObject_Str'
+        elif arg.may_be_none():
+            cname, utility_name = "__Pyx_PyStr_Str", 'PyStr_Str'
+        else:
+            # Already a str object that cannot be None: nothing to convert.
+            return arg
+
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, cname, self.PyObject_String_func_type,
+            args=pos_args,
+            is_temp=node.is_temp,
+            utility_code=UtilityCode.load_cached(utility_name, 'StringTools.c'),
+            py_name="str"
+        )
+
+    # C signature shared by __Pyx_PyUnicode_Unicode() and __Pyx_PyObject_Unicode().
+    PyObject_Unicode_func_type = PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
+            ])
+
+    def _handle_simple_function_unicode(self, node, function, pos_args):
+        """Optimise calls to unicode() that pass at most one argument.
+
+        unicode() without arguments becomes an empty unicode literal, and
+        calls with more than one argument are left unchanged.
+        """
+        arg_count = len(pos_args)
+        if arg_count == 0:
+            return ExprNodes.UnicodeNode(node.pos, value=EncodedString(), constant_result=u'')
+        if arg_count != 1:
+            return node
+
+        arg = pos_args[0]
+        if arg.type is not Builtin.unicode_type:
+            cname, utility_name = "__Pyx_PyObject_Unicode", 'PyObject_Unicode'
+        elif arg.may_be_none():
+            cname, utility_name = "__Pyx_PyUnicode_Unicode", 'PyUnicode_Unicode'
+        else:
+            # Already a unicode object that cannot be None: nothing to convert.
+            return arg
+
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, cname, self.PyObject_Unicode_func_type,
+            args=pos_args,
+            is_temp=node.is_temp,
+            utility_code=UtilityCode.load_cached(utility_name, 'StringTools.c'),
+            py_name="unicode")
+
+    def visit_FormattedValueNode(self, node):
+        """Simplify or avoid plain string formatting of a unicode value.
+
+        Formatting a unicode value without any format spec and with no
+        conversion (or only 's') is treated like a call to the unicode()
+        builtin, which is why this lives next to that handler.
+        """
+        self.visitchildren(node)
+        if node.value.type is not Builtin.unicode_type:
+            return node
+        if node.c_format_spec or node.format_spec:
+            return node
+        if node.conversion_char and node.conversion_char != 's':
+            return node
+        # value is definitely a unicode string and we don't format it any special
+        return self._handle_simple_function_unicode(node, None, [node.value])
+
+    # C signature of PyDict_Copy(): dict -> dict.
+    PyDict_Copy_func_type = PyrexTypes.CFuncType(
+        Builtin.dict_type, [
+            PyrexTypes.CFuncTypeArg("dict", Builtin.dict_type, None)
+            ])
+
+    def _handle_simple_function_dict(self, node, function, pos_args):
+        """Replace dict(some_dict) by PyDict_Copy(some_dict).
+
+        Only applies to a single argument that is typed as dict; the
+        argument gets a None check before it is passed on.
+        """
+        if len(pos_args) != 1 or pos_args[0].type is not Builtin.dict_type:
+            return node
+        source_dict = pos_args[0].as_none_safe_node("'NoneType' is not iterable")
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, "PyDict_Copy", self.PyDict_Copy_func_type,
+            args=[source_dict],
+            is_temp=node.is_temp)
+
+    # C signature used for the list-from-iterable calls: object -> list.
+    PySequence_List_func_type = PyrexTypes.CFuncType(
+        Builtin.list_type,
+        [PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)])
+
+    def _handle_simple_function_list(self, node, function, pos_args):
+        """Turn list(ob) into PySequence_List(ob).
+
+        __Pyx_PySequence_ListKeepNew is called instead when both the call
+        and its argument are temporaries and the argument is typed as a
+        generic object or a list.
+        """
+        if len(pos_args) != 1:
+            return node
+        iterable = pos_args[0]
+        keep_new_types = (PyrexTypes.py_object_type, Builtin.list_type)
+        if node.is_temp and iterable.is_temp and iterable.type in keep_new_types:
+            cname = "__Pyx_PySequence_ListKeepNew"
+        else:
+            cname = "PySequence_List"
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, cname, self.PySequence_List_func_type,
+            args=pos_args,
+            is_temp=node.is_temp,
+        )
+
+    # C signature of PyList_AsTuple(): list -> tuple.
+    PyList_AsTuple_func_type = PyrexTypes.CFuncType(
+        Builtin.tuple_type, [
+            PyrexTypes.CFuncTypeArg("list", Builtin.list_type, None)
+            ])
+
+    def _handle_simple_function_tuple(self, node, function, pos_args):
+        """Replace tuple(x) by x itself, PyList_AsTuple(x) or an AsTupleNode.
+
+        Only single-argument calls with a temporary result are handled.
+        """
+        if len(pos_args) != 1 or not node.is_temp:
+            return node
+        source = pos_args[0]
+        if source.type is Builtin.tuple_type and not source.may_be_none():
+            # Already a tuple that cannot be None.
+            return source
+        if source.type is not Builtin.list_type:
+            return ExprNodes.AsTupleNode(node.pos, arg=source, type=Builtin.tuple_type)
+
+        # Known list: add a None check in place and convert it directly.
+        pos_args[0] = source.as_none_safe_node(
+            "'NoneType' object is not iterable")
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, "PyList_AsTuple", self.PyList_AsTuple_func_type,
+            args=pos_args, is_temp=node.is_temp)
+
+    # C signature of PySet_New(): object -> set.
+    PySet_New_func_type = PyrexTypes.CFuncType(
+        Builtin.set_type, [
+            PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
+        ])
+
+    def _handle_simple_function_set(self, node, function, pos_args):
+        """Replace set(x) by a set literal or by a call to PySet_New(x).
+
+        Calls that do not pass exactly one argument are returned unchanged.
+        """
+        if len(pos_args) != 1:
+            return node
+        if pos_args[0].is_sequence_constructor:
+            # We can optimise set([x,y,z]) safely into a set literal,
+            # but only if we create all items before adding them -
+            # adding an item may raise an exception if it is not
+            # hashable, but creating the later items may have
+            # side-effects.
+            args = []
+            temps = []
+            for arg in pos_args[0].args:
+                if not arg.is_simple():
+                    # Non-simple items are wrapped in a LetRefNode and
+                    # remembered for the temp wrapping below.
+                    arg = UtilNodes.LetRefNode(arg)
+                    temps.append(arg)
+                args.append(arg)
+            result = ExprNodes.SetNode(node.pos, is_temp=1, args=args)
+            self.replace(node, result)
+            # Wrap the set node in one EvalWithTempExprNode per temp,
+            # walking the temps in reverse so the first one ends up outermost.
+            for temp in temps[::-1]:
+                result = UtilNodes.EvalWithTempExprNode(temp, result)
+            return result
+        else:
+            # PySet_New(it) is better than a generic Python call to set(it)
+            return self.replace(node, ExprNodes.PythonCapiCallNode(
+                node.pos, "PySet_New",
+                self.PySet_New_func_type,
+                args=pos_args,
+                is_temp=node.is_temp,
+                py_name="set"))
+
+    # C signature used for __Pyx_PyFrozenSet_New(): object -> frozenset.
+    PyFrozenSet_New_func_type = PyrexTypes.CFuncType(
+        Builtin.frozenset_type, [
+            PyrexTypes.CFuncTypeArg("it", PyrexTypes.py_object_type, None)
+        ])
+
+    def _handle_simple_function_frozenset(self, node, function, pos_args):
+        """Replace frozenset(...) by a call to __Pyx_PyFrozenSet_New().
+
+        A call without arguments passes a NullNode instead.  Calls with
+        more than one argument are left unchanged, and an argument that is
+        typed as frozenset and cannot be None is returned as is.
+        """
+        if not pos_args:
+            call_args = [ExprNodes.NullNode(node.pos)]
+        else:
+            if len(pos_args) > 1:
+                return node
+            only_arg = pos_args[0]
+            if only_arg.type is Builtin.frozenset_type and not only_arg.may_be_none():
+                return only_arg
+            call_args = pos_args
+        # PyFrozenSet_New(it) is better than a generic Python call to frozenset(it)
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, "__Pyx_PyFrozenSet_New",
+            self.PyFrozenSet_New_func_type,
+            args=call_args,
+            is_temp=node.is_temp,
+            utility_code=UtilityCode.load_cached('pyfrozenset_new', 'Builtins.c'),
+            py_name="frozenset")
+
+    # C signature shared by the *_AsDouble() conversion functions used below:
+    # object -> double, with ((double)-1) as the exception value to check.
+    PyObject_AsDouble_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_double_type, [
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            ],
+        exception_value = "((double)-1)",
+        exception_check = True)
+
+    def _handle_simple_function_float(self, node, function, pos_args):
+        """Transform float() into either a C type cast or a faster C
+        function call.
+
+        float() without arguments becomes the constant 0.0.  More than one
+        argument is reported as an error and the call is left unchanged.
+        For a single argument, a C double is returned as is, a unicode
+        character is parsed as a digit, other C numeric values are type
+        cast, and Python objects are converted by a type specific
+        *_AsDouble() C function.
+        """
+        # Note: this requires the float() function to be typed as
+        # returning a C 'double'
+        if len(pos_args) == 0:
+            # Pass the source position, not the call node itself.
+            return ExprNodes.FloatNode(
+                node.pos, value="0.0", constant_result=0.0
+                ).coerce_to(Builtin.float_type, self.current_env())
+        elif len(pos_args) != 1:
+            self._error_wrong_arg_count('float', node, pos_args, '0 or 1')
+            return node
+
+        func_arg = pos_args[0]
+        if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+            func_arg = func_arg.arg
+        if func_arg.type is PyrexTypes.c_double_type:
+            return func_arg
+        elif func_arg.type in (PyrexTypes.c_py_ucs4_type, PyrexTypes.c_py_unicode_type):
+            # need to parse (<Py_UCS4>'1') as digit 1
+            return self._pyucs4_to_number(node, function.name, func_arg)
+        elif node.type.assignable_from(func_arg.type) or func_arg.type.is_numeric:
+            return ExprNodes.TypecastNode(
+                node.pos, operand=func_arg, type=node.type)
+
+        arg = pos_args[0].as_none_safe_node(
+            "float() argument must be a string or a number, not 'NoneType'")
+
+        # Stays None for the branch that names no utility code
+        # (PyLong_AsDouble), so the lookup below cannot hit an unbound local.
+        utility_code_name = None
+        if func_arg.type is Builtin.bytes_type:
+            cfunc_name = "__Pyx_PyBytes_AsDouble"
+            utility_code_name = 'pybytes_as_double'
+        elif func_arg.type is Builtin.bytearray_type:
+            cfunc_name = "__Pyx_PyByteArray_AsDouble"
+            utility_code_name = 'pybytes_as_double'
+        elif func_arg.type is Builtin.unicode_type:
+            cfunc_name = "__Pyx_PyUnicode_AsDouble"
+            utility_code_name = 'pyunicode_as_double'
+        elif func_arg.type is Builtin.str_type:
+            cfunc_name = "__Pyx_PyString_AsDouble"
+            utility_code_name = 'pystring_as_double'
+        elif func_arg.type is Builtin.long_type:
+            cfunc_name = "PyLong_AsDouble"
+        else:
+            arg = pos_args[0]  # no need for an additional None check
+            cfunc_name = "__Pyx_PyObject_AsDouble"
+            utility_code_name = 'pyobject_as_double'
+
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, cfunc_name,
+            self.PyObject_AsDouble_func_type,
+            args = [arg],
+            is_temp = node.is_temp,
+            utility_code = load_c_utility(utility_code_name) if utility_code_name else None,
+            py_name = "float")
+
+    # C signature used for __Pyx_PyNumber_Int(): object -> object.
+    PyNumber_Int_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("o", PyrexTypes.py_object_type, None)
+            ])
+
+    # C signature used for __Pyx_PyInt_FromDouble(): double -> object.
+    PyInt_FromDouble_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_double_type, None)
+            ])
+
+    def _handle_simple_function_int(self, node, function, pos_args):
+        """Transform int() into a faster C function call.
+
+        int() without arguments becomes the constant 0, and calls with more
+        than one argument are left unchanged.
+        """
+        arg_count = len(pos_args)
+        if arg_count == 0:
+            return ExprNodes.IntNode(node.pos, value="0", constant_result=0,
+                                     type=PyrexTypes.py_object_type)
+        if arg_count != 1:
+            return node  # int(x, base)
+
+        func_arg = pos_args[0]
+        if isinstance(func_arg, ExprNodes.CoerceToPyTypeNode):
+            if not func_arg.arg.type.is_float:
+                return node  # handled in visit_CoerceFromPyTypeNode()
+            # int(<C float>): convert the C value directly.
+            return ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_PyInt_FromDouble", self.PyInt_FromDouble_func_type,
+                args=[func_arg.arg], is_temp=True, py_name='int',
+                utility_code=UtilityCode.load_cached("PyIntFromDouble", "TypeConversion.c"))
+        if not (func_arg.type.is_pyobject and node.type.is_pyobject):
+            return node
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, "__Pyx_PyNumber_Int", self.PyNumber_Int_func_type,
+            args=pos_args, is_temp=True, py_name='int')
+
+    def _handle_simple_function_bool(self, node, function, pos_args):
+        """Transform bool(x) into a type coercion to a boolean.
+
+        bool() without arguments becomes the constant False.  More than one
+        argument is reported as an error and the call is left unchanged.
+        """
+        arg_count = len(pos_args)
+        if arg_count == 0:
+            false_node = ExprNodes.BoolNode(
+                node.pos, value=False, constant_result=False)
+            return false_node.coerce_to(Builtin.bool_type, self.current_env())
+        if arg_count != 1:
+            self._error_wrong_arg_count('bool', node, pos_args, '0 or 1')
+            return node
+
+        # => !!<bint>(x)  to make sure it's exactly 0 or 1
+        truth = pos_args[0].coerce_to_boolean(self.current_env())
+        for _ in range(2):
+            truth = ExprNodes.NotNode(node.pos, operand = truth)
+        # coerce back to Python object as that's the result we are expecting
+        return truth.coerce_to_pyobject(self.current_env())
+
+    # C signature used for PyMemoryView_FromObject(): object -> memoryview.
+    PyMemoryView_FromObject_func_type = PyrexTypes.CFuncType(
+        Builtin.memoryview_type, [
+            PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None)
+            ])
+
+    # C signature used for PyMemoryView_FromBuffer(): Py_buffer -> memoryview.
+    PyMemoryView_FromBuffer_func_type = PyrexTypes.CFuncType(
+        Builtin.memoryview_type, [
+            PyrexTypes.CFuncTypeArg("value", Builtin.py_buffer_type, None)
+            ])
+
+    def _handle_simple_function_memoryview(self, node, function, pos_args):
+        """Replace memoryview(x) by a direct C-API call.
+
+        Anything other than exactly one argument is reported as an error
+        and the call is left unchanged.
+        """
+        if len(pos_args) != 1:
+            self._error_wrong_arg_count('memoryview', node, pos_args, '1')
+            return node
+        else:
+            if pos_args[0].type.is_pyobject:
+                return ExprNodes.PythonCapiCallNode(
+                    node.pos, "PyMemoryView_FromObject",
+                    self.PyMemoryView_FromObject_func_type,
+                    args = [pos_args[0]],
+                    is_temp = node.is_temp,
+                    py_name = "memoryview")
+            # NOTE(review): 'is_ptr' is read from the argument's type but
+            # 'base_type' from the argument node itself - presumably
+            # 'pos_args[0].type.base_type' was intended; confirm before
+            # relying on this branch.
+            elif pos_args[0].type.is_ptr and pos_args[0].base_type is Builtin.py_buffer_type:
+                # TODO - this currently doesn't work because the buffer fails a
+                # "can coerce to python object" test earlier. But it'd be nice to support
+                return ExprNodes.PythonCapiCallNode(
+                    node.pos, "PyMemoryView_FromBuffer",
+                    self.PyMemoryView_FromBuffer_func_type,
+                    args = [pos_args[0]],
+                    is_temp = node.is_temp,
+                    py_name = "memoryview")
+        return node
+
+
+    ### builtin functions
+
+    # C signature for the strlen() replacement on 'const char*': returns Py_ssize_t,
+    # with -1 declared as the error return value.
+    Pyx_ssize_strlen_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("bytes", PyrexTypes.c_const_char_ptr_type, None)
+        ],
+        exception_value="-1")
+
+    # Same shape as above, but for 'const Py_UNICODE*' input.
+    Pyx_Py_UNICODE_strlen_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("unicode", PyrexTypes.c_const_py_unicode_ptr_type, None)
+        ],
+        exception_value="-1")
+
+    # Generic "size of a Python object" signature: object in, Py_ssize_t out, -1 on error.
+    PyObject_Size_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
+        ],
+        exception_value="-1")
+
+    # Lookup function (the bound .get of the dict) that maps a builtin type to the
+    # name of the C function/macro returning its length; yields None for other types.
+    _map_to_capi_len_function = {
+        Builtin.unicode_type:    "__Pyx_PyUnicode_GET_LENGTH",
+        Builtin.bytes_type:      "__Pyx_PyBytes_GET_SIZE",
+        Builtin.bytearray_type:  '__Pyx_PyByteArray_GET_SIZE',
+        Builtin.list_type:       "__Pyx_PyList_GET_SIZE",
+        Builtin.tuple_type:      "__Pyx_PyTuple_GET_SIZE",
+        Builtin.set_type:        "__Pyx_PySet_GET_SIZE",
+        Builtin.frozenset_type:  "__Pyx_PySet_GET_SIZE",
+        Builtin.dict_type:       "PyDict_Size",
+    }.get
+
+    # Qualified names of types for which len() is translated to Py_SIZE().
+    _ext_types_with_pysize = {"cpython.array.array"}
+
+    def _handle_simple_function_len(self, node, function, pos_args):
+        """Replace len(char*) by the equivalent call to strlen(),
+        len(Py_UNICODE) by the equivalent Py_UNICODE_strlen() and
+        len(known_builtin_type) by an equivalent C-API call.
+        """
+        if len(pos_args) != 1:
+            self._error_wrong_arg_count('len', node, pos_args, 1)
+            return node
+        arg = pos_args[0]
+        if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+            arg = arg.arg
+        if arg.type.is_string:
+            new_node = ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_ssize_strlen", self.Pyx_ssize_strlen_func_type,
+                args = [arg],
+                is_temp = node.is_temp)
+        elif arg.type.is_pyunicode_ptr:
+            new_node = ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_Py_UNICODE_ssize_strlen", self.Pyx_Py_UNICODE_strlen_func_type,
+                args = [arg],
+                is_temp = node.is_temp,
+                utility_code = UtilityCode.load_cached("ssize_pyunicode_strlen", "StringTools.c"))
+        elif arg.type.is_memoryviewslice:
+            func_type = PyrexTypes.CFuncType(
+                PyrexTypes.c_py_ssize_t_type, [
+                    PyrexTypes.CFuncTypeArg("memoryviewslice", arg.type, None)
+                ], nogil=True)
+            new_node = ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_MemoryView_Len", func_type,
+                args=[arg], is_temp=node.is_temp)
+        elif arg.type.is_pyobject:
+            cfunc_name = self._map_to_capi_len_function(arg.type)
+            if cfunc_name is None:
+                arg_type = arg.type
+                if ((arg_type.is_extension_type or arg_type.is_builtin_type)
+                        and arg_type.entry.qualified_name in self._ext_types_with_pysize):
+                    cfunc_name = 'Py_SIZE'
+                else:
+                    return node
+            arg = arg.as_none_safe_node(
+                "object of type 'NoneType' has no len()")
+            new_node = ExprNodes.PythonCapiCallNode(
+                node.pos, cfunc_name, self.PyObject_Size_func_type,
+                args=[arg], is_temp=node.is_temp)
+        elif arg.type.is_unicode_char:
+            return ExprNodes.IntNode(node.pos, value='1', constant_result=1,
+                                     type=node.type)
+        else:
+            return node
+        if node.type not in (PyrexTypes.c_size_t_type, PyrexTypes.c_py_ssize_t_type):
+            new_node = new_node.coerce_to(node.type, self.current_env())
+        return new_node
+
+    Pyx_Type_func_type = PyrexTypes.CFuncType(
+        Builtin.type_type, [
+            PyrexTypes.CFuncTypeArg("object", PyrexTypes.py_object_type, None)
+            ])
+
+    def _handle_simple_function_type(self, node, function, pos_args):
+        """Replace type(o) by a macro call to Py_TYPE(o).
+        """
+        if len(pos_args) != 1:
+            return node
+        node = ExprNodes.PythonCapiCallNode(
+            node.pos, "Py_TYPE", self.Pyx_Type_func_type,
+            args = pos_args,
+            is_temp = False)
+        return ExprNodes.CastNode(node, PyrexTypes.py_object_type)
+
+    Py_type_check_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_bint_type, [
+            PyrexTypes.CFuncTypeArg("arg", PyrexTypes.py_object_type, None)
+            ])
+
+    def _handle_simple_function_isinstance(self, node, function, pos_args):
+        """Replace isinstance() checks against builtin types by the
+        corresponding C-API call.
+        """
+        if len(pos_args) != 2:
+            return node
+        arg, types = pos_args
+        temps = []
+        if isinstance(types, ExprNodes.TupleNode):
+            types = types.args
+            if len(types) == 1 and not types[0].type is Builtin.type_type:
+                return node  # nothing to improve here
+            if arg.is_attribute or not arg.is_simple():
+                arg = UtilNodes.ResultRefNode(arg)
+                temps.append(arg)
+        elif types.type is Builtin.type_type:
+            types = [types]
+        else:
+            return node
+
+        tests = []
+        test_nodes = []
+        env = self.current_env()
+        for test_type_node in types:
+            builtin_type = None
+            if test_type_node.is_name:
+                if test_type_node.entry:
+                    entry = env.lookup(test_type_node.entry.name)
+                    if entry and entry.type and entry.type.is_builtin_type:
+                        builtin_type = entry.type
+            if builtin_type is Builtin.type_type:
+                # all types have type "type", but there's only one 'type'
+                if entry.name != 'type' or not (
+                        entry.scope and entry.scope.is_builtin_scope):
+                    builtin_type = None
+            if builtin_type is not None:
+                type_check_function = entry.type.type_check_function(exact=False)
+                if type_check_function == '__Pyx_Py3Int_Check' and builtin_type is Builtin.int_type:
+                    # isinstance(x, int) should really test for 'int' in Py2, not 'int | long'
+                    type_check_function = "PyInt_Check"
+                if type_check_function in tests:
+                    continue
+                tests.append(type_check_function)
+                type_check_args = [arg]
+            elif test_type_node.type is Builtin.type_type:
+                type_check_function = '__Pyx_TypeCheck'
+                type_check_args = [arg, test_type_node]
+            else:
+                if not test_type_node.is_literal:
+                    test_type_node = UtilNodes.ResultRefNode(test_type_node)
+                    temps.append(test_type_node)
+                type_check_function = 'PyObject_IsInstance'
+                type_check_args = [arg, test_type_node]
+            test_nodes.append(
+                ExprNodes.PythonCapiCallNode(
+                    test_type_node.pos, type_check_function, self.Py_type_check_func_type,
+                    args=type_check_args,
+                    is_temp=True,
+                ))
+
+        def join_with_or(a, b, make_binop_node=ExprNodes.binop_node):
+            or_node = make_binop_node(node.pos, 'or', a, b)
+            or_node.type = PyrexTypes.c_bint_type
+            or_node.wrap_operands(env)
+            return or_node
+
+        test_node = reduce(join_with_or, test_nodes).coerce_to(node.type, env)
+        for temp in temps[::-1]:
+            test_node = UtilNodes.EvalWithTempExprNode(temp, test_node)
+        return test_node
+
+    def _handle_simple_function_ord(self, node, function, pos_args):
+        """Unpack ord(Py_UNICODE) and ord('X').
+        """
+        if len(pos_args) != 1:
+            return node
+        arg = pos_args[0]
+        if isinstance(arg, ExprNodes.CoerceToPyTypeNode):
+            if arg.arg.type.is_unicode_char:
+                return ExprNodes.TypecastNode(
+                    arg.pos, operand=arg.arg, type=PyrexTypes.c_long_type
+                    ).coerce_to(node.type, self.current_env())
+        elif isinstance(arg, ExprNodes.UnicodeNode):
+            if len(arg.value) == 1:
+                return ExprNodes.IntNode(
+                    arg.pos, type=PyrexTypes.c_int_type,
+                    value=str(ord(arg.value)),
+                    constant_result=ord(arg.value)
+                    ).coerce_to(node.type, self.current_env())
+        elif isinstance(arg, ExprNodes.StringNode):
+            if arg.unicode_value and len(arg.unicode_value) == 1 \
+                    and ord(arg.unicode_value) <= 255:  # Py2/3 portability
+                return ExprNodes.IntNode(
+                    arg.pos, type=PyrexTypes.c_int_type,
+                    value=str(ord(arg.unicode_value)),
+                    constant_result=ord(arg.unicode_value)
+                    ).coerce_to(node.type, self.current_env())
+        return node
+
+    ### special methods
+
+    # Signature of the __Pyx_tp_new() helper: (type, args tuple) -> object.
+    Pyx_tp_new_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("type",   PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("args",   Builtin.tuple_type, None),
+            ])
+
+    # Signature of the __Pyx_tp_new_kwargs() helper: (type, args tuple, kwargs dict) -> object.
+    Pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("type",   PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("args",   Builtin.tuple_type, None),
+            PyrexTypes.CFuncTypeArg("kwargs", Builtin.dict_type, None),
+        ])
+
+    def _handle_any_slot__new__(self, node, function, args,
+                                is_unbound_method, kwargs=None):
+        """Replace 'exttype.__new__(exttype, ...)' by a call to exttype->tp_new()
+
+        Only the unbound form is handled, and only when both the object the
+        method is looked up on and the first argument are plain names of type
+        'type' that refer to the same type.  For an extension type of the
+        current module that has a tp_new slot function, that function is called
+        directly; otherwise the __Pyx_tp_new() / __Pyx_tp_new_kwargs() helpers
+        are used.  In all other cases the call node is returned unchanged.
+        """
+        obj = function.obj
+        if not is_unbound_method or len(args) < 1:
+            return node
+        type_arg = args[0]
+        if not obj.is_name or not type_arg.is_name:
+            return node  # not a simple case
+        if obj.type != Builtin.type_type or type_arg.type != Builtin.type_type:
+            return node  # not a known type
+        if not type_arg.type_entry or not obj.type_entry:
+            if obj.name != type_arg.name:
+                return node
+            # otherwise, we know it's a type and we know it's the same
+            # type for both - that should do
+        elif type_arg.type_entry != obj.type_entry:
+            # different types - may or may not lead to an error at runtime
+            return node
+
+        # Everything after the type argument is packed into the positional args tuple.
+        args_tuple = ExprNodes.TupleNode(node.pos, args=args[1:])
+        args_tuple = args_tuple.analyse_types(
+            self.current_env(), skip_children=True)
+
+        if type_arg.type_entry:
+            ext_type = type_arg.type_entry.type
+            if (ext_type.is_extension_type and ext_type.typeobj_cname and
+                    ext_type.scope.global_scope() == self.current_env().global_scope()):
+                # known type in current module
+                tp_slot = TypeSlots.ConstructorSlot("tp_new", '__new__')
+                slot_func_cname = TypeSlots.get_slot_function(ext_type.scope, tp_slot)
+                if slot_func_cname:
+                    cython_scope = self.context.cython_scope
+                    PyTypeObjectPtr = PyrexTypes.CPtrType(
+                        cython_scope.lookup('PyTypeObject').type)
+                    # Signature of the slot function itself; it returns the extension type.
+                    pyx_tp_new_kwargs_func_type = PyrexTypes.CFuncType(
+                        ext_type, [
+                            PyrexTypes.CFuncTypeArg("type",   PyTypeObjectPtr, None),
+                            PyrexTypes.CFuncTypeArg("args",   PyrexTypes.py_object_type, None),
+                            PyrexTypes.CFuncTypeArg("kwargs", PyrexTypes.py_object_type, None),
+                            ])
+
+                    type_arg = ExprNodes.CastNode(type_arg, PyTypeObjectPtr)
+                    if not kwargs:
+                        # the slot function always takes a kwargs argument, so pass NULL
+                        kwargs = ExprNodes.NullNode(node.pos, type=PyrexTypes.py_object_type)  # hack?
+                    return ExprNodes.PythonCapiCallNode(
+                        node.pos, slot_func_cname,
+                        pyx_tp_new_kwargs_func_type,
+                        args=[type_arg, args_tuple, kwargs],
+                        may_return_none=False,
+                        is_temp=True)
+        else:
+            # arbitrary variable, needs a None check for safety
+            type_arg = type_arg.as_none_safe_node(
+                "object.__new__(X): X is not a type object (NoneType)")
+
+        # Generic fallback through the tp_new utility helpers.
+        utility_code = UtilityCode.load_cached('tp_new', 'ObjectHandling.c')
+        if kwargs:
+            return ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_tp_new_kwargs", self.Pyx_tp_new_kwargs_func_type,
+                args=[type_arg, args_tuple, kwargs],
+                utility_code=utility_code,
+                is_temp=node.is_temp
+                )
+        else:
+            return ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_tp_new", self.Pyx_tp_new_func_type,
+                args=[type_arg, args_tuple],
+                utility_code=utility_code,
+                is_temp=node.is_temp
+            )
+
+    def _handle_any_slot__class__(self, node, function, args,
+                                is_unbound_method, kwargs=None):
+        """Leave calls to instance.__class__() untouched by returning the node as is."""
+        # The purpose of this function is to handle calls to instance.__class__() so that
+        # it doesn't get handled by the __Pyx_CallUnboundCMethod0 mechanism.
+        # TODO: optimizations of the instance.__class__() call might be possible in future.
+        return node
+
+    ### methods of builtin types
+
+    PyObject_Append_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_returncode_type, [
+            PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("item", PyrexTypes.py_object_type, None),
+            ],
+        exception_value="-1")
+
+    def _handle_simple_method_object_append(self, node, function, args, is_unbound_method):
+        """Optimistic optimisation as X.append() is almost always
+        referring to a list.
+        """
+        if len(args) != 2 or node.result_is_used or node.function.entry:
+            return node
+
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, "__Pyx_PyObject_Append", self.PyObject_Append_func_type,
+            args=args,
+            may_return_none=False,
+            is_temp=node.is_temp,
+            result_is_used=False,
+            utility_code=load_c_utility('append')
+        )
+
+    def _handle_simple_method_list_extend(self, node, function, args, is_unbound_method):
+        """Replace list.extend([...]) for short sequence literals values by sequential appends
+        to avoid creating an intermediate sequence argument.
+
+        Applies only to calls with exactly one argument that is a sequence
+        constructor without a multiplication factor and with at most 8 items.
+        An empty literal reduces the call to the (wrapped) list object itself.
+        Otherwise the individual append calls are chained with '|' into one
+        C return code expression.
+        """
+        if len(args) != 2:
+            return node
+        obj, value = args
+        if not value.is_sequence_constructor:
+            return node
+        items = list(value.args)
+        if value.mult_factor is not None or len(items) > 8:
+            # Appending wins for short sequences but slows down when multiple resize operations are needed.
+            # This seems to be a good enough limit that avoids repeated resizing.
+            # NOTE: the branch below is deliberately disabled by the 'False and' condition.
+            if False and isinstance(value, ExprNodes.ListNode):
+                # One would expect that tuples are more efficient here, but benchmarking with
+                # Py3.5 and Py3.7 suggests that they are not. Probably worth revisiting at some point.
+                # Might be related to the usage of PySequence_FAST() in CPython's list.extend(),
+                # which is probably tuned more towards lists than tuples (and rightly so).
+                tuple_node = args[1].as_tuple().analyse_types(self.current_env(), skip_children=True)
+                Visitor.recursively_replace_node(node, args[1], tuple_node)
+            return node
+        wrapped_obj = self._wrap_self_arg(obj, function, is_unbound_method, 'extend')
+        if not items:
+            # Empty sequences are not likely to occur, but why waste a call to list.extend() for them?
+            wrapped_obj.result_is_used = node.result_is_used
+            return wrapped_obj
+        cloned_obj = obj = wrapped_obj
+        if len(items) > 1 and not obj.is_simple():
+            # the list is referenced once per item, so evaluate it only once
+            cloned_obj = UtilNodes.LetRefNode(obj)
+        # Use ListComp_Append() for all but the last item and finish with PyList_Append()
+        # to shrink the list storage size at the very end if necessary.
+        temps = []
+        arg = items[-1]
+        if not arg.is_simple():
+            arg = UtilNodes.LetRefNode(arg)
+            temps.append(arg)
+        new_node = ExprNodes.PythonCapiCallNode(
+            node.pos, "__Pyx_PyList_Append", self.PyObject_Append_func_type,
+            args=[cloned_obj, arg],
+            is_temp=True,
+            utility_code=load_c_utility("ListAppend"))
+        # Walk the remaining items back to front; each new call becomes the left
+        # operand of '|' in front of the expression built so far.
+        for arg in items[-2::-1]:
+            if not arg.is_simple():
+                arg = UtilNodes.LetRefNode(arg)
+                temps.append(arg)
+            new_node = ExprNodes.binop_node(
+                node.pos, '|',
+                ExprNodes.PythonCapiCallNode(
+                    node.pos, "__Pyx_ListComp_Append", self.PyObject_Append_func_type,
+                    args=[cloned_obj, arg], py_name="extend",
+                    is_temp=True,
+                    utility_code=load_c_utility("ListCompAppend")),
+                new_node,
+                type=PyrexTypes.c_returncode_type,
+            )
+        new_node.result_is_used = node.result_is_used
+        if cloned_obj is not obj:
+            temps.append(cloned_obj)
+        # Each temp wraps the expression built so far, so later entries
+        # (the list reference last) end up as the outer wrappers.
+        for temp in temps:
+            new_node = UtilNodes.EvalWithTempExprNode(temp, new_node)
+            new_node.result_is_used = node.result_is_used
+        return new_node
+
+    PyByteArray_Append_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_returncode_type, [
+            PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("value", PyrexTypes.c_int_type, None),
+            ],
+        exception_value="-1")
+
+    PyByteArray_AppendObject_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_returncode_type, [
+            PyrexTypes.CFuncTypeArg("bytearray", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("value", PyrexTypes.py_object_type, None),
+            ],
+        exception_value="-1")
+
+    def _handle_simple_method_bytearray_append(self, node, function, args, is_unbound_method):
+        if len(args) != 2:
+            return node
+        func_name = "__Pyx_PyByteArray_Append"
+        func_type = self.PyByteArray_Append_func_type
+
+        value = unwrap_coerced_node(args[1])
+        if value.type.is_int or isinstance(value, ExprNodes.IntNode):
+            value = value.coerce_to(PyrexTypes.c_int_type, self.current_env())
+            utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
+        elif value.is_string_literal:
+            if not value.can_coerce_to_char_literal():
+                return node
+            value = value.coerce_to(PyrexTypes.c_char_type, self.current_env())
+            utility_code = UtilityCode.load_cached("ByteArrayAppend", "StringTools.c")
+        elif value.type.is_pyobject:
+            func_name = "__Pyx_PyByteArray_AppendObject"
+            func_type = self.PyByteArray_AppendObject_func_type
+            utility_code = UtilityCode.load_cached("ByteArrayAppendObject", "StringTools.c")
+        else:
+            return node
+
+        new_node = ExprNodes.PythonCapiCallNode(
+            node.pos, func_name, func_type,
+            args=[args[0], value],
+            may_return_none=False,
+            is_temp=node.is_temp,
+            utility_code=utility_code,
+        )
+        if node.result_is_used:
+            new_node = new_node.coerce_to(node.type, self.current_env())
+        return new_node
+
+    # Signature for the argument-less pop helpers: (list) -> object.
+    PyObject_Pop_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+            ])
+
+    # Signature for the indexed pop helpers.  The index is passed both as a Python
+    # object and as a C Py_ssize_t, together with a signedness flag.
+    PyObject_PopIndex_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("list", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("py_index", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("c_index", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("is_signed", PyrexTypes.c_int_type, None),
+        ],
+        has_varargs=True)  # to fake the additional macro args that lack a proper C type
+
+    def _handle_simple_method_list_pop(self, node, function, args, is_unbound_method):
+        """Optimise list.pop() by reusing the generic object.pop() handler with is_list=True."""
+        return self._handle_simple_method_object_pop(
+            node, function, args, is_unbound_method, is_list=True)
+
+    def _handle_simple_method_object_pop(self, node, function, args, is_unbound_method, is_list=False):
+        """Optimistic optimisation as X.pop([n]) is almost always
+        referring to a list.
+
+        With is_list=True the object is known to be a list: the 'List' variants
+        of the C helpers are used and the object is guarded against None.
+        X.pop() maps to __Pyx_Py{List,Object}_Pop(), X.pop(n) to
+        __Pyx_Py{List,Object}_PopIndex().  The node is returned unchanged when
+        there are no arguments, more than one index argument, or an index that
+        cannot be handled.
+        """
+        if not args:
+            return node
+        obj = args[0]
+        if is_list:
+            type_name = 'List'
+            obj = obj.as_none_safe_node(
+                "'NoneType' object has no attribute '%.30s'",
+                error="PyExc_AttributeError",
+                format_args=['pop'])
+        else:
+            type_name = 'Object'
+        if len(args) == 1:
+            return ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_Py%s_Pop" % type_name,
+                self.PyObject_Pop_func_type,
+                args=[obj],
+                may_return_none=True,
+                is_temp=node.is_temp,
+                utility_code=load_c_utility('pop'),
+            )
+        elif len(args) == 2:
+            index = unwrap_coerced_node(args[1])
+            # py_index stays None unless a Python-level index object is available
+            py_index = ExprNodes.NoneNode(index.pos)
+            orig_index_type = index.type
+            if not index.type.is_int:
+                if isinstance(index, ExprNodes.IntNode):
+                    py_index = index.coerce_to_pyobject(self.current_env())
+                    index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+                elif is_list:
+                    if index.type.is_pyobject:
+                        # keep the object as py_index and derive the C index from a clone of it
+                        py_index = index.coerce_to_simple(self.current_env())
+                        index = ExprNodes.CloneNode(py_index)
+                    index = index.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+                else:
+                    return node
+            elif not PyrexTypes.numeric_type_fits(index.type, PyrexTypes.c_py_ssize_t_type):
+                return node
+            elif isinstance(index, ExprNodes.IntNode):
+                py_index = index.coerce_to_pyobject(self.current_env())
+            # real type might still be larger at runtime
+            if not orig_index_type.is_int:
+                orig_index_type = index.type
+            if not orig_index_type.create_to_py_utility_code(self.current_env()):
+                return node
+            convert_func = orig_index_type.to_py_function
+            conversion_type = PyrexTypes.CFuncType(
+                PyrexTypes.py_object_type, [PyrexTypes.CFuncTypeArg("intval", orig_index_type, None)])
+            # Besides the four declared arguments, the C type declaration of the index
+            # and the name of its to-Python conversion function are passed as raw C names.
+            return ExprNodes.PythonCapiCallNode(
+                node.pos, "__Pyx_Py%s_PopIndex" % type_name,
+                self.PyObject_PopIndex_func_type,
+                args=[obj, py_index, index,
+                      ExprNodes.IntNode(index.pos, value=str(orig_index_type.signed and 1 or 0),
+                                        constant_result=orig_index_type.signed and 1 or 0,
+                                        type=PyrexTypes.c_int_type),
+                      ExprNodes.RawCNameExprNode(index.pos, PyrexTypes.c_void_type,
+                                                 orig_index_type.empty_declaration_code()),
+                      ExprNodes.RawCNameExprNode(index.pos, conversion_type, convert_func)],
+                may_return_none=True,
+                is_temp=node.is_temp,
+                utility_code=load_c_utility("pop_index"),
+            )
+
+        return node
+
+    single_param_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_returncode_type, [
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None),
+            ],
+        exception_value = "-1")
+
+    def _handle_simple_method_list_sort(self, node, function, args, is_unbound_method):
+        """Call PyList_Sort() instead of the 0-argument l.sort().
+        """
+        if len(args) != 1:
+            return node
+        return self._substitute_method_call(
+            node, function, "PyList_Sort", self.single_param_func_type,
+            'sort', is_unbound_method, args).coerce_to(node.type, self.current_env)
+
+    Pyx_PyDict_GetItem_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
+            ])
+
+    def _handle_simple_method_dict_get(self, node, function, args, is_unbound_method):
+        """Replace dict.get() by a call to PyDict_GetItem().
+        """
+        if len(args) == 2:
+            args.append(ExprNodes.NoneNode(node.pos))
+        elif len(args) != 3:
+            self._error_wrong_arg_count('dict.get', node, args, "2 or 3")
+            return node
+
+        return self._substitute_method_call(
+            node, function,
+            "__Pyx_PyDict_GetItemDefault", self.Pyx_PyDict_GetItem_func_type,
+            'get', is_unbound_method, args,
+            may_return_none = True,
+            utility_code = load_c_utility("dict_getitem_default"))
+
+    Pyx_PyDict_SetDefault_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("is_safe_type", PyrexTypes.c_int_type, None),
+            ])
+
+    def _handle_simple_method_dict_setdefault(self, node, function, args, is_unbound_method):
+        """Replace dict.setdefault() by calls to PyDict_GetItem() and PyDict_SetItem().
+        """
+        if len(args) == 2:
+            args.append(ExprNodes.NoneNode(node.pos))
+        elif len(args) != 3:
+            self._error_wrong_arg_count('dict.setdefault', node, args, "2 or 3")
+            return node
+        key_type = args[1].type
+        if key_type.is_builtin_type:
+            is_safe_type = int(key_type.name in
+                               'str bytes unicode float int long bool')
+        elif key_type is PyrexTypes.py_object_type:
+            is_safe_type = -1  # don't know
+        else:
+            is_safe_type = 0   # definitely not
+        args.append(ExprNodes.IntNode(
+            node.pos, value=str(is_safe_type), constant_result=is_safe_type))
+
+        return self._substitute_method_call(
+            node, function,
+            "__Pyx_PyDict_SetDefault", self.Pyx_PyDict_SetDefault_func_type,
+            'setdefault', is_unbound_method, args,
+            may_return_none=True,
+            utility_code=load_c_utility('dict_setdefault'))
+
+    PyDict_Pop_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("dict", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("key", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("default", PyrexTypes.py_object_type, None),
+            ])
+
+    def _handle_simple_method_dict_pop(self, node, function, args, is_unbound_method):
+        """Replace dict.pop() by a call to _PyDict_Pop().
+        """
+        if len(args) == 2:
+            args.append(ExprNodes.NullNode(node.pos))
+        elif len(args) != 3:
+            self._error_wrong_arg_count('dict.pop', node, args, "2 or 3")
+            return node
+
+        return self._substitute_method_call(
+            node, function,
+            "__Pyx_PyDict_Pop", self.PyDict_Pop_func_type,
+            'pop', is_unbound_method, args,
+            may_return_none=True,
+            utility_code=load_c_utility('py_dict_pop'))
+
+    Pyx_BinopInt_func_types = dict(
+        ((ctype, ret_type), PyrexTypes.CFuncType(
+            ret_type, [
+                PyrexTypes.CFuncTypeArg("op1", PyrexTypes.py_object_type, None),
+                PyrexTypes.CFuncTypeArg("op2", PyrexTypes.py_object_type, None),
+                PyrexTypes.CFuncTypeArg("cval", ctype, None),
+                PyrexTypes.CFuncTypeArg("inplace", PyrexTypes.c_bint_type, None),
+                PyrexTypes.CFuncTypeArg("zerodiv_check", PyrexTypes.c_bint_type, None),
+            ], exception_value=None if ret_type.is_pyobject else ret_type.exception_value))
+        for ctype in (PyrexTypes.c_long_type, PyrexTypes.c_double_type)
+        for ret_type in (PyrexTypes.py_object_type, PyrexTypes.c_bint_type)
+        )
+
+    def _handle_simple_method_object___add__(self, node, function, args, is_unbound_method):
+        """Hand an object.__add__() call to _optimise_num_binop() as operator 'Add'."""
+        return self._optimise_num_binop('Add', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___sub__(self, node, function, args, is_unbound_method):
+        """Hand an object.__sub__() call to _optimise_num_binop() as operator 'Subtract'."""
+        return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___mul__(self, node, function, args, is_unbound_method):
+        """Hand an object.__mul__() call to _optimise_num_binop() as operator 'Multiply'."""
+        return self._optimise_num_binop('Multiply', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___eq__(self, node, function, args, is_unbound_method):
+        """Hand an object.__eq__() call to _optimise_num_binop() as operator 'Eq'."""
+        return self._optimise_num_binop('Eq', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___ne__(self, node, function, args, is_unbound_method):
+        """Hand an object.__ne__() call to _optimise_num_binop() as operator 'Ne'."""
+        return self._optimise_num_binop('Ne', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___and__(self, node, function, args, is_unbound_method):
+        """Hand an object.__and__() call to _optimise_num_binop() as operator 'And'."""
+        return self._optimise_num_binop('And', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___or__(self, node, function, args, is_unbound_method):
+        """Hand an object.__or__() call to _optimise_num_binop() as operator 'Or'."""
+        return self._optimise_num_binop('Or', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___xor__(self, node, function, args, is_unbound_method):
+        """Hand an object.__xor__() call to _optimise_num_binop() as operator 'Xor'."""
+        return self._optimise_num_binop('Xor', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___rshift__(self, node, function, args, is_unbound_method):
+        if len(args) != 2 or not isinstance(args[1], ExprNodes.IntNode):
+            return node
+        if not args[1].has_constant_result() or not (1 <= args[1].constant_result <= 63):
+            return node
+        return self._optimise_num_binop('Rshift', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___lshift__(self, node, function, args, is_unbound_method):
+        if len(args) != 2 or not isinstance(args[1], ExprNodes.IntNode):
+            return node
+        if not args[1].has_constant_result() or not (1 <= args[1].constant_result <= 63):
+            return node
+        return self._optimise_num_binop('Lshift', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___mod__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_div('Remainder', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___floordiv__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_div('FloorDivide', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___truediv__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_div('TrueDivide', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_object___div__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_div('Divide', node, function, args, is_unbound_method)
+
+    def _optimise_num_div(self, operator, node, function, args, is_unbound_method):
+        if len(args) != 2 or not args[1].has_constant_result() or args[1].constant_result == 0:
+            return node
+        if isinstance(args[1], ExprNodes.IntNode):
+            if not (-2**30 <= args[1].constant_result <= 2**30):
+                return node
+        elif isinstance(args[1], ExprNodes.FloatNode):
+            if not (-2**53 <= args[1].constant_result <= 2**53):
+                return node
+        else:
+            return node
+        return self._optimise_num_binop(operator, node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___add__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Add', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___sub__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Subtract', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___truediv__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('TrueDivide', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___div__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Divide', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___mod__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Remainder', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___eq__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Eq', node, function, args, is_unbound_method)
+
+    def _handle_simple_method_float___ne__(self, node, function, args, is_unbound_method):
+        return self._optimise_num_binop('Ne', node, function, args, is_unbound_method)
+
+    def _optimise_num_binop(self, operator, node, function, args, is_unbound_method):
+        """
+        Optimise math operators for (likely) float or small integer operations.
+        """
+        if getattr(node, "special_bool_cmp_function", None):
+            return node  # already optimized
+
+        if len(args) != 2:
+            return node
+
+        if node.type.is_pyobject:
+            ret_type = PyrexTypes.py_object_type
+        elif node.type is PyrexTypes.c_bint_type and operator in ('Eq', 'Ne'):
+            ret_type = PyrexTypes.c_bint_type
+        else:
+            return node
+
+        result = optimise_numeric_binop(operator, node, ret_type, args[0], args[1])
+        if not result:
+            return node
+        func_cname, utility_code, extra_args, num_type = result
+        assert all([arg.type.is_pyobject for arg in args])
+        args = list(args) + extra_args
+
+        call_node = self._substitute_method_call(
+            node, function,
+            func_cname,
+            self.Pyx_BinopInt_func_types[(num_type, ret_type)],
+            '__%s__' % operator[:3].lower(), is_unbound_method, args,
+            may_return_none=True,
+            with_none_check=False,
+            utility_code=utility_code)
+
+        if node.type.is_pyobject and not ret_type.is_pyobject:
+            call_node = ExprNodes.CoerceToPyTypeNode(call_node, self.current_env(), node.type)
+        return call_node
+
+    ### unicode type methods
+
+    # C function signature used for the unicode character predicate calls:
+    # takes one ``c_py_ucs4_type`` argument ("uchar") and returns ``c_bint_type``.
+    PyUnicode_uchar_predicate_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_bint_type, [
+            PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
+            ])
+
+    def _inject_unicode_predicate(self, node, function, args, is_unbound_method):
+        if is_unbound_method or len(args) != 1:
+            return node
+        ustring = args[0]
+        if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
+               not ustring.arg.type.is_unicode_char:
+            return node
+        uchar = ustring.arg
+        method_name = function.attribute
+        if method_name == 'istitle':
+            # istitle() doesn't directly map to Py_UNICODE_ISTITLE()
+            utility_code = UtilityCode.load_cached(
+                "py_unicode_istitle", "StringTools.c")
+            function_name = '__Pyx_Py_UNICODE_ISTITLE'
+        else:
+            utility_code = None
+            function_name = 'Py_UNICODE_%s' % method_name.upper()
+        func_call = self._substitute_method_call(
+            node, function,
+            function_name, self.PyUnicode_uchar_predicate_func_type,
+            method_name, is_unbound_method, [uchar],
+            utility_code = utility_code)
+        if node.type.is_pyobject:
+            func_call = func_call.coerce_to_pyobject(self.current_env)
+        return func_call
+
+    # All of these unicode predicate methods share one implementation; the
+    # handler reads the actual method name from ``function.attribute``.
+    _handle_simple_method_unicode_isalnum   = _inject_unicode_predicate
+    _handle_simple_method_unicode_isalpha   = _inject_unicode_predicate
+    _handle_simple_method_unicode_isdecimal = _inject_unicode_predicate
+    _handle_simple_method_unicode_isdigit   = _inject_unicode_predicate
+    _handle_simple_method_unicode_islower   = _inject_unicode_predicate
+    _handle_simple_method_unicode_isnumeric = _inject_unicode_predicate
+    _handle_simple_method_unicode_isspace   = _inject_unicode_predicate
+    _handle_simple_method_unicode_istitle   = _inject_unicode_predicate
+    _handle_simple_method_unicode_isupper   = _inject_unicode_predicate
+
+    # C function signature mapping one ``c_py_ucs4_type`` character ("uchar")
+    # to another ``c_py_ucs4_type`` character.  Its only visible user is the
+    # disabled character conversion code below.
+    PyUnicode_uchar_conversion_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ucs4_type, [
+            PyrexTypes.CFuncTypeArg("uchar", PyrexTypes.c_py_ucs4_type, None),
+            ])
+
+    # DISABLED: Return value can only be one character, which is not correct.
+    '''
+    def _inject_unicode_character_conversion(self, node, function, args, is_unbound_method):
+        if is_unbound_method or len(args) != 1:
+            return node
+        ustring = args[0]
+        if not isinstance(ustring, ExprNodes.CoerceToPyTypeNode) or \
+               not ustring.arg.type.is_unicode_char:
+            return node
+        uchar = ustring.arg
+        method_name = function.attribute
+        function_name = 'Py_UNICODE_TO%s' % method_name.upper()
+        func_call = self._substitute_method_call(
+            node, function,
+            function_name, self.PyUnicode_uchar_conversion_func_type,
+            method_name, is_unbound_method, [uchar])
+        if node.type.is_pyobject:
+            func_call = func_call.coerce_to_pyobject(self.current_env)
+        return func_call
+
+    #_handle_simple_method_unicode_lower = _inject_unicode_character_conversion
+    #_handle_simple_method_unicode_upper = _inject_unicode_character_conversion
+    #_handle_simple_method_unicode_title = _inject_unicode_character_conversion
+    '''
+
+    # C function signature for "PyUnicode_Splitlines":
+    # (unicode str, bint keepends) -> list
+    PyUnicode_Splitlines_func_type = PyrexTypes.CFuncType(
+        Builtin.list_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("keepends", PyrexTypes.c_bint_type, None),
+            ])
+
+    def _handle_simple_method_unicode_splitlines(self, node, function, args, is_unbound_method):
+        """Replace unicode.splitlines(...) by a direct call to the
+        corresponding C-API function.
+        """
+        if len(args) not in (1,2):
+            self._error_wrong_arg_count('unicode.splitlines', node, args, "1 or 2")
+            return node
+        self._inject_bint_default_argument(node, args, 1, False)
+
+        return self._substitute_method_call(
+            node, function,
+            "PyUnicode_Splitlines", self.PyUnicode_Splitlines_func_type,
+            'splitlines', is_unbound_method, args)
+
+    # C function signature for "PyUnicode_Split":
+    # (unicode str, object sep, Py_ssize_t maxsplit) -> list
+    PyUnicode_Split_func_type = PyrexTypes.CFuncType(
+        Builtin.list_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("sep", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("maxsplit", PyrexTypes.c_py_ssize_t_type, None),
+            ]
+        )
+
+    def _handle_simple_method_unicode_split(self, node, function, args, is_unbound_method):
+        """Replace unicode.split(...) by a direct call to the
+        corresponding C-API function.
+        """
+        if len(args) not in (1,2,3):
+            self._error_wrong_arg_count('unicode.split', node, args, "1-3")
+            return node
+        if len(args) < 2:
+            args.append(ExprNodes.NullNode(node.pos))
+        else:
+            self._inject_null_for_none(args, 1)
+        self._inject_int_default_argument(
+            node, args, 2, PyrexTypes.c_py_ssize_t_type, "-1")
+
+        return self._substitute_method_call(
+            node, function,
+            "PyUnicode_Split", self.PyUnicode_Split_func_type,
+            'split', is_unbound_method, args)
+
+    # C function signature for "PyUnicode_Join":
+    # (unicode str, object seq) -> unicode
+    PyUnicode_Join_func_type = PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("seq", PyrexTypes.py_object_type, None),
+            ])
+
+    def _handle_simple_method_unicode_join(self, node, function, args, is_unbound_method):
+        """
+        unicode.join() builds a list first => see if we can do this more efficiently
+        """
+        if len(args) != 2:
+            self._error_wrong_arg_count('unicode.join', node, args, "2")
+            return node
+        if isinstance(args[1], ExprNodes.GeneratorExpressionNode):
+            gen_expr_node = args[1]
+            loop_node = gen_expr_node.loop
+
+            yield_statements = _find_yield_statements(loop_node)
+            if yield_statements:
+                inlined_genexpr = ExprNodes.InlinedGeneratorExpressionNode(
+                    node.pos, gen_expr_node, orig_func='list',
+                    comprehension_type=Builtin.list_type)
+
+                for yield_expression, yield_stat_node in yield_statements:
+                    append_node = ExprNodes.ComprehensionAppendNode(
+                        yield_expression.pos,
+                        expr=yield_expression,
+                        target=inlined_genexpr.target)
+
+                    Visitor.recursively_replace_node(gen_expr_node, yield_stat_node, append_node)
+
+                args[1] = inlined_genexpr
+
+        return self._substitute_method_call(
+            node, function,
+            "PyUnicode_Join", self.PyUnicode_Join_func_type,
+            'join', is_unbound_method, args)
+
+    # C function signature shared by the ``__Pyx_Py*_Tailmatch`` helpers:
+    # (object str, object substring, Py_ssize_t start, Py_ssize_t end,
+    #  int direction) -> bint, with '-1' declared as the exception value.
+    PyString_Tailmatch_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_bint_type, [
+            PyrexTypes.CFuncTypeArg("str", PyrexTypes.py_object_type, None),  # bytes/str/unicode
+            PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
+            ],
+        exception_value = '-1')
+
+    def _handle_simple_method_unicode_endswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'unicode', 'endswith',
+            unicode_tailmatch_utility_code, +1)
+
+    def _handle_simple_method_unicode_startswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'unicode', 'startswith',
+            unicode_tailmatch_utility_code, -1)
+
+    def _inject_tailmatch(self, node, function, args, is_unbound_method, type_name,
+                          method_name, utility_code, direction):
+        """Replace unicode.startswith(...) and unicode.endswith(...)
+        by a direct call to the corresponding C-API function.
+        """
+        if len(args) not in (2,3,4):
+            self._error_wrong_arg_count('%s.%s' % (type_name, method_name), node, args, "2-4")
+            return node
+        self._inject_int_default_argument(
+            node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+        self._inject_int_default_argument(
+            node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+        args.append(ExprNodes.IntNode(
+            node.pos, value=str(direction), type=PyrexTypes.c_int_type))
+
+        method_call = self._substitute_method_call(
+            node, function,
+            "__Pyx_Py%s_Tailmatch" % type_name.capitalize(),
+            self.PyString_Tailmatch_func_type,
+            method_name, is_unbound_method, args,
+            utility_code = utility_code)
+        return method_call.coerce_to(Builtin.bool_type, self.current_env())
+
+    # C function signature for "PyUnicode_Find":
+    # (unicode str, object substring, Py_ssize_t start, Py_ssize_t end,
+    #  int direction) -> Py_ssize_t, with '-2' declared as the exception value.
+    PyUnicode_Find_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("direction", PyrexTypes.c_int_type, None),
+            ],
+        exception_value = '-2')
+
+    def _handle_simple_method_unicode_find(self, node, function, args, is_unbound_method):
+        return self._inject_unicode_find(
+            node, function, args, is_unbound_method, 'find', +1)
+
+    def _handle_simple_method_unicode_rfind(self, node, function, args, is_unbound_method):
+        return self._inject_unicode_find(
+            node, function, args, is_unbound_method, 'rfind', -1)
+
+    def _inject_unicode_find(self, node, function, args, is_unbound_method,
+                             method_name, direction):
+        """Replace unicode.find(...) and unicode.rfind(...) by a
+        direct call to the corresponding C-API function.
+        """
+        if len(args) not in (2,3,4):
+            self._error_wrong_arg_count('unicode.%s' % method_name, node, args, "2-4")
+            return node
+        self._inject_int_default_argument(
+            node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+        self._inject_int_default_argument(
+            node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+        args.append(ExprNodes.IntNode(
+            node.pos, value=str(direction), type=PyrexTypes.c_int_type))
+
+        method_call = self._substitute_method_call(
+            node, function, "PyUnicode_Find", self.PyUnicode_Find_func_type,
+            method_name, is_unbound_method, args)
+        return method_call.coerce_to_pyobject(self.current_env())
+
+    # C function signature for "PyUnicode_Count":
+    # (unicode str, object substring, Py_ssize_t start, Py_ssize_t end)
+    # -> Py_ssize_t, with '-1' declared as the exception value.
+    PyUnicode_Count_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.c_py_ssize_t_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("end", PyrexTypes.c_py_ssize_t_type, None),
+            ],
+        exception_value = '-1')
+
+    def _handle_simple_method_unicode_count(self, node, function, args, is_unbound_method):
+        """Replace unicode.count(...) by a direct call to the
+        corresponding C-API function.
+        """
+        if len(args) not in (2,3,4):
+            self._error_wrong_arg_count('unicode.count', node, args, "2-4")
+            return node
+        self._inject_int_default_argument(
+            node, args, 2, PyrexTypes.c_py_ssize_t_type, "0")
+        self._inject_int_default_argument(
+            node, args, 3, PyrexTypes.c_py_ssize_t_type, "PY_SSIZE_T_MAX")
+
+        method_call = self._substitute_method_call(
+            node, function, "PyUnicode_Count", self.PyUnicode_Count_func_type,
+            'count', is_unbound_method, args)
+        return method_call.coerce_to_pyobject(self.current_env())
+
+    # C function signature for "PyUnicode_Replace":
+    # (unicode str, object substring, object replstr, Py_ssize_t maxcount)
+    # -> unicode
+    PyUnicode_Replace_func_type = PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("str", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("substring", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("replstr", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("maxcount", PyrexTypes.c_py_ssize_t_type, None),
+            ])
+
+    def _handle_simple_method_unicode_replace(self, node, function, args, is_unbound_method):
+        """Replace unicode.replace(...) by a direct call to the
+        corresponding C-API function.
+        """
+        if len(args) not in (3,4):
+            self._error_wrong_arg_count('unicode.replace', node, args, "3-4")
+            return node
+        self._inject_int_default_argument(
+            node, args, 3, PyrexTypes.c_py_ssize_t_type, "-1")
+
+        return self._substitute_method_call(
+            node, function, "PyUnicode_Replace", self.PyUnicode_Replace_func_type,
+            'replace', is_unbound_method, args)
+
+    # C function signature for "PyUnicode_AsEncodedString":
+    # (unicode obj, const char* encoding, const char* errors) -> bytes
+    PyUnicode_AsEncodedString_func_type = PyrexTypes.CFuncType(
+        Builtin.bytes_type, [
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
+            PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
+            ])
+
+    # C function signature for the codec specific "PyUnicode_As<Codec>String"
+    # functions: (unicode obj) -> bytes
+    PyUnicode_AsXyzString_func_type = PyrexTypes.CFuncType(
+        Builtin.bytes_type, [
+            PyrexTypes.CFuncTypeArg("obj", Builtin.unicode_type, None),
+            ])
+
+    # Codec names that get special treatment when optimising encode()/decode();
+    # see _find_special_codec_name() for how a requested encoding is matched.
+    _special_encodings = ['UTF8', 'UTF16', 'UTF-16LE', 'UTF-16BE', 'Latin1', 'ASCII',
+                          'unicode_escape', 'raw_unicode_escape']
+
+    # (name, encoder function) pairs, resolved once when the class body runs
+    _special_codecs = [ (name, codecs.getencoder(name))
+                        for name in _special_encodings ]
+
+    def _handle_simple_method_unicode_encode(self, node, function, args, is_unbound_method):
+        """Replace unicode.encode(...) by a direct C-API call to the
+        corresponding codec.
+        """
+        if len(args) < 1 or len(args) > 3:
+            self._error_wrong_arg_count('unicode.encode', node, args, '1-3')
+            return node
+
+        string_node = args[0]
+
+        if len(args) == 1:
+            null_node = ExprNodes.NullNode(node.pos)
+            return self._substitute_method_call(
+                node, function, "PyUnicode_AsEncodedString",
+                self.PyUnicode_AsEncodedString_func_type,
+                'encode', is_unbound_method, [string_node, null_node, null_node])
+
+        parameters = self._unpack_encoding_and_error_mode(node.pos, args)
+        if parameters is None:
+            return node
+        encoding, encoding_node, error_handling, error_handling_node = parameters
+
+        if encoding and isinstance(string_node, ExprNodes.UnicodeNode):
+            # constant, so try to do the encoding at compile time
+            try:
+                value = string_node.value.encode(encoding, error_handling)
+            except:
+                # well, looks like we can't
+                pass
+            else:
+                value = bytes_literal(value, encoding)
+                return ExprNodes.BytesNode(string_node.pos, value=value, type=Builtin.bytes_type)
+
+        if encoding and error_handling == 'strict':
+            # try to find a specific encoder function
+            codec_name = self._find_special_codec_name(encoding)
+            if codec_name is not None and '-' not in codec_name:
+                encode_function = "PyUnicode_As%sString" % codec_name
+                return self._substitute_method_call(
+                    node, function, encode_function,
+                    self.PyUnicode_AsXyzString_func_type,
+                    'encode', is_unbound_method, [string_node])
+
+        return self._substitute_method_call(
+            node, function, "PyUnicode_AsEncodedString",
+            self.PyUnicode_AsEncodedString_func_type,
+            'encode', is_unbound_method,
+            [string_node, encoding_node, error_handling_node])
+
+    # Pointer to a codec specific decoder function:
+    # (const char* string, Py_ssize_t size, const char* errors) -> unicode
+    PyUnicode_DecodeXyz_func_ptr_type = PyrexTypes.CPtrType(PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("size", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
+        ]))
+
+    # Signature of the decode helper used for C strings (const char*).
+    _decode_c_string_func_type = PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("string", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
+        ])
+
+    # Signature of the decode helpers used for Python bytes/bytearray objects;
+    # same as above except that the string is passed as a Python object.
+    _decode_bytes_func_type = PyrexTypes.CFuncType(
+        Builtin.unicode_type, [
+            PyrexTypes.CFuncTypeArg("string", PyrexTypes.py_object_type, None),
+            PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+            PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
+            PyrexTypes.CFuncTypeArg("decode_func", PyUnicode_DecodeXyz_func_ptr_type, None),
+        ])
+
+    # Signature of the C++ string decode helper; built on first use in
+    # _handle_simple_method_bytes_decode() from the C++ string type seen there.
+    _decode_cpp_string_func_type = None  # lazy init
+
+    def _handle_simple_method_bytes_decode(self, node, function, args, is_unbound_method):
+        """Replace ``.decode()`` on a C string (char*), a C++ string or a
+        Python bytes/bytearray object by a direct call to the matching
+        ``__Pyx_decode_*`` helper from StringTools.c, possibly resolving a
+        slice on the decoded value.
+
+        ``args[0]`` is the value being decoded; ``args[1]`` and ``args[2]``,
+        when present, are the encoding and the error handling mode.
+
+        Returns ``node`` unchanged if the argument count is wrong (after
+        reporting an error), if the string type is none of the supported
+        ones, or if the encoding/errors arguments cannot be unpacked.
+        """
+        if not (1 <= len(args) <= 3):
+            self._error_wrong_arg_count('bytes.decode', node, args, '1-3')
+            return node
+
+        # normalise input nodes: strip a slice and a coercion to a Python
+        # object so that 'string_node' is the underlying value being decoded
+        string_node = args[0]
+        start = stop = None
+        if isinstance(string_node, ExprNodes.SliceIndexNode):
+            index_node = string_node
+            string_node = index_node.base
+            start, stop = index_node.start, index_node.stop
+            # a missing or constant zero start index means "from the beginning"
+            if not start or start.constant_result == 0:
+                start = None
+        if isinstance(string_node, ExprNodes.CoerceToPyTypeNode):
+            string_node = string_node.arg
+
+        string_type = string_node.type
+        if string_type in (Builtin.bytes_type, Builtin.bytearray_type):
+            # Python objects: guard against None with the error message that
+            # fits the call form (unbound descriptor call vs. attribute access)
+            if is_unbound_method:
+                string_node = string_node.as_none_safe_node(
+                    "descriptor '%s' requires a '%s' object but received a 'NoneType'",
+                    format_args=['decode', string_type.name])
+            else:
+                string_node = string_node.as_none_safe_node(
+                    "'NoneType' object has no attribute '%.30s'",
+                    error="PyExc_AttributeError",
+                    format_args=['decode'])
+        elif not string_type.is_string and not string_type.is_cpp_string:
+            # nothing to optimise here
+            return node
+
+        parameters = self._unpack_encoding_and_error_mode(node.pos, args)
+        if parameters is None:
+            return node
+        encoding, encoding_node, error_handling, error_handling_node = parameters
+
+        # make sure that start/stop are C integers; start defaults to 0
+        if not start:
+            start = ExprNodes.IntNode(node.pos, value='0', constant_result=0)
+        elif not start.type.is_int:
+            start = start.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+        if stop and not stop.type.is_int:
+            stop = stop.coerce_to(PyrexTypes.c_py_ssize_t_type, self.current_env())
+
+        # try to find a specific encoder function
+        codec_name = None
+        if encoding is not None:
+            codec_name = self._find_special_codec_name(encoding)
+        if codec_name is not None:
+            # the UTF-16 variants go through __Pyx_ prefixed functions
+            if codec_name in ('UTF16', 'UTF-16LE', 'UTF-16BE'):
+                codec_cname = "__Pyx_PyUnicode_Decode%s" % codec_name.replace('-', '')
+            else:
+                codec_cname = "PyUnicode_Decode%s" % codec_name
+            decode_function = ExprNodes.RawCNameExprNode(
+                node.pos, type=self.PyUnicode_DecodeXyz_func_ptr_type, cname=codec_cname)
+            # a specific decoder function is passed, so the encoding name is NULL
+            encoding_node = ExprNodes.NullNode(node.pos)
+        else:
+            decode_function = ExprNodes.NullNode(node.pos)
+
+        # build the helper function call
+        temps = []
+        if string_type.is_string:
+            # C string
+            if not stop:
+                # use strlen() to find the string length, just as CPython would
+                if not string_node.is_name:
+                    string_node = UtilNodes.LetRefNode(string_node)  # used twice
+                    temps.append(string_node)
+                stop = ExprNodes.PythonCapiCallNode(
+                    string_node.pos, "__Pyx_ssize_strlen", self.Pyx_ssize_strlen_func_type,
+                    args=[string_node],
+                    is_temp=True,
+                )
+            helper_func_type = self._decode_c_string_func_type
+            utility_code_name = 'decode_c_string'
+        elif string_type.is_cpp_string:
+            # C++ std::string
+            if not stop:
+                # no explicit stop index: pass PY_SSIZE_T_MAX as the upper bound
+                stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
+                                         constant_result=ExprNodes.not_a_constant)
+            if self._decode_cpp_string_func_type is None:
+                # lazy init to reuse the C++ string type
+                self._decode_cpp_string_func_type = PyrexTypes.CFuncType(
+                    Builtin.unicode_type, [
+                        PyrexTypes.CFuncTypeArg("string", string_type, None),
+                        PyrexTypes.CFuncTypeArg("start", PyrexTypes.c_py_ssize_t_type, None),
+                        PyrexTypes.CFuncTypeArg("stop", PyrexTypes.c_py_ssize_t_type, None),
+                        PyrexTypes.CFuncTypeArg("encoding", PyrexTypes.c_const_char_ptr_type, None),
+                        PyrexTypes.CFuncTypeArg("errors", PyrexTypes.c_const_char_ptr_type, None),
+                        PyrexTypes.CFuncTypeArg("decode_func", self.PyUnicode_DecodeXyz_func_ptr_type, None),
+                    ])
+            helper_func_type = self._decode_cpp_string_func_type
+            utility_code_name = 'decode_cpp_string'
+        else:
+            # Python bytes/bytearray object
+            if not stop:
+                # no explicit stop index: pass PY_SSIZE_T_MAX as the upper bound
+                stop = ExprNodes.IntNode(node.pos, value='PY_SSIZE_T_MAX',
+                                         constant_result=ExprNodes.not_a_constant)
+            helper_func_type = self._decode_bytes_func_type
+            if string_type is Builtin.bytes_type:
+                utility_code_name = 'decode_bytes'
+            else:
+                utility_code_name = 'decode_bytearray'
+
+        # the helper's C name is derived from the utility code name
+        node = ExprNodes.PythonCapiCallNode(
+            node.pos, '__Pyx_%s' % utility_code_name, helper_func_type,
+            args=[string_node, start, stop, encoding_node, error_handling_node, decode_function],
+            is_temp=node.is_temp,
+            utility_code=UtilityCode.load_cached(utility_code_name, 'StringTools.c'),
+        )
+
+        # wrap the call in the temps collected above, innermost last
+        for temp in temps[::-1]:
+            node = UtilNodes.EvalWithTempExprNode(temp, node)
+        return node
+
+    # bytearray.decode() shares the bytes implementation, which checks for
+    # both Builtin.bytes_type and Builtin.bytearray_type.
+    _handle_simple_method_bytearray_decode = _handle_simple_method_bytes_decode
+
+    def _find_special_codec_name(self, encoding):
+        try:
+            requested_codec = codecs.getencoder(encoding)
+        except LookupError:
+            return None
+        for name, codec in self._special_codecs:
+            if codec == requested_codec:
+                if '_' in name:
+                    name = ''.join([s.capitalize()
+                                    for s in name.split('_')])
+                return name
+        return None
+
+    def _unpack_encoding_and_error_mode(self, pos, args):
+        null_node = ExprNodes.NullNode(pos)
+
+        if len(args) >= 2:
+            encoding, encoding_node = self._unpack_string_and_cstring_node(args[1])
+            if encoding_node is None:
+                return None
+        else:
+            encoding = None
+            encoding_node = null_node
+
+        if len(args) == 3:
+            error_handling, error_handling_node = self._unpack_string_and_cstring_node(args[2])
+            if error_handling_node is None:
+                return None
+            if error_handling == 'strict':
+                error_handling_node = null_node
+        else:
+            error_handling = 'strict'
+            error_handling_node = null_node
+
+        return (encoding, encoding_node, error_handling, error_handling_node)
+
+    def _unpack_string_and_cstring_node(self, node):
+        if isinstance(node, ExprNodes.CoerceToPyTypeNode):
+            node = node.arg
+        if isinstance(node, ExprNodes.UnicodeNode):
+            encoding = node.value
+            node = ExprNodes.BytesNode(
+                node.pos, value=encoding.as_utf8_string(), type=PyrexTypes.c_const_char_ptr_type)
+        elif isinstance(node, (ExprNodes.StringNode, ExprNodes.BytesNode)):
+            encoding = node.value.decode('ISO-8859-1')
+            node = ExprNodes.BytesNode(
+                node.pos, value=node.value, type=PyrexTypes.c_const_char_ptr_type)
+        elif node.type is Builtin.bytes_type:
+            encoding = None
+            node = node.coerce_to(PyrexTypes.c_const_char_ptr_type, self.current_env())
+        elif node.type.is_string:
+            encoding = None
+        else:
+            encoding = node = None
+        return encoding, node
+
+    def _handle_simple_method_str_endswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'str', 'endswith',
+            str_tailmatch_utility_code, +1)
+
+    def _handle_simple_method_str_startswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'str', 'startswith',
+            str_tailmatch_utility_code, -1)
+
+    def _handle_simple_method_bytes_endswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'bytes', 'endswith',
+            bytes_tailmatch_utility_code, +1)
+
+    def _handle_simple_method_bytes_startswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'bytes', 'startswith',
+            bytes_tailmatch_utility_code, -1)
+
+    '''   # disabled for now, enable when we consider it worth it (see StringTools.c)
+    def _handle_simple_method_bytearray_endswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'bytearray', 'endswith',
+            bytes_tailmatch_utility_code, +1)
+
+    def _handle_simple_method_bytearray_startswith(self, node, function, args, is_unbound_method):
+        return self._inject_tailmatch(
+            node, function, args, is_unbound_method, 'bytearray', 'startswith',
+            bytes_tailmatch_utility_code, -1)
+    '''
+
+    ### helpers
+
+    def _substitute_method_call(self, node, function, name, func_type,
+                                attr_name, is_unbound_method, args=(),
+                                utility_code=None, is_temp=None,
+                                may_return_none=ExprNodes.PythonCapiCallNode.may_return_none,
+                                with_none_check=True):  # returns a PythonCapiCallNode calling C function 'name'
+        args = list(args)  # private copy, the caller's sequence is not modified
+        if with_none_check and args:
+            args[0] = self._wrap_self_arg(args[0], function, is_unbound_method, attr_name)  # None guard on 'self'
+        if is_temp is None:
+            is_temp = node.is_temp  # default: inherit from the call node being processed
+        return ExprNodes.PythonCapiCallNode(
+            node.pos, name, func_type,
+            args = args,
+            is_temp = is_temp,
+            utility_code = utility_code,
+            may_return_none = may_return_none,
+            result_is_used = node.result_is_used,  # always copied from the original node
+            )
+
+    def _wrap_self_arg(self, self_arg, function, is_unbound_method, attr_name):  # 'function' is unused here
+        if self_arg.is_literal:
+            return self_arg  # literals are returned without a None check
+        if is_unbound_method:
+            self_arg = self_arg.as_none_safe_node(  # no 'error' given: as_none_safe_node's default error applies
+                "descriptor '%s' requires a '%s' object but received a 'NoneType'",
+                format_args=[attr_name, self_arg.type.name])
+        else:
+            self_arg = self_arg.as_none_safe_node(  # placeholder is '%.30s' for names up to 30 chars, else '%s'
+                "'NoneType' object has no attribute '%{0}s'".format('.30' if len(attr_name) <= 30 else ''),
+                error="PyExc_AttributeError",
+                format_args=[attr_name])
+        return self_arg
+
+    obj_to_obj_func_type = PyrexTypes.CFuncType(  # signature: (object obj) -> object
+        PyrexTypes.py_object_type, [
+            PyrexTypes.CFuncTypeArg("obj", PyrexTypes.py_object_type, None)
+        ])
+
+    def _inject_null_for_none(self, args, index):  # modifies 'args' in place, returns None
+        if len(args) <= index:
+            return  # argument not passed, nothing to replace
+        arg = args[index]
+        args[index] = ExprNodes.NullNode(arg.pos) if arg.is_none else ExprNodes.PythonCapiCallNode(  # literal None => NullNode
+            arg.pos, "__Pyx_NoneAsNull",  # everything else is passed through this C helper
+            self.obj_to_obj_func_type,
+            args=[arg.coerce_to_simple(self.current_env())],
+            is_temp=0,
+        )
+
+    def _inject_int_default_argument(self, node, args, arg_index, type, default_value):
+        # Python usually allows passing None for range bounds,
+        # so we treat that as requesting the default.
+        assert len(args) >= arg_index
+        if len(args) == arg_index or args[arg_index].is_none:
+            args.append(ExprNodes.IntNode(node.pos, value=str(default_value),
+                                          type=type, constant_result=default_value))
+        else:
+            arg = args[arg_index].coerce_to(type, self.current_env())
+            if isinstance(arg, ExprNodes.CoerceFromPyTypeNode):
+                # Add a runtime check for None and map it to the default value.
+                arg.special_none_cvalue = str(default_value)
+            args[arg_index] = arg
+
+    def _inject_bint_default_argument(self, node, args, arg_index, default_value):
+        assert len(args) >= arg_index
+        if len(args) == arg_index:
+            default_value = bool(default_value)
+            args.append(ExprNodes.BoolNode(node.pos, value=default_value,
+                                           constant_result=default_value))
+        else:
+            args[arg_index] = args[arg_index].coerce_to_boolean(self.current_env())
+
+
+def optimise_numeric_binop(operator, node, ret_type, arg0, arg1):  # -> (func_cname, utility_code, extra_args, num_type) or None
+    """
+    Optimise math operators for (likely) float or small integer operations.
+    """
+    # When adding IntNode/FloatNode to something else, assume other operand is also numeric.
+    # Prefer constants on RHS as they allow better size control for some operators.
+    num_nodes = (ExprNodes.IntNode, ExprNodes.FloatNode)
+    if isinstance(arg1, num_nodes):
+        if arg0.type is not PyrexTypes.py_object_type:
+            return None  # the non-literal operand must be a generic Python object
+        numval = arg1
+        arg_order = 'ObjC'  # object on the left, constant on the right
+    elif isinstance(arg0, num_nodes):
+        if arg1.type is not PyrexTypes.py_object_type:
+            return None
+        numval = arg0
+        arg_order = 'CObj'  # constant on the left, object on the right
+    else:
+        return None  # no numeric literal operand at all
+
+    if not numval.has_constant_result():
+        return None
+
+    # is_float is an instance check rather than numval.type.is_float because
+    # it will often be a Python float type rather than a C float type
+    is_float = isinstance(numval, ExprNodes.FloatNode)
+    num_type = PyrexTypes.c_double_type if is_float else PyrexTypes.c_long_type
+    if is_float:
+        if operator not in ('Add', 'Subtract', 'Remainder', 'TrueDivide', 'Divide', 'Eq', 'Ne'):
+            return None  # only these operators are handled for float constants
+    elif operator == 'Divide':
+        # mixed old-/new-style division is not currently optimised for integers
+        return None
+    elif abs(numval.constant_result) > 2**30:
+        # Cut off at an integer border that is still safe for all operations.
+        return None
+
+    if operator in ('TrueDivide', 'FloorDivide', 'Divide', 'Remainder'):
+        if arg1.constant_result == 0:
+            # Don't optimise division by 0. :)
+            return None
+
+    extra_args = []  # arguments added after the operands: constant, inplace flag, optional zero-division flag
+
+    extra_args.append((ExprNodes.FloatNode if is_float else ExprNodes.IntNode)(
+        numval.pos, value=numval.value, constant_result=numval.constant_result,
+        type=num_type))  # copy of the constant, typed as C double / C long
+    inplace = node.inplace if isinstance(node, ExprNodes.NumBinopNode) else False
+    extra_args.append(ExprNodes.BoolNode(node.pos, value=inplace, constant_result=inplace))
+    if is_float or operator not in ('Eq', 'Ne'):
+        # "PyFloatBinop" and "PyIntBinop" take an additional "check for zero division" argument.
+        zerodivision_check = arg_order == 'CObj' and (  # only set when the object operand is on the right
+            not node.cdivision if isinstance(node, ExprNodes.DivNode) else False)
+        extra_args.append(ExprNodes.BoolNode(node.pos, value=zerodivision_check, constant_result=zerodivision_check))
+
+    utility_code = TempitaUtilityCode.load_cached(  # template picked by constant kind and operator
+        "PyFloatBinop" if is_float else "PyIntCompare" if operator in ('Eq', 'Ne') else "PyIntBinop",
+        "Optimize.c",
+        context=dict(op=operator, order=arg_order, ret_type=ret_type))
+
+    func_cname = "__Pyx_Py%s_%s%s%s" % (  # e.g. "__Pyx_PyInt_AddObjC" for an object return type
+        'Float' if is_float else 'Int',
+        '' if ret_type.is_pyobject else 'Bool',
+        operator,
+        arg_order)
+
+    return func_cname, utility_code, extra_args, num_type
+
+
+unicode_tailmatch_utility_code = UtilityCode.load_cached('unicode_tailmatch', 'StringTools.c')  # not referenced in the nearby handlers
+bytes_tailmatch_utility_code = UtilityCode.load_cached('bytes_tailmatch', 'StringTools.c')  # used by the bytes startswith/endswith handlers
+str_tailmatch_utility_code = UtilityCode.load_cached('str_tailmatch', 'StringTools.c')  # used by the str startswith/endswith handlers
+
+
+class ConstantFolding(Visitor.VisitorTransform, SkipDeclarations):
+    """Calculate the result of constant expressions to store it in
+    ``expr_node.constant_result``, and replace trivial cases by their
+    constant result.
+
+    General rules:
+
+    - We calculate float constants to make them available to the
+      compiler, but we do not aggregate them into a single literal
+      node to prevent any loss of precision.
+
+    - We recursively calculate constants from non-literal nodes to
+      make them available to the compiler, but we only aggregate
+      literal nodes at each step.  Non-literal nodes are never merged
+      into a single node.
+    """
+
+    def __init__(self, reevaluate=False):
+        """
+        The reevaluate argument specifies whether constant values that were
+        previously computed should be recomputed (checked in _calculate_const).
+        """
+        super(ConstantFolding, self).__init__()
+        self.reevaluate = reevaluate  # False: nodes with an already set constant_result are skipped
+
+    def _calculate_const(self, node):  # visits the children, then sets node.constant_result; returns None
+        if (not self.reevaluate and
+                node.constant_result is not ExprNodes.constant_value_not_set):
+            return  # a result was already stored and re-evaluation is off
+
+        # make sure we always set the value
+        not_a_constant = ExprNodes.not_a_constant
+        node.constant_result = not_a_constant
+
+        # check if all children are constant
+        children = self.visitchildren(node)
+        for child_result in children.values():
+            if type(child_result) is list:  # list-valued child attribute: check each element
+                for child in child_result:
+                    if getattr(child, 'constant_result', not_a_constant) is not_a_constant:
+                        return  # objects without a 'constant_result' attribute count as non-constant
+            elif getattr(child_result, 'constant_result', not_a_constant) is not_a_constant:
+                return
+
+        # now try to calculate the real constant value
+        try:
+            node.calculate_constant_result()
+#            if node.constant_result is not ExprNodes.not_a_constant:
+#                print node.__class__.__name__, node.constant_result
+        except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
+            # ignore all 'normal' errors here => no constant result
+            pass
+        except Exception:
+            # this looks like a real error: print the traceback to stdout, but do not re-raise
+            import traceback, sys
+            traceback.print_exc(file=sys.stdout)
+
+    NODE_TYPE_ORDER = [ExprNodes.BoolNode, ExprNodes.CharNode,  # list position is the rank
+                       ExprNodes.IntNode, ExprNodes.FloatNode]  # that _widest_node_class maximises
+
+    def _widest_node_class(self, *nodes):  # -> class from NODE_TYPE_ORDER, or None
+        try:
+            return self.NODE_TYPE_ORDER[  # exact type() lookup: subclasses of the listed classes do not match
+                max(map(self.NODE_TYPE_ORDER.index, map(type, nodes)))]
+        except ValueError:  # raised by list.index() for an unlisted type (and by max() without nodes)
+            return None
+
+    def _bool_node(self, node, value):  # new BoolNode at node.pos; 'node' only provides the position
+        value = bool(value)  # normalise any truthy/falsy constant to True/False
+        return ExprNodes.BoolNode(node.pos, value=value, constant_result=value)
+
+    def visit_ExprNode(self, node):  # only records constant_result, the node is never replaced
+        self._calculate_const(node)  # this also visits the children
+        return node
+
+    def visit_UnopNode(self, node):  # folds unary operators on literal operands
+        self._calculate_const(node)
+        if not node.has_constant_result():
+            if node.operator == '!':
+                return self._handle_NotNode(node)  # a non-constant 'not' may still be rewritten
+            return node
+        if not node.operand.is_literal:
+            return node  # constant, but only literal operands are replaced by a new node
+        if node.operator == '!':
+            return self._bool_node(node, node.constant_result)
+        elif isinstance(node.operand, ExprNodes.BoolNode):
+            return ExprNodes.IntNode(node.pos, value=str(int(node.constant_result)),  # bool operand => C int literal
+                                     type=PyrexTypes.c_int_type,
+                                     constant_result=int(node.constant_result))
+        elif node.operator == '+':
+            return self._handle_UnaryPlusNode(node)
+        elif node.operator == '-':
+            return self._handle_UnaryMinusNode(node)
+        return node  # any other operator is left alone
+
+    _negate_operator = {  # comparison operator => its negation; only these four are listed
+        'in': 'not_in',
+        'not_in': 'in',
+        'is': 'is_not',
+        'is_not': 'is'
+    }.get  # bound dict.get: returns None for every other operator
+
+    def _handle_NotNode(self, node):  # rewrites 'not (a OP b)' as 'a NEG_OP b' where possible
+        operand = node.operand
+        if isinstance(operand, ExprNodes.PrimaryCmpNode):
+            operator = self._negate_operator(operand.operator)
+            if operator:
+                node = copy.copy(operand)  # shallow copy, the original comparison node is not modified
+                node.operator = operator
+                node = self.visit_PrimaryCmpNode(node)  # visit the rewritten comparison again
+        return node  # unchanged 'not' node if nothing could be rewritten
+
+    def _handle_UnaryMinusNode(self, node):  # folds '-<literal>' into a single literal node where safe
+        def _negate(value):  # textual negation: toggles a leading '-' on the literal's string value
+            if value.startswith('-'):
+                value = value[1:]
+            else:
+                value = '-' + value
+            return value
+
+        node_type = node.operand.type
+        if isinstance(node.operand, ExprNodes.FloatNode):
+            # this is a safe operation
+            return ExprNodes.FloatNode(node.pos, value=_negate(node.operand.value),
+                                       type=node_type,
+                                       constant_result=node.constant_result)
+        if node_type.is_int and node_type.signed or \
+                isinstance(node.operand, ExprNodes.IntNode) and node_type.is_pyobject:
+            return ExprNodes.IntNode(node.pos, value=_negate(node.operand.value),  # signed C int or Python int literal
+                                     type=node_type,
+                                     longness=node.operand.longness,
+                                     constant_result=node.constant_result)
+        return node  # anything else is not folded
+
+    def _handle_UnaryPlusNode(self, node):
+        if (node.operand.has_constant_result() and
+                    node.constant_result == node.operand.constant_result):
+            return node.operand
+        return node
+
+    def visit_BoolBinopNode(self, node):
+        self._calculate_const(node)
+        if not node.operand1.has_constant_result():
+            return node
+        if node.operand1.constant_result:
+            if node.operator == 'and':
+                return node.operand2
+            else:
+                return node.operand1
+        else:
+            if node.operator == 'and':
+                return node.operand1
+            else:
+                return node.operand2
+
+    def visit_BinopNode(self, node):  # replaces a binary operation on two literals by one literal node
+        self._calculate_const(node)
+        if node.constant_result is ExprNodes.not_a_constant:
+            return node
+        if isinstance(node.constant_result, float):
+            return node  # float results are calculated but never merged into a new literal
+        operand1, operand2 = node.operand1, node.operand2
+        if not operand1.is_literal or not operand2.is_literal:
+            return node  # only literal operands are aggregated
+
+        # now inject a new constant node with the calculated value
+        try:
+            type1, type2 = operand1.type, operand2.type
+            if type1 is None or type2 is None:
+                return node
+        except AttributeError:  # an operand has no 'type' attribute
+            return node
+
+        if type1.is_numeric and type2.is_numeric:
+            widest_type = PyrexTypes.widest_numeric_type(type1, type2)
+        else:
+            widest_type = PyrexTypes.py_object_type
+
+        target_class = self._widest_node_class(operand1, operand2)
+        if target_class is None:
+            return node  # at least one operand class is not in NODE_TYPE_ORDER
+        elif target_class is ExprNodes.BoolNode and node.operator in '+-//<<%**>>':  # note: substring test
+            # C arithmetic results in at least an int type
+            target_class = ExprNodes.IntNode
+        elif target_class is ExprNodes.CharNode and node.operator in '+-//<<%**>>&|^':  # note: substring test
+            # C arithmetic results in at least an int type
+            target_class = ExprNodes.IntNode
+
+        if target_class is ExprNodes.IntNode:
+            unsigned = getattr(operand1, 'unsigned', '') and \
+                       getattr(operand2, 'unsigned', '')
+            longness = "LL"[:max(len(getattr(operand1, 'longness', '')),  # longer of the two suffixes, at most "LL"
+                                 len(getattr(operand2, 'longness', '')))]
+            value = hex(int(node.constant_result))  # the new literal is spelled in hexadecimal
+            value = Utils.strip_py2_long_suffix(value)
+            new_node = ExprNodes.IntNode(pos=node.pos,
+                                         unsigned=unsigned, longness=longness,
+                                         value=value,
+                                         constant_result=int(node.constant_result))
+            # IntNode is smart about the type it chooses, so we just
+            # make sure we were not smarter this time
+            if widest_type.is_pyobject or new_node.type.is_pyobject:
+                new_node.type = PyrexTypes.py_object_type
+            else:
+                new_node.type = PyrexTypes.widest_numeric_type(widest_type, new_node.type)
+        else:
+            if target_class is ExprNodes.BoolNode:
+                node_value = node.constant_result  # BoolNode gets the raw constant as its value
+            else:
+                node_value = str(node.constant_result)  # other node classes get the value as text
+            new_node = target_class(pos=node.pos, type = widest_type,
+                                    value = node_value,
+                                    constant_result = node.constant_result)
+        return new_node
+
+    def visit_AddNode(self, node):  # merges 'literal + literal' for two Unicode or two bytes strings
+        self._calculate_const(node)
+        if node.constant_result is ExprNodes.not_a_constant:
+            return node
+        if node.operand1.is_string_literal and node.operand2.is_string_literal:
+            # some people combine string literals with a '+'
+            str1, str2 = node.operand1, node.operand2
+            if isinstance(str1, ExprNodes.UnicodeNode) and isinstance(str2, ExprNodes.UnicodeNode):
+                bytes_value = None  # stays None unless both sides have bytes values in the same encoding
+                if str1.bytes_value is not None and str2.bytes_value is not None:
+                    if str1.bytes_value.encoding == str2.bytes_value.encoding:
+                        bytes_value = bytes_literal(
+                            str1.bytes_value + str2.bytes_value,
+                            str1.bytes_value.encoding)
+                string_value = EncodedString(node.constant_result)
+                return ExprNodes.UnicodeNode(  # the merged literal takes the position of the left operand
+                    str1.pos, value=string_value, constant_result=node.constant_result, bytes_value=bytes_value)
+            elif isinstance(str1, ExprNodes.BytesNode) and isinstance(str2, ExprNodes.BytesNode):
+                if str1.value.encoding == str2.value.encoding:  # differing encodings fall through to the generic path
+                    bytes_value = bytes_literal(node.constant_result, str1.value.encoding)
+                    return ExprNodes.BytesNode(str1.pos, value=bytes_value, constant_result=node.constant_result)
+            # all other combinations are rather complicated
+            # to get right in Py2/3: encodings, unicode escapes, ...
+        return self.visit_BinopNode(node)
+
+    def visit_MulNode(self, node):  # dispatches sequence and string multiplication to the helpers
+        self._calculate_const(node)
+        if node.operand1.is_sequence_constructor:
+            return self._calculate_constant_seq(node, node.operand1, node.operand2)  # seq * factor
+        if isinstance(node.operand1, ExprNodes.IntNode) and \
+                node.operand2.is_sequence_constructor:
+            return self._calculate_constant_seq(node, node.operand2, node.operand1)  # int literal * seq
+        if node.operand1.is_string_literal:
+            return self._multiply_string(node, node.operand1, node.operand2)  # string * factor
+        elif node.operand2.is_string_literal:
+            return self._multiply_string(node, node.operand2, node.operand1)  # factor * string
+        return self.visit_BinopNode(node)  # plain numeric case
+
+    def _multiply_string(self, node, string_node, multiplier_node):  # returns 'node' or the updated string_node
+        multiplier = multiplier_node.constant_result
+        if not isinstance(multiplier, _py_int_types):
+            return node  # only integer constants are folded
+        if not (node.has_constant_result() and isinstance(node.constant_result, _py_string_types)):
+            return node
+        if len(node.constant_result) > 256:
+            # Too long for static creation, leave it to runtime.  (-> arbitrary limit)
+            return node
+
+        build_string = encoded_string  # constructor for the new 'value', adjusted per node class below
+        if isinstance(string_node, ExprNodes.BytesNode):
+            build_string = bytes_literal
+        elif isinstance(string_node, ExprNodes.StringNode):
+            if string_node.unicode_value is not None:
+                string_node.unicode_value = encoded_string(  # keep the unicode variant in sync
+                    string_node.unicode_value * multiplier,
+                    string_node.unicode_value.encoding)
+            build_string = encoded_string if string_node.value.is_unicode else bytes_literal
+        elif isinstance(string_node, ExprNodes.UnicodeNode):
+            if string_node.bytes_value is not None:
+                string_node.bytes_value = bytes_literal(  # keep the bytes variant in sync
+                    string_node.bytes_value * multiplier,
+                    string_node.bytes_value.encoding)
+        else:
+            assert False, "unknown string node type: %s" % type(string_node)
+        string_node.value = build_string(  # the literal node is updated in place
+            string_node.value * multiplier,
+            string_node.value.encoding)
+        # follow constant-folding and use unicode_value in preference
+        if isinstance(string_node, ExprNodes.StringNode) and string_node.unicode_value is not None:
+            string_node.constant_result = string_node.unicode_value
+        else:
+            string_node.constant_result = string_node.value
+        return string_node  # the mutated literal is returned in place of the multiplication node
+
+    def _calculate_constant_seq(self, node, sequence_node, factor):  # folds 'factor' into sequence_node.mult_factor
+        if factor.constant_result != 1 and sequence_node.args:  # factor 1 or an empty sequence: nothing to change
+            if isinstance(factor.constant_result, _py_int_types) and factor.constant_result <= 0:
+                del sequence_node.args[:]  # factor <= 0: all items and any earlier factor are dropped
+                sequence_node.mult_factor = None
+            elif sequence_node.mult_factor is not None:
+                if (isinstance(factor.constant_result, _py_int_types) and
+                        isinstance(sequence_node.mult_factor.constant_result, _py_int_types)):
+                    value = sequence_node.mult_factor.constant_result * factor.constant_result
+                    sequence_node.mult_factor = ExprNodes.IntNode(  # both integer constants: store their product
+                        sequence_node.mult_factor.pos,
+                        value=str(value), constant_result=value)
+                else:
+                    # don't know if we can combine the factors, so don't
+                    return self.visit_BinopNode(node)
+            else:
+                sequence_node.mult_factor = factor  # first factor: attach the factor node as it is
+        return sequence_node  # the sequence node is returned in place of the multiplication node
+
+    def visit_ModNode(self, node):  # tries to rewrite u'...' % (tuple literal) via _build_fstring
+        self.visitchildren(node)
+        if isinstance(node.operand1, ExprNodes.UnicodeNode) and isinstance(node.operand2, ExprNodes.TupleNode):
+            if not node.operand2.mult_factor:  # tuples with a multiplier are not handled here
+                fstring = self._build_fstring(node.operand1.pos, node.operand1.value, node.operand2.args)
+                if fstring is not None:
+                    return fstring
+        return self.visit_BinopNode(node)  # otherwise: generic binary operator folding
+
+    _parse_string_format_regex = (  # one capturing group, so re.split() also returns the matched placeholders
+        u'(%(?:'              # %...
+        u'(?:[-0-9]+|[ ])?'   # width (optional) or space prefix fill character (optional)
+        u'(?:[.][0-9]+)?'     # precision (optional)
+        u')?.)'               # format type (or something different for unsupported formats)
+    )
+
+    def _build_fstring(self, pos, ustring, format_args):  # -> folded JoinedStrNode result, or None to give up
+        # Issues formatting warnings instead of errors since we really only catch a few errors by accident.
+        args = iter(format_args)
+        substrings = []  # UnicodeNode / FormattedValueNode parts in source order
+        can_be_optimised = True
+        for s in re.split(self._parse_string_format_regex, ustring):
+            if not s:
+                continue  # empty fragments produced by the split carry no content
+            if s == u'%%':
+                substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(u'%'), constant_result=u'%'))
+                continue
+            if s[0] != u'%':  # literal text between placeholders
+                if s[-1] == u'%':
+                    warning(pos, "Incomplete format: '...%s'" % s[-3:], level=1)
+                    can_be_optimised = False
+                substrings.append(ExprNodes.UnicodeNode(pos, value=EncodedString(s), constant_result=s))
+                continue
+            format_type = s[-1]  # last character of the placeholder
+            try:
+                arg = next(args)
+            except StopIteration:
+                warning(pos, "Too few arguments for format placeholders", level=1)
+                can_be_optimised = False
+                break
+            if arg.is_starred:
+                can_be_optimised = False
+                break
+            if format_type in u'asrfdoxX':
+                format_spec = s[1:]  # placeholder without the leading '%'
+                conversion_char = None
+                if format_type in u'doxX' and u'.' in format_spec:
+                    # Precision is not allowed for integers in format(), but ok in %-formatting.
+                    can_be_optimised = False
+                elif format_type in u'ars':
+                    format_spec = format_spec[:-1]  # the type letter becomes the conversion character instead
+                    conversion_char = format_type
+                    if format_spec.startswith('0'):
+                        format_spec = '>' + format_spec[1:]  # right-alignment '%05s' spells '{:>5}'
+                elif format_type == u'd':
+                    # '%d' formatting supports float, but '{obj:d}' does not => convert to int first.
+                    conversion_char = 'd'
+
+                if format_spec.startswith('-'):
+                    format_spec = '<' + format_spec[1:]  # left-alignment '%-5s' spells '{:<5}'
+
+                substrings.append(ExprNodes.FormattedValueNode(
+                    arg.pos, value=arg,
+                    conversion_char=conversion_char,
+                    format_spec=ExprNodes.UnicodeNode(
+                        pos, value=EncodedString(format_spec), constant_result=format_spec)
+                        if format_spec else None,
+                ))
+            else:
+                # keep it simple for now ...
+                can_be_optimised = False
+                break
+
+        if not can_be_optimised:
+            # Print all warnings we can find before finally giving up here.
+            return None
+
+        try:
+            next(args)  # a remaining argument means there were more arguments than placeholders
+        except StopIteration: pass
+        else:
+            warning(pos, "Too many arguments for format placeholders", level=1)
+            return None
+
+        node = ExprNodes.JoinedStrNode(pos, values=substrings)
+        return self.visit_JoinedStrNode(node)  # merge and simplify the collected parts right away
+
+    def visit_FormattedValueNode(self, node):  # turns trivially formatted literals into plain UnicodeNodes
+        self.visitchildren(node)
+        conversion_char = node.conversion_char or 's'  # a missing conversion is treated like 's'
+        if isinstance(node.format_spec, ExprNodes.UnicodeNode) and not node.format_spec.value:
+            node.format_spec = None  # an empty format spec is the same as none
+        if node.format_spec is None and isinstance(node.value, ExprNodes.IntNode):
+            value = EncodedString(node.value.value)
+            if value.isdigit():  # only pure digit literals are taken over as text (no sign, prefix or suffix)
+                return ExprNodes.UnicodeNode(node.value.pos, value=value, constant_result=value)
+        if node.format_spec is None and conversion_char == 's':
+            value = None
+            if isinstance(node.value, ExprNodes.UnicodeNode):
+                value = node.value.value
+            elif isinstance(node.value, ExprNodes.StringNode):
+                value = node.value.unicode_value  # may be None, in which case nothing is folded
+            if value is not None:
+                return ExprNodes.UnicodeNode(node.value.pos, value=value, constant_result=value)
+        return node
+
+    def visit_JoinedStrNode(self, node):
+        """
+        Clean up after the parser by discarding empty Unicode strings and merging
+        substring sequences.  Empty or single-value join lists are not uncommon
+        because f-string format specs are always parsed into JoinedStrNodes.
+        """
+        self.visitchildren(node)
+        unicode_node = ExprNodes.UnicodeNode
+
+        values = []  # cleaned-up list of parts
+        for is_unode_group, substrings in itertools.groupby(node.values, lambda v: isinstance(v, unicode_node)):
+            if is_unode_group:  # run of adjacent UnicodeNodes: merge into one
+                substrings = list(substrings)
+                unode = substrings[0]
+                if len(substrings) > 1:
+                    value = EncodedString(u''.join(value.value for value in substrings))
+                    unode = ExprNodes.UnicodeNode(unode.pos, value=value, constant_result=value)
+                # ignore empty Unicode strings
+                if unode.value:
+                    values.append(unode)
+            else:
+                values.extend(substrings)  # other nodes are kept as they are
+
+        if not values:
+            value = EncodedString('')  # nothing left: the result is an empty string literal
+            node = ExprNodes.UnicodeNode(node.pos, value=value, constant_result=value)
+        elif len(values) == 1:
+            node = values[0]  # a single part replaces the join node
+        elif len(values) == 2:
+            # reduce to string concatenation
+            node = ExprNodes.binop_node(node.pos, '+', *values)
+        else:
+            node.values = values
+        return node
+
+    def visit_MergedDictNode(self, node):
+        """Unpack **args in place if we can."""
+        self.visitchildren(node)
+        args = []  # final list of keyword argument nodes
+        items = []  # pending dict literals that are not yet flushed into 'args'
+
+        def add(parent, arg):
+            if arg.is_dict_literal:
+                if items and items[-1].reject_duplicates == arg.reject_duplicates:
+                    items[-1].key_value_pairs.extend(arg.key_value_pairs)  # merge into the previous literal
+                else:
+                    items.append(arg)
+            elif isinstance(arg, ExprNodes.MergedDictNode) and parent.reject_duplicates == arg.reject_duplicates:
+                for child_arg in arg.keyword_args:
+                    add(arg, child_arg)  # flatten the nested merge node recursively
+            else:
+                if items:
+                    args.extend(items)  # flush pending literals first, which keeps the original order
+                    del items[:]
+                args.append(arg)
+
+        for arg in node.keyword_args:
+            add(node, arg)
+        if items:
+            args.extend(items)
+
+        if len(args) == 1:
+            arg = args[0]
+            if arg.is_dict_literal or isinstance(arg, ExprNodes.MergedDictNode):
+                return arg  # a single dict-like argument replaces the merge node
+        node.keyword_args[:] = args
+        self._calculate_const(node)
+        return node
+
+    def visit_MergedSequenceNode(self, node):
+        """Unpack *args in place if we can."""
+        self.visitchildren(node)
+
+        is_set = node.type is Builtin.set_type
+        args = []  # final list of argument nodes
+        values = []  # holds at most one pending literal that collects adjacent literal items
+
+        def add(arg):
+            if (is_set and arg.is_set_literal) or (arg.is_sequence_constructor and not arg.mult_factor):
+                if values:
+                    values[0].args.extend(arg.args)  # append to the pending literal (modified in place)
+                else:
+                    values.append(arg)
+            elif isinstance(arg, ExprNodes.MergedSequenceNode):
+                for child_arg in arg.args:
+                    add(child_arg)  # flatten nested merge nodes recursively
+            else:
+                if values:
+                    args.append(values[0])  # flush the pending literal first, which keeps the original order
+                    del values[:]
+                args.append(arg)
+
+        for arg in node.args:
+            add(arg)
+        if values:
+            args.append(values[0])
+
+        if len(args) == 1:
+            arg = args[0]
+            if ((is_set and arg.is_set_literal) or
+                    (arg.is_sequence_constructor and arg.type is node.type) or
+                    isinstance(arg, ExprNodes.MergedSequenceNode)):
+                return arg  # a single argument of a matching kind replaces the merge node
+        node.args[:] = args
+        self._calculate_const(node)
+        return node
+
+    def visit_SequenceNode(self, node):
+        """Unpack *args in place if we can."""
+        self.visitchildren(node)
+        args = []
+        for arg in node.args:
+            if not arg.is_starred:
+                args.append(arg)
+            elif arg.target.is_sequence_constructor and not arg.target.mult_factor:
+                args.extend(arg.target.args)
+            else:
+                args.append(arg)
+        node.args[:] = args
+        self._calculate_const(node)
+        return node
+
+    def visit_PrimaryCmpNode(self, node):
+        """Fold the constant parts of a (possibly cascaded) comparison.
+
+        Constant results are computed link by link along the cascade.  A
+        comparison without a cascade is replaced by a bool node if its result
+        is constant.  In a cascade, links that are constant True are dropped,
+        a link that is constant False cuts off everything after it, and the
+        remaining non-constant runs are rebuilt as separate comparisons that
+        get joined by 'and'.
+        """
+        # calculate constant partial results in the comparison cascade
+        self.visitchildren(node, ['operand1'])
+        left_node = node.operand1
+        cmp_node = node
+        while cmp_node is not None:
+            self.visitchildren(cmp_node, ['operand2'])
+            right_node = cmp_node.operand2
+            # start from "not constant"; presumably overwritten by
+            # calculate_cascaded_constant_result() below when it succeeds
+            cmp_node.constant_result = not_a_constant
+            if left_node.has_constant_result() and right_node.has_constant_result():
+                try:
+                    cmp_node.calculate_cascaded_constant_result(left_node.constant_result)
+                except (ValueError, TypeError, KeyError, IndexError, AttributeError, ArithmeticError):
+                    pass  # ignore all 'normal' errors here => no constant result
+            # the right operand of this link is the left operand of the next one
+            left_node = right_node
+            cmp_node = cmp_node.cascade
+
+        if not node.cascade:
+            # simple 'a <op> b' comparison: either fully constant or left alone
+            if node.has_constant_result():
+                return self._bool_node(node, node.constant_result)
+            return node
+
+        # collect partial cascades: [[value, CmpNode...], [value, CmpNode, ...], ...]
+        cascades = [[node.operand1]]
+        # holds at most one bool node: the link that is known to be False
+        final_false_result = []
+
+        cmp_node = node
+        while cmp_node is not None:
+            if cmp_node.has_constant_result():
+                if not cmp_node.constant_result:
+                    # False => short-circuit
+                    final_false_result.append(self._bool_node(cmp_node, False))
+                    break
+                else:
+                    # True => discard and start new cascade
+                    cascades.append([cmp_node.operand2])
+            else:
+                # not constant => append to current cascade
+                cascades[-1].append(cmp_node)
+            cmp_node = cmp_node.cascade
+
+        # rebuild one comparison node per partial cascade that still has
+        # at least one non-constant link
+        cmp_nodes = []
+        for cascade in cascades:
+            if len(cascade) < 2:
+                # only a start value, no comparison left to evaluate
+                continue
+            cmp_node = cascade[1]
+            pcmp_node = ExprNodes.PrimaryCmpNode(
+                cmp_node.pos,
+                operand1=cascade[0],
+                operator=cmp_node.operator,
+                operand2=cmp_node.operand2,
+                constant_result=not_a_constant)
+            cmp_nodes.append(pcmp_node)
+
+            # chain the remaining links of this partial cascade behind the new head
+            last_cmp_node = pcmp_node
+            for cmp_node in cascade[2:]:
+                last_cmp_node.cascade = cmp_node
+                last_cmp_node = cmp_node
+            # cut the chain after the last link that was kept
+            last_cmp_node.cascade = None
+
+        if final_false_result:
+            # last cascade was constant False
+            cmp_nodes.append(final_false_result[0])
+        elif not cmp_nodes:
+            # only constants, but no False result
+            return self._bool_node(node, True)
+        node = cmp_nodes[0]
+        if len(cmp_nodes) == 1:
+            if node.has_constant_result():
+                return self._bool_node(node, node.constant_result)
+        else:
+            # join the partial results left to right with 'and'
+            for cmp_node in cmp_nodes[1:]:
+                node = ExprNodes.BoolBinopNode(
+                    node.pos,
+                    operand1=node,
+                    operator='and',
+                    operand2=cmp_node,
+                    constant_result=not_a_constant)
+        return node
+
+    def visit_CondExprNode(self, node):
+        self._calculate_const(node)
+        if not node.test.has_constant_result():
+            return node
+        if node.test.constant_result:
+            return node.true_val
+        else:
+            return node.false_val
+
+    def visit_IfStatNode(self, node):
+        self.visitchildren(node)
+        # eliminate dead code based on constant condition results
+        if_clauses = []
+        for if_clause in node.if_clauses:
+            condition = if_clause.condition
+            if condition.has_constant_result():
+                if condition.constant_result:
+                    # always true => subsequent clauses can safely be dropped
+                    node.else_clause = if_clause.body
+                    break
+                # else: false => drop clause
+            else:
+                # unknown result => normal runtime evaluation
+                if_clauses.append(if_clause)
+        if if_clauses:
+            node.if_clauses = if_clauses
+            return node
+        elif node.else_clause:
+            return node.else_clause
+        else:
+            return Nodes.StatListNode(node.pos, stats=[])
+
+    def visit_SliceIndexNode(self, node):
+        self._calculate_const(node)
+        # normalise start/stop values
+        if node.start is None or node.start.constant_result is None:
+            start = node.start = None
+        else:
+            start = node.start.constant_result
+        if node.stop is None or node.stop.constant_result is None:
+            stop = node.stop = None
+        else:
+            stop = node.stop.constant_result
+        # cut down sliced constant sequences
+        if node.constant_result is not not_a_constant:
+            base = node.base
+            if base.is_sequence_constructor and base.mult_factor is None:
+                base.args = base.args[start:stop]
+                return base
+            elif base.is_string_literal:
+                base = base.as_sliced_node(start, stop)
+                if base is not None:
+                    return base
+        return node
+
+    def visit_ComprehensionNode(self, node):
+        self.visitchildren(node)
+        if isinstance(node.loop, Nodes.StatListNode) and not node.loop.stats:
+            # loop was pruned already => transform into literal
+            if node.type is Builtin.list_type:
+                return ExprNodes.ListNode(
+                    node.pos, args=[], constant_result=[])
+            elif node.type is Builtin.set_type:
+                return ExprNodes.SetNode(
+                    node.pos, args=[], constant_result=set())
+            elif node.type is Builtin.dict_type:
+                return ExprNodes.DictNode(
+                    node.pos, key_value_pairs=[], constant_result={})
+        return node
+
+    def visit_ForInStatNode(self, node):
+        self.visitchildren(node)
+        sequence = node.iterator.sequence
+        if isinstance(sequence, ExprNodes.SequenceNode):
+            if not sequence.args:
+                if node.else_clause:
+                    return node.else_clause
+                else:
+                    # don't break list comprehensions
+                    return Nodes.StatListNode(node.pos, stats=[])
+            # iterating over a list literal? => tuples are more efficient
+            if isinstance(sequence, ExprNodes.ListNode):
+                node.iterator.sequence = sequence.as_tuple()
+        return node
+
+    def visit_WhileStatNode(self, node):
+        self.visitchildren(node)
+        if node.condition and node.condition.has_constant_result():
+            if node.condition.constant_result:
+                node.condition = None
+                node.else_clause = None
+            else:
+                return node.else_clause
+        return node
+
+    def visit_ExprStatNode(self, node):
+        self.visitchildren(node)
+        if not isinstance(node.expr, ExprNodes.ExprNode):
+            # ParallelRangeTransform does this ...
+            return node
+        # drop unused constant expressions
+        if node.expr.has_constant_result():
+            return None
+        return node
+
+    def visit_GILStatNode(self, node):
+        self.visitchildren(node)
+        if node.condition is None:
+            return node
+
+        if node.condition.has_constant_result():
+            # Condition is True - Modify node to be a normal
+            # GILStatNode with condition=None
+            if node.condition.constant_result:
+                node.condition = None
+
+            # Condition is False - the body of the GILStatNode
+            # should run without changing the state of the gil
+            # return the body of the GILStatNode
+            else:
+                return node.body
+
+        # If condition is not constant we keep the GILStatNode as it is.
+        # Either it will later become constant (e.g. a `numeric is int`
+        # expression in a fused type function) and then when ConstantFolding
+        # runs again it will be handled or a later transform (i.e. GilCheck)
+        # will raise an error
+        return node
+
+    # In the future, other nodes can have their own handler method here
+    # that can replace them with a constant result node.
+
+    # Generic handler: reuse the visitor's recurse_to_children() implementation.
+    # NOTE(review): presumably this is what runs for node types without a
+    # dedicated visit_* method - confirm against the dispatch in Visitor.
+    visit_Node = Visitor.VisitorTransform.recurse_to_children
+
+
+class FinalOptimizePhase(Visitor.EnvTransform, Visitor.NodeRefCleanupMixin):
+    """
+    This visitor handles several commuting optimizations, and is run
+    just before the C code generation phase.
+
+    The optimizations currently implemented in this class are:
+        - eliminate None assignment and refcounting for first assignment.
+        - isinstance -> typecheck for cdef types
+        - eliminate checks for None and/or types that became redundant after tree changes
+        - eliminate useless string formatting steps
+        - inject branch hints for unlikely if-cases that only raise exceptions
+        - replace Python function calls that look like method calls by a faster PyMethodCallNode
+    """
+    in_loop = False
+
+    def visit_SingleAssignmentNode(self, node):
+        """Avoid redundant initialisation of local variables before their
+        first assignment.
+        """
+        self.visitchildren(node)
+        if node.first:
+            lhs = node.lhs
+            lhs.lhs_of_first_assignment = True
+        return node
+
+    def visit_SimpleCallNode(self, node):
+        """
+        Replace generic calls to isinstance(x, type) by a more efficient type check.
+        Replace likely Python method calls by a specialised PyMethodCallNode.
+        """
+        self.visitchildren(node)
+        function = node.function
+        if function.type.is_cfunction and function.is_name:
+            if function.name == 'isinstance' and len(node.args) == 2:
+                type_arg = node.args[1]
+                if type_arg.type.is_builtin_type and type_arg.type.name == 'type':
+                    cython_scope = self.context.cython_scope
+                    function.entry = cython_scope.lookup('PyObject_TypeCheck')
+                    function.type = function.entry.type
+                    PyTypeObjectPtr = PyrexTypes.CPtrType(cython_scope.lookup('PyTypeObject').type)
+                    node.args[1] = ExprNodes.CastNode(node.args[1], PyTypeObjectPtr)
+        elif (node.is_temp and function.type.is_pyobject and self.current_directives.get(
+                "optimize.unpack_method_calls_in_pyinit"
+                if not self.in_loop and self.current_env().is_module_scope
+                else "optimize.unpack_method_calls")):
+            # optimise simple Python methods calls
+            if isinstance(node.arg_tuple, ExprNodes.TupleNode) and not (
+                    node.arg_tuple.mult_factor or (node.arg_tuple.is_literal and len(node.arg_tuple.args) > 1)):
+                # simple call, now exclude calls to objects that are definitely not methods
+                may_be_a_method = True
+                if function.type is Builtin.type_type:
+                    may_be_a_method = False
+                elif function.is_attribute:
+                    if function.entry and function.entry.type.is_cfunction:
+                        # optimised builtin method
+                        may_be_a_method = False
+                elif function.is_name:
+                    entry = function.entry
+                    if entry.is_builtin or entry.type.is_cfunction:
+                        may_be_a_method = False
+                    elif entry.cf_assignments:
+                        # local functions/classes are definitely not methods
+                        non_method_nodes = (ExprNodes.PyCFunctionNode, ExprNodes.ClassNode, ExprNodes.Py3ClassNode)
+                        may_be_a_method = any(
+                            assignment.rhs and not isinstance(assignment.rhs, non_method_nodes)
+                            for assignment in entry.cf_assignments)
+                if may_be_a_method:
+                    if (node.self and function.is_attribute and
+                            isinstance(function.obj, ExprNodes.CloneNode) and function.obj.arg is node.self):
+                        # function self object was moved into a CloneNode => undo
+                        function.obj = function.obj.arg
+                    node = self.replace(node, ExprNodes.PyMethodCallNode.from_node(
+                        node, function=function, arg_tuple=node.arg_tuple, type=node.type))
+        return node
+
+    def visit_NumPyMethodCallNode(self, node):
+        # Exclude from replacement above.
+        self.visitchildren(node)
+        return node
+
+    def visit_PyTypeTestNode(self, node):
+        """Remove tests for alternatively allowed None values from
+        type tests when we know that the argument cannot be None
+        anyway.
+        """
+        self.visitchildren(node)
+        if not node.notnone:
+            if not node.arg.may_be_none():
+                node.notnone = True
+        return node
+
+    def visit_NoneCheckNode(self, node):
+        """Remove None checks from expressions that definitely do not
+        carry a None value.
+        """
+        self.visitchildren(node)
+        if not node.arg.may_be_none():
+            return node.arg
+        return node
+
+    def visit_LoopNode(self, node):
+        """Remember when we enter a loop as some expensive optimisations might still be worth it there.
+        """
+        old_val = self.in_loop
+        self.in_loop = True
+        self.visitchildren(node)
+        self.in_loop = old_val
+        return node
+
+    def visit_IfStatNode(self, node):
+        """Assign 'unlikely' branch hints to if-clauses that only raise exceptions.
+        """
+        self.visitchildren(node)
+        last_non_unlikely_clause = None
+        for i, if_clause in enumerate(node.if_clauses):
+            self._set_ifclause_branch_hint(if_clause, if_clause.body)
+            if not if_clause.branch_hint:
+                last_non_unlikely_clause = if_clause
+        if node.else_clause and last_non_unlikely_clause:
+            # If the 'else' clause is 'unlikely', then set the preceding 'if' clause to 'likely' to reflect that.
+            self._set_ifclause_branch_hint(last_non_unlikely_clause, node.else_clause, inverse=True)
+        return node
+
+    def _set_ifclause_branch_hint(self, clause, statements_node, inverse=False):
+        """Inject a branch hint if the if-clause unconditionally leads to a 'raise' statement.
+        """
+        if not statements_node.is_terminator:
+            return
+        # Allow simple statements, but no conditions, loops, etc.
+        non_branch_nodes = (
+            Nodes.ExprStatNode,
+            Nodes.AssignmentNode,
+            Nodes.AssertStatNode,
+            Nodes.DelStatNode,
+            Nodes.GlobalNode,
+            Nodes.NonlocalNode,
+        )
+        statements = [statements_node]
+        for next_node_pos, node in enumerate(statements, 1):
+            if isinstance(node, Nodes.GILStatNode):
+                statements.insert(next_node_pos, node.body)
+                continue
+            if isinstance(node, Nodes.StatListNode):
+                statements[next_node_pos:next_node_pos] = node.stats
+                continue
+            if not isinstance(node, non_branch_nodes):
+                if next_node_pos == len(statements) and isinstance(node, (Nodes.RaiseStatNode, Nodes.ReraiseStatNode)):
+                    # Anything that unconditionally raises exceptions at the end should be considered unlikely.
+                    clause.branch_hint = 'likely' if inverse else 'unlikely'
+                break
+
+
+class ConsolidateOverflowCheck(Visitor.CythonTransform):
+    """
+    This class facilitates the sharing of overflow checking among all nodes
+    of a nested arithmetic expression.  For example, given the expression
+    a*b + c, where a, b, and x are all possibly overflowing ints, the entire
+    sequence will be evaluated and the overflow bit checked only at the end.
+    """
+    overflow_bit_node = None
+
+    def visit_Node(self, node):
+        if self.overflow_bit_node is not None:
+            saved = self.overflow_bit_node
+            self.overflow_bit_node = None
+            self.visitchildren(node)
+            self.overflow_bit_node = saved
+        else:
+            self.visitchildren(node)
+        return node
+
+    def visit_NumBinopNode(self, node):
+        if node.overflow_check and node.overflow_fold:
+            top_level_overflow = self.overflow_bit_node is None
+            if top_level_overflow:
+                self.overflow_bit_node = node
+            else:
+                node.overflow_bit_node = self.overflow_bit_node
+                node.overflow_check = False
+            self.visitchildren(node)
+            if top_level_overflow:
+                self.overflow_bit_node = None
+        else:
+            self.visitchildren(node)
+        return node
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ParseTreeTransforms.pxd b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ParseTreeTransforms.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..ee34c9420c254010aebda0f411a4aa622e86d355
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/ParseTreeTransforms.pxd
@@ -0,0 +1,84 @@
+# cython: language_level=3str
+
+cimport cython
+
+from .Visitor cimport (
+    CythonTransform, VisitorTransform, TreeVisitor,
+    ScopeTrackingTransform, EnvTransform)
+
+# Don't include mixins, only the main classes.
+#cdef class SkipDeclarations:
+
+# Extension type declaration for NormalizeTree: two C boolean state flags and
+# a cpdef visit_StatNode() whose 'is_listcontainer' argument is optional
+# ('=*' leaves the default value to the implementation).
+cdef class NormalizeTree(CythonTransform):
+    cdef bint is_in_statlist
+    cdef bint is_in_expr
+    cpdef visit_StatNode(self, node, is_listcontainer=*)
+
+# Extension type declaration for PostParse: a typed handler dict, two unsigned
+# counters and a C-level helper method that takes a typed list argument.
+cdef class PostParse(ScopeTrackingTransform):
+    cdef dict specialattribute_handlers
+    cdef size_t lambda_counter
+    cdef size_t genexpr_counter
+    cdef _visit_assignment_node(self, node, list expr_list)
+
+
+#def eliminate_rhs_duplicates(list expr_list_list, list ref_node_sequence)
+#def sort_common_subsequences(list items)
+# Module level helper functions, declared as C functions with typed list
+# arguments.  @cython.locals() types three local variables of
+# flatten_parallel_assignments() as Py_ssize_t.
+@cython.locals(starred_targets=Py_ssize_t, lhs_size=Py_ssize_t, rhs_size=Py_ssize_t)
+cdef flatten_parallel_assignments(list input, list output)
+cdef map_starred_assignment(list lhs_targets, list starred_assignments, list lhs_args, list rhs_args)
+
+#class PxdPostParse(CythonTransform, SkipDeclarations):
+#class InterpretCompilerDirectives(CythonTransform, SkipDeclarations):
+#class WithTransform(VisitorTransform, SkipDeclarations):
+#class DecoratorTransform(CythonTransform, SkipDeclarations):
+
+#class AnalyseDeclarationsTransform(EnvTransform):
+
+# Declared as an extension type only; no C attributes or methods are added.
+cdef class AnalyseExpressionsTransform(CythonTransform):
+    pass
+
+# Declared as an extension type only; no C attributes or methods are added.
+cdef class ExpandInplaceOperators(EnvTransform):
+    pass
+
+# Extension type declaration for AlignFunctionDefinitions with typed dict/set
+# attributes and an untyped (generic object) 'scope' attribute.
+cdef class AlignFunctionDefinitions(CythonTransform):
+    cdef dict directives
+    cdef set imported_names
+    cdef object scope
+
+# Final extension type (cannot be subclassed).  The 'public' attributes stay
+# readable and writable from Python code; 'excludes' is C-level only.
+@cython.final
+cdef class YieldNodeCollector(TreeVisitor):
+    cdef public list yields
+    cdef public list returns
+    cdef public list finallys
+    cdef public list excepts
+    cdef public bint has_return_value
+    cdef public bint has_yield
+    cdef public bint has_await
+    cdef list excludes
+
+# Final extension type (cannot be subclassed) with two C-level attributes.
+@cython.final
+cdef class MarkClosureVisitor(CythonTransform):
+    cdef bint needs_closure
+    cdef list excludes
+
+# Final extension type (cannot be subclassed).  'module_scope' and
+# 'generator_class' are declared without a type, i.e. as generic objects.
+@cython.final
+cdef class CreateClosureClasses(CythonTransform):
+    cdef list path
+    cdef bint in_lambda
+    cdef module_scope
+    cdef generator_class
+
+    # C-level methods; 'inner_node' is optional ('=*').
+    cdef create_class_from_scope(self, node, target_module_scope, inner_node=*)
+    cdef find_entries_used_in_closures(self, node)
+
+#cdef class InjectGilHandling(VisitorTransform, SkipDeclarations):
+#    cdef bint nogil
+
+# Extension type declaration for GilCheck: a typed list and three C boolean
+# state flags.
+cdef class GilCheck(VisitorTransform):
+    cdef list env_stack
+    cdef bint nogil
+    cdef bint nogil_declarator_only
+    cdef bint current_gilstat_node_knows_gil_state
+
+# Extension type declaration that turns visit_cython_attribute() into a
+# C-level method.
+cdef class TransformBuiltinMethods(EnvTransform):
+    cdef visit_cython_attribute(self, node)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so
new file mode 100644
index 0000000000000000000000000000000000000000..08220d03b96578290da62d551f41f8a7c8b261f0
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.cpython-311-x86_64-linux-gnu.so
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:710a5e569000a869790e4771a77758935facb69c3b699ae88bbe65b9f2bf98a2
+size 1170440
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.pxd b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..852f5438d2ae0a27a590771386880497aea2e2a1
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Parsing.pxd
@@ -0,0 +1,205 @@
+# cython: language_level=3
+
+# We declare all of these here to type the first argument.
+
+from __future__ import absolute_import
+
+cimport cython
+from .Scanning cimport PyrexScanner
+
+# C function pointer type for parser functions that take only the scanner and
+# return a Python object.  Used below for the sub-expression parser argument
+# of p_binop_expr() and p_rassoc_binop_expr().
+ctypedef object (*p_sub_expr_func)(PyrexScanner obj)
+
+# entry points
+# (declared 'cpdef', so they stay callable from Python code as well;
+# '=*' / '= *' marks an optional argument whose default value is given
+# in the implementation)
+
+cpdef p_module(PyrexScanner s, pxd, full_module_name, ctx=*)
+cpdef p_code(PyrexScanner s, level= *, ctx=*)
+
+# internal parser states
+# (declared 'cdef', i.e. as C-level functions, apart from the 'cpdef'
+# make_slice_node().  A leading 'tuple'/'list'/'bint' is the declared return
+# type; declarations without one return a generic Python object.)
+
+cdef p_ident(PyrexScanner s, message =*)
+cdef p_ident_list(PyrexScanner s)
+
+cdef tuple p_binop_operator(PyrexScanner s)
+cdef p_binop_expr(PyrexScanner s, ops, p_sub_expr_func p_sub_expr)
+cdef p_lambdef(PyrexScanner s)
+cdef p_test(PyrexScanner s)
+cdef p_test_allow_walrus_after(PyrexScanner s)
+cdef p_namedexpr_test(PyrexScanner s)
+cdef p_or_test(PyrexScanner s)
+cdef p_rassoc_binop_expr(PyrexScanner s, unicode op, p_sub_expr_func p_subexpr)
+cdef p_and_test(PyrexScanner s)
+cdef p_not_test(PyrexScanner s)
+cdef p_comparison(PyrexScanner s)
+cdef p_test_or_starred_expr(PyrexScanner s)
+cdef p_namedexpr_test_or_starred_expr(PyrexScanner s)
+cdef p_starred_expr(PyrexScanner s)
+cdef p_cascaded_cmp(PyrexScanner s)
+cdef p_cmp_op(PyrexScanner s)
+cdef p_bit_expr(PyrexScanner s)
+cdef p_xor_expr(PyrexScanner s)
+cdef p_and_expr(PyrexScanner s)
+cdef p_shift_expr(PyrexScanner s)
+cdef p_arith_expr(PyrexScanner s)
+cdef p_term(PyrexScanner s)
+cdef p_factor(PyrexScanner s)
+cdef _p_factor(PyrexScanner s)
+cdef p_typecast(PyrexScanner s)
+cdef p_sizeof(PyrexScanner s)
+cdef p_yield_expression(PyrexScanner s)
+cdef p_yield_statement(PyrexScanner s)
+cdef p_async_statement(PyrexScanner s, ctx, decorators)
+cdef p_power(PyrexScanner s)
+cdef p_new_expr(PyrexScanner s)
+cdef p_trailer(PyrexScanner s, node1)
+cdef p_call_parse_args(PyrexScanner s, bint allow_genexp = *)
+# the following helpers do not take the scanner as first argument
+cdef p_call_build_packed_args(pos, positional_args, keyword_args)
+cdef p_call(PyrexScanner s, function)
+cdef p_index(PyrexScanner s, base)
+cdef tuple p_subscript_list(PyrexScanner s)
+cdef p_subscript(PyrexScanner s)
+cdef p_slice_element(PyrexScanner s, follow_set)
+cdef expect_ellipsis(PyrexScanner s)
+cdef make_slice_nodes(pos, subscripts)
+cpdef make_slice_node(pos, start, stop = *, step = *)
+cdef p_atom(PyrexScanner s)
+# @cython.locals() declares C types for local variables of the function
+# that is declared on the following line.
+@cython.locals(value=unicode)
+cdef p_int_literal(PyrexScanner s)
+cdef p_name(PyrexScanner s, name)
+cdef wrap_compile_time_constant(pos, value)
+cdef p_cat_string_literal(PyrexScanner s)
+cdef p_opt_string_literal(PyrexScanner s, required_type=*)
+cdef bint check_for_non_ascii_characters(unicode string)
+@cython.locals(systr=unicode, is_python3_source=bint, is_raw=bint)
+cdef p_string_literal(PyrexScanner s, kind_override=*)
+cdef _append_escape_sequence(kind, builder, unicode escape_sequence, PyrexScanner s)
+cdef tuple _f_string_error_pos(pos, string, Py_ssize_t i)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, next_start=Py_ssize_t)
+cdef list p_f_string(PyrexScanner s, unicode_value, pos, bint is_raw)
+@cython.locals(i=Py_ssize_t, size=Py_ssize_t, c=Py_UCS4, quote_char=Py_UCS4, NO_CHAR=Py_UCS4)
+cdef tuple p_f_string_expr(PyrexScanner s, unicode_value, pos, Py_ssize_t starting_index, bint is_raw)
+cdef p_list_maker(PyrexScanner s)
+cdef p_comp_iter(PyrexScanner s, body)
+cdef p_comp_for(PyrexScanner s, body)
+cdef p_comp_if(PyrexScanner s, body)
+cdef p_dict_or_set_maker(PyrexScanner s)
+cdef p_backquote_expr(PyrexScanner s)
+# Expression list parsers.  'expr' is optional ('=*' leaves the default value
+# to the implementation).  The scanner argument is typed as PyrexScanner in
+# every declaration, as for all other parser functions in this file.
+cdef p_simple_expr_list(PyrexScanner s, expr=*)
+cdef p_test_or_starred_expr_list(PyrexScanner s, expr=*)
+cdef p_namedexpr_test_or_starred_expr_list(PyrexScanner s, expr=*)
+cdef p_testlist(PyrexScanner s)
+cdef p_testlist_star_expr(PyrexScanner s)
+cdef p_testlist_comp(PyrexScanner s)
+cdef p_genexp(PyrexScanner s, expr)
+
+#-------------------------------------------------------
+#
+#   Statements
+#
+#-------------------------------------------------------
+
+# 'bint name = *' / 'bint name=*' declares an optional C boolean argument
+# whose default value is given in the implementation.
+
+cdef p_global_statement(PyrexScanner s)
+cdef p_nonlocal_statement(PyrexScanner s)
+cdef p_expression_or_assignment(PyrexScanner s)
+cdef p_print_statement(PyrexScanner s)
+cdef p_exec_statement(PyrexScanner s)
+cdef p_del_statement(PyrexScanner s)
+cdef p_pass_statement(PyrexScanner s, bint with_newline = *)
+cdef p_break_statement(PyrexScanner s)
+cdef p_continue_statement(PyrexScanner s)
+cdef p_return_statement(PyrexScanner s)
+cdef p_raise_statement(PyrexScanner s)
+cdef p_import_statement(PyrexScanner s)
+cdef p_from_import_statement(PyrexScanner s, bint first_statement = *)
+cdef p_imported_name(PyrexScanner s)
+cdef p_dotted_name(PyrexScanner s, bint as_allowed)
+cdef p_as_name(PyrexScanner s)
+cdef p_assert_statement(PyrexScanner s)
+cdef p_if_statement(PyrexScanner s)
+cdef p_if_clause(PyrexScanner s)
+cdef p_else_clause(PyrexScanner s)
+cdef p_while_statement(PyrexScanner s)
+cdef p_for_statement(PyrexScanner s, bint is_async=*)
+cdef dict p_for_bounds(PyrexScanner s, bint allow_testlist=*, bint is_async=*)
+cdef p_for_from_relation(PyrexScanner s)
+cdef p_for_from_step(PyrexScanner s)
+cdef p_target(PyrexScanner s, terminator)
+cdef p_for_target(PyrexScanner s)
+cdef p_for_iterator(PyrexScanner s, bint allow_testlist=*, bint is_async=*)
+cdef p_try_statement(PyrexScanner s)
+cdef p_except_clause(PyrexScanner s)
+cdef p_include_statement(PyrexScanner s, ctx)
+cdef p_with_statement(PyrexScanner s)
+cdef p_with_items(PyrexScanner s, bint is_async=*)
+cdef p_with_items_list(PyrexScanner s, bint is_async)
+cdef tuple p_with_item(PyrexScanner s, bint is_async)
+cdef p_with_template(PyrexScanner s)
+cdef p_simple_statement(PyrexScanner s, bint first_statement = *)
+cdef p_simple_statement_list(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_compile_time_expr(PyrexScanner s)
+cdef p_DEF_statement(PyrexScanner s)
+cdef p_IF_statement(PyrexScanner s, ctx)
+cdef p_statement(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_statement_list(PyrexScanner s, ctx, bint first_statement = *)
+cdef p_suite(PyrexScanner s, ctx = *)
+cdef tuple p_suite_with_docstring(PyrexScanner s, ctx, bint with_doc_only=*)
+# takes a node rather than the scanner
+cdef tuple _extract_docstring(node)
+cdef p_positional_and_keyword_args(PyrexScanner s, end_sy_set, templates = *)
+
+# The 'cpdef' functions in this section (p_c_base_type, p_c_declarator,
+# p_c_arg_list) stay callable from Python code; the 'cdef' ones are C-level only.
+cpdef p_c_base_type(PyrexScanner s, bint nonempty = *, templates = *)
+cdef p_calling_convention(PyrexScanner s)
+cdef p_c_complex_base_type(PyrexScanner s, templates = *)
+cdef p_c_simple_base_type(PyrexScanner s, bint nonempty, templates = *)
+cdef p_buffer_or_template(PyrexScanner s, base_type_node, templates)
+cdef p_bracketed_base_type(PyrexScanner s, base_type_node, nonempty, empty)
+cdef is_memoryviewslice_access(PyrexScanner s)
+cdef p_memoryviewslice_access(PyrexScanner s, base_type_node)
+# 'bint ... except -2': the function returns a C boolean, and the otherwise
+# unused return value -2 signals that an exception was raised.
+# looking_at_expr() returns a Python object instead and therefore needs no
+# exception value (note the commented-out clause).
+cdef bint looking_at_name(PyrexScanner s) except -2
+cdef object looking_at_expr(PyrexScanner s)# except -2
+cdef bint looking_at_base_type(PyrexScanner s) except -2
+cdef bint looking_at_dotted_name(PyrexScanner s) except -2
+cdef p_sign_and_longness(PyrexScanner s)
+cdef p_opt_cname(PyrexScanner s)
+cpdef p_c_declarator(PyrexScanner s, ctx = *, bint empty = *, bint is_type = *, bint cmethod_flag = *,
+                   bint assignable = *, bint nonempty = *,
+                   bint calling_convention_allowed = *)
+cdef p_c_array_declarator(PyrexScanner s, base)
+cdef p_c_func_declarator(PyrexScanner s, pos, ctx, base, bint cmethod_flag)
+cdef p_c_simple_declarator(PyrexScanner s, ctx, bint empty, bint is_type, bint cmethod_flag,
+                          bint assignable, bint nonempty)
+cdef p_nogil(PyrexScanner s)
+cdef p_with_gil(PyrexScanner s)
+cdef p_exception_value_clause(PyrexScanner s, bint is_extern)
+cpdef p_c_arg_list(PyrexScanner s, ctx = *, bint in_pyfunc = *, bint cmethod_flag = *,
+                   bint nonempty_declarators = *, bint kw_only = *, bint annotated = *)
+cdef p_optional_ellipsis(PyrexScanner s)
+cdef p_c_arg_decl(PyrexScanner s, ctx, in_pyfunc, bint cmethod_flag = *, bint nonempty = *, bint kw_only = *, bint annotated = *)
+cdef p_api(PyrexScanner s)
+cdef p_cdef_statement(PyrexScanner s, ctx)
+cdef p_cdef_block(PyrexScanner s, ctx)
+cdef p_cdef_extern_block(PyrexScanner s, pos, ctx)
+cdef p_c_enum_definition(PyrexScanner s, pos, ctx)
+cdef p_c_enum_line(PyrexScanner s, ctx, list items)
+cdef p_c_enum_item(PyrexScanner s, ctx, list items)
+cdef p_c_struct_or_union_definition(PyrexScanner s, pos, ctx)
+cdef p_fused_definition(PyrexScanner s, pos, ctx)
+cdef p_struct_enum(PyrexScanner s, pos, ctx)
+cdef p_visibility(PyrexScanner s, prev_visibility)
+cdef p_c_modifiers(PyrexScanner s)
+cdef p_c_func_or_var_declaration(PyrexScanner s, pos, ctx)
+cdef p_ctypedef_statement(PyrexScanner s, ctx)
+cdef p_decorators(PyrexScanner s)
+cdef _reject_cdef_modifier_in_py(PyrexScanner s, name)
+cdef p_def_statement(PyrexScanner s, list decorators=*, bint is_async_def=*)
+cdef p_varargslist(PyrexScanner s, terminator=*, bint annotated = *)
+cdef p_py_arg_decl(PyrexScanner s, bint annotated = *)
+cdef p_class_statement(PyrexScanner s, decorators)
+cdef p_c_class_definition(PyrexScanner s, pos,  ctx)
+cdef tuple p_c_class_options(PyrexScanner s)
+cdef p_property_decl(PyrexScanner s)
+cdef p_doc_string(PyrexScanner s)
+cdef p_ignorable_statement(PyrexScanner s)
+cdef dict p_compiler_directive_comments(PyrexScanner s)
+cdef p_template_definition(PyrexScanner s)
+cdef p_cpp_class_definition(PyrexScanner s, pos, ctx)
+cdef p_cpp_class_attribute(PyrexScanner s, ctx)
+cdef p_annotation(PyrexScanner s)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Pipeline.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Pipeline.py
new file mode 100644
index 0000000000000000000000000000000000000000..834eb0e6ab0de49c601ca6d52c9614811e01b51d
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Pipeline.py
@@ -0,0 +1,419 @@
+from __future__ import absolute_import
+
+import itertools
+from time import time
+
+from . import Errors
+from . import DebugFlags
+from . import Options
+from .Errors import CompileError, InternalError, AbortError
+from . import Naming
+
+#
+# Really small pipeline stages
+#
+def dumptree(t):
+    # Debugging stage for pipelines: prints t.dump() and returns t unchanged.
+    print(t.dump())
+    return t
+
+def abort_on_errors(node):
+    # Pass-through stage: raise AbortError if any errors have been counted so far.
+    if Errors.get_errors_count() != 0:
+        raise AbortError("pipeline break")
+    return node
+
+def parse_stage_factory(context):  # returns the stage that parses the main module source
+    def parse(compsrc):
+        source_desc, full_module_name = compsrc.source_desc, compsrc.full_module_name
+        saved_cimport_from_pyx, Options.cimport_from_pyx = Options.cimport_from_pyx, False
+        try:  # the module scope is looked up with cimport_from_pyx switched off
+            scope = context.find_module(full_module_name, pos=(source_desc, 1, 0), need_pxd=0)
+        finally:  # always restore the global option, even if find_module() raises
+            Options.cimport_from_pyx = saved_cimport_from_pyx
+        tree = context.parse(source_desc, scope, pxd=0, full_module_name=full_module_name)
+        tree.compilation_source = compsrc
+        tree.scope = scope
+        tree.is_pxd = False
+        return tree
+    return parse
+
+def parse_pxd_stage_factory(context, scope, module_name):
+    # Stage factory: the returned callable parses a pxd source into `scope`.
+    def parse(source_desc):
+        pxd_tree = context.parse(
+            source_desc, scope, pxd=True, full_module_name=module_name)
+        pxd_tree.scope, pxd_tree.is_pxd = scope, True
+        return pxd_tree
+    return parse
+
+def generate_pyx_code_stage_factory(options, result):  # stage factory: module tree -> result
+    def generate_pyx_code_stage(module_node):
+        module_node.process_implementation(options, result)
+        result.compilation_source = module_node.compilation_source
+        return result  # from this stage on the pipeline data is `result`, not the tree
+    return generate_pyx_code_stage
+
+
+def inject_pxd_code_stage_factory(context):  # stage: merge every entry of context.pxds
+    def inject_pxd_code_stage(module_node):
+        for pxd_stats, pxd_scope in context.pxds.values():
+            module_node.merge_in(pxd_stats, pxd_scope, stage="pxd")
+        return module_node
+    return inject_pxd_code_stage
+
+
+def use_utility_code_definitions(scope, target, seen=None):
+    # Register utility code of used entries on `target`, recursing into module entries.
+    seen = set() if seen is None else seen
+    for entry in scope.entries.values():
+        if entry in seen:
+            continue
+        seen.add(entry)
+        if not (entry.used and entry.utility_code_definition):
+            if entry.as_module:
+                use_utility_code_definitions(entry.as_module, target, seen)
+            continue
+        utility_code = entry.utility_code_definition
+        target.use_utility_code(utility_code)
+        for required_utility in utility_code.requires:
+            target.use_utility_code(required_utility)
+
+
+def sorted_utility_codes_and_deps(utilcodes):  # -> list sorted by the rank computed below
+    ranks = {}  # utility code -> rank; also collects every dependency that gets visited
+    get_rank = ranks.get
+
+    def calculate_rank(utilcode):
+        rank = get_rank(utilcode)
+        if rank is None:
+            ranks[utilcode] = 0  # prevent infinite recursion on circular dependencies
+            original_order = len(ranks)  # first-visit counter, only used as a tie-breaker
+            rank = ranks[utilcode] = 1 + (
+                min([calculate_rank(dep) for dep in utilcode.requires]) if utilcode.requires else -1
+                ) + original_order * 1e-8  # 1 + lowest dependency rank (0 without deps) + tie-breaker
+        return rank
+
+    for utilcode in utilcodes:
+        calculate_rank(utilcode)
+
+    # include all recursively collected dependencies
+    return sorted(ranks, key=get_rank)
+
+
+def normalize_deps(utilcodes):  # in place: point equal dependencies at one shared object
+    canonical = dict((code, code) for code in utilcodes)
+    for code in utilcodes:
+        code.requires = [canonical.setdefault(req, req) for req in (code.requires or ())]
+
+
+def inject_utility_code_stage_factory(context):  # stage: merge utility code trees into the module
+    def inject_utility_code_stage(module_node):
+        module_node.prepare_utility_code()
+        use_utility_code_definitions(context.cython_scope, module_node.scope)
+
+        utility_code_list = module_node.scope.utility_code_list
+        utility_code_list[:] = sorted_utility_codes_and_deps(utility_code_list)  # replaced in place
+        normalize_deps(utility_code_list)
+
+        added = set()  # utility codes that were already processed by the loop below
+        # Note: the list might be extended inside the loop (if some utility code
+        # pulls in other utility code, explicitly or implicitly)
+        for utilcode in utility_code_list:
+            if utilcode in added:
+                continue
+            added.add(utilcode)
+            if utilcode.requires:
+                for dep in utilcode.requires:
+                    if dep not in added:
+                        utility_code_list.append(dep)  # picked up later by this same loop
+            tree = utilcode.get_tree(cython_scope=context.cython_scope)
+            if tree:  # only utility code for which get_tree() returns something is merged
+                module_node.merge_in(tree.with_compiler_directives(),
+                                     tree.scope, stage="utility",
+                                     merge_scope=True)
+        return module_node
+
+    return inject_utility_code_stage
+
+
+#
+# Pipeline factories
+#
+
+def create_pipeline(context, mode, exclude_classes=()):  # -> list of stages (may contain None)
+    assert mode in ('pyx', 'py', 'pxd')
+    from .Visitor import PrintTree  # NOTE(review): not referenced in this function
+    from .ParseTreeTransforms import WithTransform, NormalizeTree, PostParse, PxdPostParse
+    from .ParseTreeTransforms import ForwardDeclareTypes, InjectGilHandling, AnalyseDeclarationsTransform
+    from .ParseTreeTransforms import AnalyseExpressionsTransform, FindInvalidUseOfFusedTypes
+    from .ParseTreeTransforms import CreateClosureClasses, MarkClosureVisitor, DecoratorTransform
+    from .ParseTreeTransforms import TrackNumpyAttributes, InterpretCompilerDirectives, TransformBuiltinMethods
+    from .ParseTreeTransforms import ExpandInplaceOperators, ParallelRangeTransform
+    from .ParseTreeTransforms import CalculateQualifiedNamesTransform
+    from .TypeInference import MarkParallelAssignments, MarkOverflowingArithmetic
+    from .ParseTreeTransforms import AdjustDefByDirectives, AlignFunctionDefinitions, AutoCpdefFunctionDefinitions
+    from .ParseTreeTransforms import RemoveUnreachableCode, GilCheck, CoerceCppTemps
+    from .FlowControl import ControlFlowAnalysis
+    from .AnalysedTreeTransforms import AutoTestDictTransform
+    from .AutoDocTransforms import EmbedSignature
+    from .Optimize import FlattenInListTransform, SwitchTransform, IterationTransform
+    from .Optimize import EarlyReplaceBuiltinCalls, OptimizeBuiltinCalls
+    from .Optimize import InlineDefNodeCalls
+    from .Optimize import ConstantFolding, FinalOptimizePhase
+    from .Optimize import DropRefcountingTransform
+    from .Optimize import ConsolidateOverflowCheck
+    from .Buffer import IntroduceBufferAuxiliaryVars
+    from .ModuleNode import check_c_declarations, check_c_declarations_pxd
+
+
+    if mode == 'pxd':
+        _check_c_declarations = check_c_declarations_pxd
+        _specific_post_parse = PxdPostParse(context)
+    else:
+        _check_c_declarations = check_c_declarations
+        _specific_post_parse = None
+
+    if mode == 'py':
+        _align_function_definitions = AlignFunctionDefinitions(context)
+    else:
+        _align_function_definitions = None
+
+    # NOTE: This is the "common" part of the pipeline, which is also run
+    # for the code in pxd files. So it will be run multiple times in a
+    # compilation stage.
+    stages = [
+        NormalizeTree(context),
+        PostParse(context),
+        _specific_post_parse,  # None unless mode == 'pxd'
+        TrackNumpyAttributes(),
+        InterpretCompilerDirectives(context, context.compiler_directives),
+        ParallelRangeTransform(context),
+        WithTransform(),
+        AdjustDefByDirectives(context),
+        _align_function_definitions,  # None unless mode == 'py'
+        MarkClosureVisitor(context),
+        AutoCpdefFunctionDefinitions(context),
+        RemoveUnreachableCode(context),
+        ConstantFolding(),
+        FlattenInListTransform(),
+        DecoratorTransform(context),
+        ForwardDeclareTypes(context),
+        InjectGilHandling(),
+        AnalyseDeclarationsTransform(context),
+        AutoTestDictTransform(context),
+        EmbedSignature(context),
+        EarlyReplaceBuiltinCalls(context),  ## Necessary?
+        TransformBuiltinMethods(context),
+        MarkParallelAssignments(context),
+        ControlFlowAnalysis(context),
+        RemoveUnreachableCode(context),
+        # MarkParallelAssignments(context),
+        MarkOverflowingArithmetic(context),
+        IntroduceBufferAuxiliaryVars(context),
+        _check_c_declarations,  # plain function chosen above, not a transform instance
+        InlineDefNodeCalls(context),
+        AnalyseExpressionsTransform(context),
+        FindInvalidUseOfFusedTypes(context),
+        ExpandInplaceOperators(context),
+        IterationTransform(context),
+        SwitchTransform(context),
+        OptimizeBuiltinCalls(context),  ## Necessary?
+        CreateClosureClasses(context),  ## After all lookups and type inference
+        CalculateQualifiedNamesTransform(context),
+        ConsolidateOverflowCheck(context),
+        DropRefcountingTransform(),
+        FinalOptimizePhase(context),
+        CoerceCppTemps(context),
+        GilCheck(),
+        ]
+    if exclude_classes:  # drop stages whose exact class is listed in exclude_classes
+        stages = [s for s in stages if s.__class__ not in exclude_classes]
+    return stages
+
+def create_pyx_pipeline(context, options, result, py=False, exclude_classes=()):
+    mode = 'py' if py else 'pyx'  # mode passed on to create_pipeline()
+
+    test_support = []  # extra stages run on the tree, right after the common stages
+    ctest_support = []  # extra stages run last, after code generation
+    if options.evaluate_tree_assertions:
+        from ..TestUtils import TreeAssertVisitor
+        test_validator = TreeAssertVisitor()
+        test_support.append(test_validator)
+        ctest_support.append(test_validator.create_c_file_validator())
+
+    if options.gdb_debug:
+        from ..Debugger import DebugWriter  # requires Py2.5+
+        from .ParseTreeTransforms import DebugTransform
+        context.gdb_debug_outputwriter = DebugWriter.CythonDebugWriter(
+            options.output_dir)
+        debug_transform = [DebugTransform(context, options, result)]
+    else:
+        debug_transform = []
+
+    return list(itertools.chain(  # stage order: parse, common, tests, inject, debug, generate
+        [parse_stage_factory(context)],
+        create_pipeline(context, mode, exclude_classes=exclude_classes),
+        test_support,
+        [inject_pxd_code_stage_factory(context),
+         inject_utility_code_stage_factory(context),
+         abort_on_errors],
+        debug_transform,
+        [generate_pyx_code_stage_factory(options, result)],
+        ctest_support,
+    ))
+
+def create_pxd_pipeline(context, scope, module_name):
+    from .CodeGeneration import ExtractPxdCode
+
+    # Parse the pxd, run the shared 'pxd' stages, then extract: the pipeline
+    # ends up with a CCodeWriter containing the pxd code, plus a pxd scope.
+    pxd_stages = [parse_pxd_stage_factory(context, scope, module_name)]
+    pxd_stages.extend(create_pipeline(context, 'pxd'))
+    pxd_stages.append(ExtractPxdCode())
+
+    return pxd_stages
+
+def create_py_pipeline(context, options, result):  # create_pyx_pipeline() with py=True
+    return create_pyx_pipeline(context, options, result, py=True)
+
+def create_pyx_as_pxd_pipeline(context, result):  # pyx pipeline truncated after declaration analysis
+    from .ParseTreeTransforms import AlignFunctionDefinitions, \
+        MarkClosureVisitor, WithTransform, AnalyseDeclarationsTransform
+    from .Optimize import ConstantFolding, FlattenInListTransform
+    from .Nodes import StatListNode
+    pipeline = []
+    pyx_pipeline = create_pyx_pipeline(context, context.options, result,
+                                       exclude_classes=[
+                                           AlignFunctionDefinitions,
+                                           MarkClosureVisitor,
+                                           ConstantFolding,
+                                           FlattenInListTransform,
+                                           WithTransform
+                                           ])
+    from .Visitor import VisitorTransform
+    class SetInPxdTransform(VisitorTransform):
+        # A number of nodes have an "in_pxd" attribute which affects AnalyseDeclarationsTransform
+        # (for example controlling pickling generation). Set it, to make sure we don't mix them up with
+        # the importing main module.
+        # FIXME: This should be done closer to the parsing step.
+        def visit_StatNode(self, node):
+            if hasattr(node, "in_pxd"):
+                node.in_pxd = True
+            self.visitchildren(node)
+            return node
+
+        visit_Node = VisitorTransform.recurse_to_children
+
+    for stage in pyx_pipeline:  # copy stages up to and including AnalyseDeclarationsTransform
+        pipeline.append(stage)
+        if isinstance(stage, AnalyseDeclarationsTransform):
+            pipeline.insert(-1, SetInPxdTransform())  # lands directly before the stage just appended
+            break  # This is the last stage we need.
+    def fake_pxd(root):  # final stage: returns a (StatListNode, scope) pair
+        for entry in root.scope.entries.values():
+            if not entry.in_cinclude:
+                entry.defined_in_pxd = 1
+                if entry.name == entry.cname and entry.visibility != 'extern':
+                    # Always mangle non-extern cimported entries.
+                    entry.cname = entry.scope.mangle(Naming.func_prefix, entry.name)
+        return StatListNode(root.pos, stats=[]), root.scope
+    pipeline.append(fake_pxd)
+    return pipeline
+
+def insert_into_pipeline(pipeline, transform, before=None, after=None):
+    """
+    Insert a new transform into the pipeline after or before an instance of
+    the given class. e.g.
+
+        pipeline = insert_into_pipeline(pipeline, transform,
+                                        after=AnalyseDeclarationsTransform)
+    """
+    assert before or after
+
+    cls = before or after  # when both are given, `before` supplies the class to look for
+    for i, t in enumerate(pipeline):  # NOTE(review): no match leaves i at the last index;
+        if isinstance(t, cls):  # an empty pipeline leaves i unbound (NameError below)
+            break
+
+    if after:
+        i += 1
+
+    return pipeline[:i] + [transform] + pipeline[i:]  # new list; `pipeline` is not modified
+
+#
+# Running a pipeline
+#
+
+_pipeline_entry_points = {}  # phase name -> generated wrapper function, filled by run_pipeline()
+
+try:
+    from threading import local as _threadlocal
+except ImportError:
+    class _threadlocal(object): pass  # plain attribute holder used when the import fails
+
+threadlocal = _threadlocal()  # carries the `cython_pipeline_timings` attribute
+
+
+def get_timings():
+    # Timings dict stored on `threadlocal` by run_pipeline(). When there is none
+    # yet, a new empty dict is returned and NOT stored on `threadlocal`.
+    no_timings_yet = {}
+    return getattr(threadlocal, 'cython_pipeline_timings', no_timings_yet)
+
+
+def run_pipeline(pipeline, source, printtree=True):  # -> (error or None, data)
+    from .Visitor import PrintTree
+    exec_ns = globals().copy() if DebugFlags.debug_verbose_pipeline else None  # namespace for exec() below
+
+    try:
+        timings = threadlocal.cython_pipeline_timings
+    except AttributeError:
+        timings = threadlocal.cython_pipeline_timings = {}
+
+    def run(phase, data):  # default runner; replaced per phase in verbose mode
+        return phase(data)
+
+    error = None
+    data = source  # each phase receives the previous phase's return value
+    try:
+        try:
+            for phase in pipeline:
+                if phase is None:
+                    continue
+                if not printtree and isinstance(phase, PrintTree):
+                    continue
+
+                phase_name = getattr(phase, '__name__', type(phase).__name__)  # function or class name
+                if DebugFlags.debug_verbose_pipeline:
+                    print("Entering pipeline phase %r" % phase)
+                    # create a new wrapper for each step to show the name in profiles
+                    try:
+                        run = _pipeline_entry_points[phase_name]
+                    except KeyError:
+                        exec("def %s(phase, data): return phase(data)" % phase_name, exec_ns)
+                        run = _pipeline_entry_points[phase_name] = exec_ns[phase_name]
+
+                t = time()
+                data = run(phase, data)
+                t = time() - t
+
+                try:
+                    old_t, count = timings[phase_name]
+                except KeyError:
+                    old_t, count = 0, 0
+                timings[phase_name] = (old_t + int(t * 1000000), count + 1)  # (total microseconds, runs)
+                if DebugFlags.debug_verbose_pipeline:
+                    print("    %.3f seconds" % t)
+        except CompileError as err:
+            # err is set
+            Errors.report_error(err, use_stack=False)
+            error = err
+    except InternalError as err:
+        # Only raise if there was not an earlier error
+        if Errors.get_errors_count() == 0:
+            raise
+        error = err
+    except AbortError as err:
+        error = err
+    return (error, data)  # on error, data is the result of the last phase that completed
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.pxd b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.pxd
new file mode 100644
index 0000000000000000000000000000000000000000..6890e6a518e11798610237b966cc13bdb9002cbc
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Scanning.pxd
@@ -0,0 +1,63 @@
+# cython: language_level=3
+
+import cython
+
+from ..Plex.Scanners cimport Scanner
+
+cdef unicode any_string_prefix, IDENT
+
+cdef get_lexicon()
+cdef initial_compile_time_env()
+
+## methods commented out with '##' are used by Parsing.py when compiled.
+
+@cython.final
+cdef class CompileTimeScope:  # attribute declarations only; all methods are left commented out
+    cdef public dict entries
+    cdef public CompileTimeScope outer  # presumably the enclosing scope -- TODO confirm in Scanning.py
+    ##cdef declare(self, name, value)
+    ##cdef lookup_here(self, name)
+    ##cpdef lookup(self, name)
+
+@cython.final
+cdef class PyrexScanner(Scanner):  # Scanner is cimported from ..Plex.Scanners
+    cdef public context
+    cdef public list included_files
+    cdef public CompileTimeScope compile_time_env
+    cdef public bint compile_time_eval
+    cdef public bint compile_time_expr
+    cdef public bint parse_comments
+    cdef public bint in_python_file
+    cdef public source_encoding
+    cdef dict keywords  # no public/readonly modifier: not exposed as a Python attribute
+    cdef public list indentation_stack
+    cdef public indentation_char
+    cdef public int bracket_nesting_level
+    cdef readonly bint async_enabled  # readonly: readable, but not assignable, from Python code
+    cdef public unicode sy
+    cdef public systring  # EncodedString
+    cdef public list put_back_on_failure
+
+    cdef Py_ssize_t current_level(self)
+    #cpdef commentline(self, text)
+    #cpdef open_bracket_action(self, text)
+    #cpdef close_bracket_action(self, text)
+    #cpdef newline_action(self, text)
+    #cpdef begin_string_action(self, text)
+    #cpdef end_string_action(self, text)
+    #cpdef unclosed_string_action(self, text)
+    @cython.locals(current_level=Py_ssize_t, new_level=Py_ssize_t)
+    cpdef indentation_action(self, text)
+    #cpdef eof_action(self, text)
+    ##cdef next(self)
+    ##cdef peek(self)
+    #cpdef put_back(self, sy, systring)
+    ##cdef bint expect(self, what, message = *) except -2
+    ##cdef expect_keyword(self, what, message = *)
+    ##cdef expected(self, what, message = *)
+    ##cdef expect_indent(self)
+    ##cdef expect_dedent(self)
+    ##cdef expect_newline(self, message=*, bint ignore_semicolon=*)
+    ##cdef int enter_async(self) except -1
+    ##cdef int exit_async(self) except -1
+    cdef void error_at_scanpos(self, str message) except *  # void return, exceptions still propagate
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Tests/TestGrammar.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Tests/TestGrammar.py
new file mode 100644
index 0000000000000000000000000000000000000000..852b48c33dd07a3345c416e809fe87f6a4c88c75
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Tests/TestGrammar.py
@@ -0,0 +1,203 @@
+# mode: run
+# tag: syntax
+
+"""
+Uses TreeFragment to test invalid syntax.
+"""
+
+from __future__ import absolute_import
+
+import ast
+import textwrap
+
+from ...TestUtils import CythonTest
+from .. import ExprNodes
+from ..Errors import CompileError
+
+# Copied from CPython's test_grammar.py
+VALID_UNDERSCORE_LITERALS = [
+    '0_0_0',
+    '4_2',
+    '1_0000_0000',
+    '0b1001_0100',
+    '0xffff_ffff',
+    '0o5_7_7',
+    '1_00_00.5',
+    '1_00_00.5j',
+    '1_00_00.5e5',
+    '1_00_00j',
+    '1_00_00e5_1',
+    '1e1_0',
+    '.1_4',
+    '.1_4e1',
+    '0b_0',
+    '0x_f',
+    '0o_5',
+    '1_00_00j',
+    '1_00_00.5j',
+    '1_00_00e5_1j',
+    '.1_4j',
+    '(1_2.5+3_3j)',
+    '(.5_6j)',
+]
+
+# Copied from CPython's test_grammar.py
+INVALID_UNDERSCORE_LITERALS = [
+    # Trailing underscores:
+    '0_',
+    '42_',
+    '1.4j_',
+    '0x_',
+    '0b1_',
+    '0xf_',
+    '0o5_',
+    '0 if 1_Else 1',
+    # Underscores in the base selector:
+    '0_b0',
+    '0_xf',
+    '0_o5',
+    # Old-style octal, still disallowed:
+    # FIXME: still need to support PY_VERSION_HEX < 3
+    '0_7',
+    '09_99',
+    # Multiple consecutive underscores:
+    '4_______2',
+    '0.1__4',
+    '0.1__4j',
+    '0b1001__0100',
+    '0xffff__ffff',
+    '0x___',
+    '0o5__77',
+    '1e1__0',
+    '1e1__0j',
+    # Underscore right before a dot:
+    '1_.4',
+    '1_.4j',
+    # Underscore right after a dot:
+    '1._4',
+    '1._4j',
+    '._5',
+    '._5j',
+    # Underscore right after a sign:
+    '1.0e+_1',
+    '1.0e+_1j',
+    # Underscore right before j:
+    '1.4_j',
+    '1.4e5_j',
+    # Underscore right before e:
+    '1_e1',
+    '1.4_e1',
+    '1.4_e1j',
+    # Underscore right after e:
+    '1e_1',
+    '1.4e_1',
+    '1.4e_1j',
+    # Complex cases with parens:
+    '(1+1.5_j_)',
+    '(1+1.5_j)',
+    # Whitespace in literals
+    '1_ 2',
+    '1 _2',
+    '1_2.2_ 1',
+    '1_2.2 _1',
+    '1_2e _1',
+    '1_2e2 _1',
+    '1_2e 2_1',
+]
+
+
+INVALID_ELLIPSIS = [
+    (". . .", 2, 0),
+    (". ..", 2, 0),
+    (".. .", 2, 0),
+    (". ...", 2, 0),
+    (". ... .", 2, 0),
+    (".. ... .", 2, 0),
+    (". ... ..", 2, 0),
+    ("""
+    (
+        .
+        ..
+    )
+    """, 3, 4),
+    ("""
+    [
+        ..
+        .,
+        None
+    ]
+    """, 3, 4),
+    ("""
+    {
+        None,
+        .
+        .
+
+        .
+    }
+    """, 4, 4)
+]
+
+
+class TestGrammar(CythonTest):  # compiles small code fragments through self.fragment()
+
+    def test_invalid_number_literals(self):
+        for literal in INVALID_UNDERSCORE_LITERALS:
+            for expression in ['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']:
+                code = 'x = ' + expression % literal
+                try:
+                    self.fragment(u'''\
+                    # cython: language_level=3
+                    ''' + code)
+                except CompileError as exc:  # the error text must quote the offending source line
+                    assert code in [s.strip() for s in str(exc).splitlines()], str(exc)
+                else:
+                    assert False, "Invalid Cython code '%s' failed to raise an exception" % code
+
+    def test_valid_number_literals(self):
+        for literal in VALID_UNDERSCORE_LITERALS:
+            for i, expression in enumerate(['%s', '1 + %s', '%s + 1', '2 * %s', '%s * 2']):
+                code = 'x = ' + expression % literal
+                node = self.fragment(u'''\
+                    # cython: language_level=3
+                    ''' + code).root
+                assert node is not None
+
+                literal_node = node.stats[0].rhs  # StatListNode([SingleAssignmentNode('x', expr)])
+                if i > 0:
+                    # Add/MulNode() -> literal is first or second operand
+                    literal_node = literal_node.operand2 if i % 2 else literal_node.operand1
+                if 'j' in literal or 'J' in literal:
+                    if '+' in literal:
+                        # FIXME: tighten this test
+                        assert isinstance(literal_node, ExprNodes.AddNode), (literal, literal_node)
+                    else:
+                        assert isinstance(literal_node, ExprNodes.ImagNode), (literal, literal_node)
+                elif '.' in literal or (('e' in literal or 'E' in literal) and not ('0x' in literal or '0X' in literal)):
+                    assert isinstance(literal_node, ExprNodes.FloatNode), (literal, literal_node)  # e/E in hex are digits
+                else:
+                    assert isinstance(literal_node, ExprNodes.IntNode), (literal, literal_node)
+
+    def test_invalid_ellipsis(self):
+        ERR = ":{0}:{1}: Expected an identifier or literal"
+        for code, line, col in INVALID_ELLIPSIS:
+            try:
+                ast.parse(textwrap.dedent(code))
+            except SyntaxError:
+                pass  # expected: ast.parse() rejects this code as well
+            else:
+                assert False, "Invalid Python code '%s' failed to raise an exception" % code
+
+            try:
+                self.fragment(u'''\
+                # cython: language_level=3
+                ''' + code)
+            except CompileError as exc:  # the error must name the expected line and column
+                assert ERR.format(line, col) in str(exc), str(exc)
+            else:
+                assert False, "Invalid Cython code '%s' failed to raise an exception" % code
+
+
+if __name__ == "__main__":  # allows running this test module directly
+    import unittest
+    unittest.main()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TreePath.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TreePath.py
new file mode 100644
index 0000000000000000000000000000000000000000..8585905557b3a2d6d97e5840da99e36ee4e976dc
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TreePath.py
@@ -0,0 +1,296 @@
+"""
+A simple XPath-like language for tree traversal.
+
+This works by creating a filter chain of generator functions.  Each
+function selects a part of the expression, e.g. a child node, a
+specific descendant or a node that holds an attribute.
+"""
+
+from __future__ import absolute_import
+
+import re
+import operator
+import sys
+
+if sys.version_info[0] >= 3:  # pick the text string type of the running Python version
+    _unicode = str
+else:
+    _unicode = unicode
+
+path_tokenizer = re.compile(  # findall() yields (special, text) pairs, one per match
+    r"("
+    r"'[^']*'|\"[^\"]*\"|"  # quoted string, single or double quotes
+    r"//?|"  # '//' or '/'
+    r"\(\)|"  # '()' as a single token
+    r"==?|"  # '==' or '='
+    r"[/.*\[\]()@])|"  # any other single special character; closes group 1 (special)
+    r"([^/\[\]()@=\s]+)|"  # group 2 (text): run of characters that are not special or space
+    r"\s+"  # whitespace matches with both groups empty
+    ).findall
+
+def iterchildren(node, attr_name):
+    # Normalises the attribute `attr_name` of `node` into an iterable of children:
+    #   None -> (), a list (exact type) -> that same list object,
+    #   any other value -> a new one-element list holding it.
+    value = getattr(node, attr_name)
+    if value is None:
+        return ()
+    if type(value) is not list:
+        return [value]
+    return value
+
+def _get_first_or_none(it):
+    # Advance `it` once and return that item, or None when it is exhausted.
+    # A Py2-style ``it.next`` method is preferred over the builtin next().
+    try:
+        advance = getattr(it, 'next', None)
+        if advance is None:
+            return next(it)
+        return advance()
+    except StopIteration:
+        return None
+
+def type_name(node):  # class name of `node`; only the part after the last '.' is kept
+    return node.__class__.__name__.split('.')[-1]
+
+def parse_func(next, token):  # -> (function name, predicate built from its argument)
+    name = token[1]
+    token = next()
+    if token[0] != '(':
+        raise ValueError("Expected '(' after function name '%s'" % name)
+    predicate = handle_predicate(next, token)  # the argument is parsed like a [...] predicate
+    return name, predicate
+
+def handle_func_not(next, token):
+    """
+    not(...) -- keeps exactly those nodes for which the inner predicate selects nothing
+    """
+    name, predicate = parse_func(next, token)  # `name` is not used any further
+
+    def select(result):
+        for node in result:
+            if _get_first_or_none(predicate([node])) is None:
+                yield node
+    return select
+
+def handle_name(next, token):
+    """
+    /NodeName/
+    or
+    func(...)
+    """
+    name = token[1]
+    if name in functions:  # a registered function name takes precedence over a node name
+        return functions[name](next, token)
+    def select(result):  # yields the direct children whose class name equals `name`
+        for node in result:
+            for attr_name in node.child_attrs:
+                for child in iterchildren(node, attr_name):
+                    if type_name(child) == name:
+                        yield child
+    return select
+
+def handle_star(next, token):
+    """
+    /*/ -- selects every direct child, whatever its type
+    """
+    def select(result):
+        for node in result:
+            for name in node.child_attrs:  # child attributes are visited in declaration order
+                for child in iterchildren(node, name):
+                    yield child
+    return select
+
+def handle_dot(next, token):
+    """
+    /./ -- selects the current nodes themselves
+    """
+    def select(result):
+        return result  # identity: the incoming iterable is passed through as is
+    return select
+
+def handle_descendants(next, token):
+    """
+    //... -- selects descendants at any depth ('//*' or '//NodeName')
+    """
+    token = next()
+    if token[0] == "*":
+        def iter_recursive(node):  # every descendant, each child before its own children
+            for name in node.child_attrs:
+                for child in iterchildren(node, name):
+                    yield child
+                    for c in iter_recursive(child):
+                        yield c
+    elif not token[0]:  # plain text token: a node class name
+        node_name = token[1]
+        def iter_recursive(node):  # only descendants whose class name equals node_name
+            for name in node.child_attrs:
+                for child in iterchildren(node, name):
+                    if type_name(child) == node_name:
+                        yield child
+                    for c in iter_recursive(child):  # non-matching children are still descended into
+                        yield c
+    else:
+        raise ValueError("Expected node name after '//'")
+
+    def select(result):
+        for node in result:
+            for child in iter_recursive(node):
+                yield child
+
+    return select
+
+
+def handle_attribute(next, token):  # handles '@name' and '@name=value'
+    token = next()
+    if token[0]:  # a special token right after '@' is not a valid attribute name
+        raise ValueError("Expected attribute name")
+    name = token[1]
+    value = None
+    try:
+        token = next()  # NOTE(review): this lookahead token is consumed even if it is not '='
+    except StopIteration:
+        pass
+    else:
+        if token[0] == '=':
+            value = parse_path_value(next)
+    readattr = operator.attrgetter(name)
+    if value is None:
+        def select(result):  # '@name': yields the attribute values that are not None
+            for node in result:
+                try:
+                    attr_value = readattr(node)
+                except AttributeError:
+                    continue
+                if attr_value is not None:
+                    yield attr_value
+    else:
+        def select(result):  # '@name=value': yields the attribute values equal to `value`
+            for node in result:
+                try:
+                    attr_value = readattr(node)
+                except AttributeError:
+                    continue
+                if attr_value == value:
+                    yield attr_value
+                elif (isinstance(attr_value, bytes) and isinstance(value, _unicode) and
+                        attr_value == value.encode()):
+                    # allow a bytes-to-string comparison too
+                    yield attr_value
+
+    return select
+
+
+def parse_path_value(next):  # parses the value after '=': quoted string, integer or true/false
+    token = next()  # (special, text) pair as produced by path_tokenizer
+    value = token[0]
+    if value:
+        if value[:1] == "'" or value[:1] == '"':
+            return value[1:-1]  # quoted string: strip the surrounding quotes
+        try:
+            return int(value)
+        except ValueError:
+            pass
+    elif token[1].isdigit():
+        return int(token[1])
+    else:
+        name = token[1].lower()  # boolean names are matched case-insensitively
+        if name == 'true':
+            return True
+        elif name == 'false':
+            return False
+    raise ValueError("Invalid attribute predicate: '%s'" % (value or token[1]))  # show the rejected text
+
+def handle_predicate(next, token):  # parses the steps that follow an opening token, up to ']'
+    token = next()
+    selector = []  # chain of select functions that make up the predicate
+    while token[0] != ']':
+        selector.append( operations[token[0]](next, token) )
+        try:
+            token = next()
+        except StopIteration:
+            break
+        else:
+            if token[0] == "/":  # a '/' between steps is skipped
+                token = next()
+
+        if not token[0] and token[1] == 'and':  # 'and': the rest is parsed as the right-hand side
+            return logical_and(selector, handle_predicate(next, token))
+
+    def select(result):  # yields each node for which the selector chain produces a non-None item
+        for node in result:
+            subresult = iter((node,))
+            for select in selector:
+                subresult = select(subresult)
+            predicate_result = _get_first_or_none(subresult)
+            if predicate_result is not None:
+                yield node
+    return select
+
+def logical_and(lhs_selects, rhs_select):
+    # 'lhs and rhs': yields a node once per rhs result, provided the lhs chain matches it.
+    def select(result):
+        for node in result:
+            lhs_matches = iter((node,))
+            for lhs_step in lhs_selects:
+                lhs_matches = lhs_step(lhs_matches)
+            if _get_first_or_none(lhs_matches) is None:
+                continue
+            for _rhs_match in rhs_select(iter((node,))):
+                yield node
+    return select
+
+
+operations = {  # special token (token[0]) -> handler(next, token) that returns a select function
+    "@":  handle_attribute,
+    "":   handle_name,  # empty special part: the token is plain text
+    "*":  handle_star,
+    ".":  handle_dot,
+    "//": handle_descendants,
+    "[":  handle_predicate,
+    }
+
+functions = {  # function name -> handler; consulted by handle_name()
+    'not' : handle_func_not
+    }
+
+def _build_path_iterator(path):  # -> list of select functions, one per path step
+    # parse pattern
+    stream = iter([ (special,text)
+                    for (special,text) in path_tokenizer(path)
+                    if special or text ])  # drops whitespace matches (both parts empty)
+    try:
+        _next = stream.next
+    except AttributeError:
+        # Python 3
+        def _next():
+            return next(stream)
+    token = _next()  # NOTE(review): a path without tokens raises StopIteration here
+    selector = []
+    while 1:
+        try:
+            selector.append(operations[token[0]](_next, token))
+        except StopIteration:  # a handler ran out of tokens
+            raise ValueError("invalid path")
+        try:
+            token = _next()
+            if token[0] == "/":  # a '/' between steps is skipped
+                token = _next()
+        except StopIteration:
+            break
+    return selector
+
+# main module API
+
+def iterfind(node, path):
+    # Returns an iterator over everything `path` selects when started at `node`.
+    matches = iter((node,))
+    for step in _build_path_iterator(path):
+        matches = step(matches)
+    return matches
+
+def find_first(node, path):  # first result of iterfind(), or None when there is none
+    return _get_first_or_none(iterfind(node, path))
+
+def find_all(node, path):  # all results of iterfind(), collected into a list
+    return list(iterfind(node, path))
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TypeSlots.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TypeSlots.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad27e0d6eaa49ec8a9e2cc1a43993ec8dbe2b9f5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/TypeSlots.py
@@ -0,0 +1,1180 @@
+#
+#   Tables describing slots in the CPython type object
+#   and associated know-how.
+#
+
+from __future__ import absolute_import
+
+from . import Naming
+from . import PyrexTypes
+from .Errors import error
+
+import copy
+
+# Special method names listed separately from the ordinary slot methods.
+# NOTE(review): presumably these are hidden from Python-level lookup - confirm
+# against the users of this list.
+invisible = ['__cinit__', '__dealloc__', '__richcmp__',
+             '__nonzero__', '__bool__']
+
+# The six rich comparison methods.  RichcmpSlot.slot_code() uses this list to
+# decide whether a comparison slot function has to be provided.
+richcmp_special_methods = ['__eq__', '__ne__', '__lt__', '__gt__', '__le__', '__ge__']
+
+
+class Signature(object):
+    #  Method slot signature descriptor.
+    #
+    #  has_dummy_arg      boolean
+    #  has_generic_args   boolean
+    #  fixed_arg_format   string
+    #  ret_format         string
+    #  error_value        string
+    #  use_fastcall       boolean
+    #
+    #  The formats are strings made up of the following
+    #  characters:
+    #
+    #    'O'  Python object
+    #    'T'  Python object of the type of 'self'
+    #    'v'  void
+    #    'p'  void *
+    #    'P'  void **
+    #    'i'  int
+    #    'b'  bint
+    #    'I'  int *
+    #    'l'  long
+    #    'f'  float
+    #    'd'  double
+    #    'h'  Py_hash_t
+    #    'z'  Py_ssize_t
+    #    'Z'  Py_ssize_t *
+    #    's'  char *
+    #    'S'  char **
+    #    'r'  int used only to signal exception
+    #    'B'  Py_buffer *
+    #    '-'  dummy 'self' argument (not used)
+    #    '*'  rest of args passed as generic Python
+    #           arg tuple and kw dict (must be last
+    #           char in format string)
+    #    '?'  optional object arg (currently for pow only)
+
+    format_map = {
+        'O': PyrexTypes.py_object_type,
+        'v': PyrexTypes.c_void_type,
+        'p': PyrexTypes.c_void_ptr_type,
+        'P': PyrexTypes.c_void_ptr_ptr_type,
+        'i': PyrexTypes.c_int_type,
+        'b': PyrexTypes.c_bint_type,
+        'I': PyrexTypes.c_int_ptr_type,
+        'l': PyrexTypes.c_long_type,
+        'f': PyrexTypes.c_float_type,
+        'd': PyrexTypes.c_double_type,
+        'h': PyrexTypes.c_py_hash_t_type,
+        'z': PyrexTypes.c_py_ssize_t_type,
+        'Z': PyrexTypes.c_py_ssize_t_ptr_type,
+        's': PyrexTypes.c_char_ptr_type,
+        'S': PyrexTypes.c_char_ptr_ptr_type,
+        'r': PyrexTypes.c_returncode_type,
+        'B': PyrexTypes.c_py_buffer_ptr_type,
+        '?': PyrexTypes.py_object_type
+        # 'T', '-' and '*' are handled otherwise
+        # and are not looked up in here
+    }
+
+    type_to_format_map = dict(
+        (type_, format_) for format_, type_ in format_map.items())
+
+    error_value_map = {
+        'O': "NULL",
+        'T': "NULL",
+        'i': "-1",
+        'b': "-1",
+        'l': "-1",
+        'r': "-1",
+        'h': "-1",
+        'z': "-1",
+    }
+
+    # Use METH_FASTCALL instead of METH_VARARGS
+    use_fastcall = False
+
+    def __init__(self, arg_format, ret_format, nogil=False):
+        self.has_dummy_arg = False
+        self.has_generic_args = False
+        self.optional_object_arg_count = 0
+        if arg_format[:1] == '-':
+            self.has_dummy_arg = True
+            arg_format = arg_format[1:]
+        if arg_format[-1:] == '*':
+            self.has_generic_args = True
+            arg_format = arg_format[:-1]
+        if arg_format[-1:] == '?':
+            self.optional_object_arg_count += 1
+        self.fixed_arg_format = arg_format
+        self.ret_format = ret_format
+        self.error_value = self.error_value_map.get(ret_format, None)
+        self.exception_check = ret_format != 'r' and self.error_value is not None
+        self.is_staticmethod = False
+        self.nogil = nogil
+
+    def __repr__(self):
+        return '<Signature[%s(%s%s)]>' % (
+            self.ret_format,
+            ', '.join(self.fixed_arg_format),
+            '*' if self.has_generic_args else '')
+
+    def min_num_fixed_args(self):
+        return self.max_num_fixed_args() - self.optional_object_arg_count
+
+    def max_num_fixed_args(self):
+        return len(self.fixed_arg_format)
+
+    def is_self_arg(self, i):
+        # argument is 'self' for methods or 'class' for classmethods
+        return self.fixed_arg_format[i] == 'T'
+
+    def returns_self_type(self):
+        # return type is same as 'self' argument type
+        return self.ret_format == 'T'
+
+    def fixed_arg_type(self, i):
+        return self.format_map[self.fixed_arg_format[i]]
+
+    def return_type(self):
+        return self.format_map[self.ret_format]
+
+    def format_from_type(self, arg_type):
+        if arg_type.is_pyobject:
+            arg_type = PyrexTypes.py_object_type
+        return self.type_to_format_map[arg_type]
+
+    def exception_value(self):
+        return self.error_value_map.get(self.ret_format)
+
+    def function_type(self, self_arg_override=None):
+        #  Construct a C function type descriptor for this signature
+        args = []
+        for i in range(self.max_num_fixed_args()):
+            if self_arg_override is not None and self.is_self_arg(i):
+                assert isinstance(self_arg_override, PyrexTypes.CFuncTypeArg)
+                args.append(self_arg_override)
+            else:
+                arg_type = self.fixed_arg_type(i)
+                args.append(PyrexTypes.CFuncTypeArg("", arg_type, None))
+        if self_arg_override is not None and self.returns_self_type():
+            ret_type = self_arg_override.type
+        else:
+            ret_type = self.return_type()
+        exc_value = self.exception_value()
+        return PyrexTypes.CFuncType(
+            ret_type, args, exception_value=exc_value,
+            exception_check=self.exception_check,
+            nogil=self.nogil)
+
+    def method_flags(self):
+        if self.ret_format == "O":
+            full_args = self.fixed_arg_format
+            if self.has_dummy_arg:
+                full_args = "O" + full_args
+            if full_args in ["O", "T"]:
+                if not self.has_generic_args:
+                    return [method_noargs]
+                elif self.use_fastcall:
+                    return [method_fastcall, method_keywords]
+                else:
+                    return [method_varargs, method_keywords]
+            elif full_args in ["OO", "TO"] and not self.has_generic_args:
+                return [method_onearg]
+
+            if self.is_staticmethod:
+                if self.use_fastcall:
+                    return [method_fastcall, method_keywords]
+                else:
+                    return [method_varargs, method_keywords]
+        return None
+
+    def method_function_type(self):
+        # Return the C function type
+        mflags = self.method_flags()
+        kw = "WithKeywords" if (method_keywords in mflags) else ""
+        for m in mflags:
+            if m == method_noargs or m == method_onearg:
+                return "PyCFunction"
+            if m == method_varargs:
+                return "PyCFunction" + kw
+            if m == method_fastcall:
+                return "__Pyx_PyCFunction_FastCall" + kw
+        return None
+
+    def with_fastcall(self):
+        # Return a copy of this Signature with use_fastcall=True
+        sig = copy.copy(self)
+        sig.use_fastcall = True
+        return sig
+
+    @property
+    def fastvar(self):
+        # Used to select variants of functions, one dealing with METH_VARARGS
+        # and one dealing with __Pyx_METH_FASTCALL
+        if self.use_fastcall:
+            return "FASTCALL"
+        else:
+            return "VARARGS"
+
+
+class SlotDescriptor(object):
+    #  Abstract base class for type slot descriptors.
+    #
+    #  slot_name    string           Member name of the slot in the type object
+    #  is_initialised_dynamically    Is initialised by code in the module init function
+    #  is_inherited                  Is inherited by subtypes (see PyType_Ready())
+    #  py3                           Indicates presence of slot in Python 3
+    #  py2                           Indicates presence of slot in Python 2
+    #  ifdef                         Full #ifdef string that slot is wrapped in. Using this causes py3, py2 and flags to be ignored.)
+    #  used_ifdef                    Full #ifdef string that the slot value is wrapped in (otherwise it is assigned NULL)
+    #                                Unlike "ifdef" the slot is defined and this just controls if it receives a value
+
+    def __init__(self, slot_name, dynamic=False, inherited=False,
+                 py3=True, py2=True, ifdef=None, is_binop=False,
+                 used_ifdef=None):
+        self.slot_name = slot_name
+        self.is_initialised_dynamically = dynamic
+        self.is_inherited = inherited
+        self.ifdef = ifdef
+        self.used_ifdef = used_ifdef
+        self.py3 = py3
+        self.py2 = py2
+        self.is_binop = is_binop
+
+    def slot_code(self, scope):
+        # Subclasses return the C expression for the slot value as a string.
+        # The string "0" is treated as "slot not set" throughout this class.
+        raise NotImplementedError()
+
+    def spec_value(self, scope):
+        # Value used for the type spec entry; defaults to the struct value.
+        return self.slot_code(scope)
+
+    def preprocessor_guard_code(self):
+        # Return the opening "#if ..." line that wraps this slot, or None
+        # if no guard is needed.  An explicit 'ifdef' takes precedence over
+        # the py2/py3 settings.
+        ifdef = self.ifdef
+        py2 = self.py2
+        py3 = self.py3
+        guard = None
+        if ifdef:
+            guard = "#if %s" % ifdef
+        elif not py3 or py3 == '<RESERVED>':
+            guard = "#if PY_MAJOR_VERSION < 3"
+        elif not py2:
+            guard = "#if PY_MAJOR_VERSION >= 3"
+        return guard
+
+    def generate_spec(self, scope, code):
+        # Emit a "{Py_<slot>, (void *)<value>}," entry for this slot.
+        # Nothing is emitted for dynamically initialised slots or for
+        # slots whose value is "0".
+        if self.is_initialised_dynamically:
+            return
+        value = self.spec_value(scope)
+        if value == "0":
+            return
+        preprocessor_guard = self.preprocessor_guard_code()
+        if not preprocessor_guard:
+            if self.py3 and self.slot_name.startswith('bf_'):
+                # The buffer protocol requires Limited API 3.11, so check if the spec slots are available.
+                preprocessor_guard = "#if defined(Py_%s)" % self.slot_name
+        if preprocessor_guard:
+            code.putln(preprocessor_guard)
+        code.putln("{Py_%s, (void *)%s}," % (self.slot_name, value))
+        if preprocessor_guard:
+            code.putln("#endif")
+
+    def generate(self, scope, code):
+        # Emit the "<value>, /*<slot>*/" line for this slot, wrapped in
+        # whatever preprocessor guards apply.  The order of the emitted
+        # #if/#else/#endif lines matters.
+        preprocessor_guard = self.preprocessor_guard_code()
+        if preprocessor_guard:
+            code.putln(preprocessor_guard)
+
+        end_pypy_guard = False
+        if self.is_initialised_dynamically:
+            # The real value is assigned later by generate_dynamic_init_code().
+            value = "0"
+        else:
+            value = self.slot_code(scope)
+            if value == "0" and self.is_inherited:
+                # PyPy currently has a broken PyType_Ready() that fails to
+                # inherit some slots.  To work around this, we explicitly
+                # set inherited slots here, but only in PyPy since CPython
+                # handles this better than we do (except for buffer slots in type specs).
+                inherited_value = value
+                current_scope = scope
+                # Walk up the base type chain until some scope provides a value.
+                while (inherited_value == "0"
+                       and current_scope.parent_type
+                       and current_scope.parent_type.base_type
+                       and current_scope.parent_type.base_type.scope):
+                    current_scope = current_scope.parent_type.base_type.scope
+                    inherited_value = self.slot_code(current_scope)
+                if inherited_value != "0":
+                    # we always need inherited buffer slots for the type spec
+                    is_buffer_slot = int(self.slot_name in ("bf_getbuffer", "bf_releasebuffer"))
+                    code.putln("#if CYTHON_COMPILING_IN_PYPY || %d" % is_buffer_slot)
+                    code.putln("%s, /*%s*/" % (inherited_value, self.slot_name))
+                    code.putln("#else")
+                    end_pypy_guard = True
+
+        if self.used_ifdef:
+            code.putln("#if %s" % self.used_ifdef)
+        code.putln("%s, /*%s*/" % (value, self.slot_name))
+        if self.used_ifdef:
+            code.putln("#else")
+            code.putln("NULL, /*%s*/" % self.slot_name)
+            code.putln("#endif")
+
+        if end_pypy_guard:
+            code.putln("#endif")
+
+        if self.py3 == '<RESERVED>':
+            # Outside the guard the struct member still exists: fill it with 0.
+            code.putln("#else")
+            code.putln("0, /*reserved*/")
+        if preprocessor_guard:
+            code.putln("#endif")
+
+    # Some C implementations have trouble statically
+    # initialising a global with a pointer to an extern
+    # function, so we initialise some of the type slots
+    # in the module init function instead.
+
+    def generate_dynamic_init_code(self, scope, code):
+        # Only slots flagged as dynamically initialised emit an assignment.
+        if self.is_initialised_dynamically:
+            self.generate_set_slot_code(
+                self.slot_code(scope), scope, code)
+
+    def generate_set_slot_code(self, value, scope, code):
+        # Emit "<type>-><slot> = <value>;" (or "<typeobj>.<slot> = <value>;"
+        # when the type has no type pointer cname).  A value of "0" emits nothing.
+        if value == "0":
+            return
+
+        if scope.parent_type.typeptr_cname:
+            target = "%s->%s" % (scope.parent_type.typeptr_cname, self.slot_name)
+        else:
+            assert scope.parent_type.typeobj_cname
+            target = "%s.%s" % (scope.parent_type.typeobj_cname, self.slot_name)
+
+        code.putln("%s = %s;" % (target, value))
+
+
+class FixedSlot(SlotDescriptor):
+    #  Descriptor for a type slot with a fixed value.
+    #
+    #  value        string
+
+    def __init__(self, slot_name, value, py3=True, py2=True, ifdef=None):
+        SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2, ifdef=ifdef)
+        self.value = value
+
+    def slot_code(self, scope):
+        return self.value
+
+
+class EmptySlot(FixedSlot):
+    #  Descriptor for a type slot whose value is always 0.
+
+    def __init__(self, slot_name, py3=True, py2=True, ifdef=None):
+        FixedSlot.__init__(self, slot_name, "0", py3=py3, py2=py2, ifdef=ifdef)
+
+
+class MethodSlot(SlotDescriptor):
+    #  Type slot descriptor for a user-definable method.
+    #
+    #  signature    Signature
+    #  method_name  string           The __xxx__ name of the method
+    #  alternatives [string]         Alternative list of __xxx__ names for the method
+
+    def __init__(self, signature, slot_name, method_name, method_name_to_slot,
+                 fallback=None, py3=True, py2=True, ifdef=None, inherited=True):
+        SlotDescriptor.__init__(self, slot_name, py3=py3, py2=py2,
+                                ifdef=ifdef, inherited=inherited)
+        self.signature = signature
+        self.slot_name = slot_name
+        self.method_name = method_name
+        self.alternatives = []
+        method_name_to_slot[method_name] = self
+        #
+        if fallback:
+            self.alternatives.append(fallback)
+        for alt in (self.py2, self.py3):
+            if isinstance(alt, (tuple, list)):
+                slot_name, method_name = alt
+                self.alternatives.append(method_name)
+                method_name_to_slot[method_name] = self
+
+    def slot_code(self, scope):
+        entry = scope.lookup_here(self.method_name)
+        if entry and entry.is_special and entry.func_cname:
+            return entry.func_cname
+        for method_name in self.alternatives:
+            entry = scope.lookup_here(method_name)
+            if entry and entry.is_special and entry.func_cname:
+                return entry.func_cname
+        return "0"
+
+
+class InternalMethodSlot(SlotDescriptor):
+    #  Type slot descriptor for a method which is always
+    #  synthesized by Cython.
+    #
+    #  slot_name    string           Member name of the slot in the type object
+
+    def __init__(self, slot_name, **kargs):
+        SlotDescriptor.__init__(self, slot_name, **kargs)
+
+    def slot_code(self, scope):
+        return scope.mangle_internal(self.slot_name)
+
+
+class GCDependentSlot(InternalMethodSlot):
+    #  Descriptor for a slot whose value depends on whether
+    #  the type participates in GC.
+
+    def __init__(self, slot_name, **kargs):
+        InternalMethodSlot.__init__(self, slot_name, **kargs)
+
+    def slot_code(self, scope):
+        if not scope.needs_gc():
+            return "0"
+        if not scope.has_cyclic_pyobject_attrs:
+            # if the type does not have GC relevant object attributes, it can
+            # delegate GC methods to its parent - iff the parent functions
+            # are defined in the same module
+            parent_type_scope = scope.parent_type.base_type.scope
+            if scope.parent_scope is parent_type_scope.parent_scope:
+                entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+                if entry.visibility != 'extern':
+                    return self.slot_code(parent_type_scope)
+        return InternalMethodSlot.slot_code(self, scope)
+
+
+class GCClearReferencesSlot(GCDependentSlot):
+
+    def slot_code(self, scope):
+        if scope.needs_tp_clear():
+            return GCDependentSlot.slot_code(self, scope)
+        return "0"
+
+
+class ConstructorSlot(InternalMethodSlot):
+    #  Descriptor for tp_new and tp_dealloc.
+    #
+    #  method       string or None   Name of the user-defined special method
+    #                                that forces the type to get its own slot function
+
+    def __init__(self, slot_name, method=None, **kargs):
+        InternalMethodSlot.__init__(self, slot_name, **kargs)
+        self.method = method
+
+    def _needs_own(self, scope):
+        # Return False only if the type has a base type, has none of the
+        # attribute kinds checked below, is not a tp_new slot of a type with
+        # a vtable slot, and does not define 'self.method' as a special method.
+        if (scope.parent_type.base_type
+                and not scope.has_pyobject_attrs
+                and not scope.has_memoryview_attrs
+                and not scope.has_cpp_constructable_attrs
+                and not (self.slot_name == 'tp_new' and scope.parent_type.vtabslot_cname)):
+            entry = scope.lookup_here(self.method) if self.method else None
+            if not (entry and entry.is_special):
+                return False
+        # Unless we can safely delegate to the parent, all types need a tp_new().
+        return True
+
+    def _parent_slot_function(self, scope):
+        # Return the base type's slot function if the base type lives in the
+        # same parent scope and is not declared extern; otherwise None.
+        parent_type_scope = scope.parent_type.base_type.scope
+        if scope.parent_scope is parent_type_scope.parent_scope:
+            entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+            if entry.visibility != 'extern':
+                return self.slot_code(parent_type_scope)
+        return None
+
+    def slot_code(self, scope):
+        if not self._needs_own(scope):
+            # if the type does not need its own slot function, it can
+            # delegate to the function of its parent - iff the parent
+            # functions are defined in the same module
+            slot_code = self._parent_slot_function(scope)
+            return slot_code or '0'
+        return InternalMethodSlot.slot_code(self, scope)
+
+    def spec_value(self, scope):
+        slot_function = self.slot_code(scope)
+        if self.slot_name == "tp_dealloc" and slot_function != scope.mangle_internal("tp_dealloc"):
+            # Not used => inherit from base type.
+            return "0"
+        return slot_function
+
+    def generate_dynamic_init_code(self, scope, code):
+        # Only act when no slot function is known statically.
+        if self.slot_code(scope) != '0':
+            return
+        # If we don't have our own slot function and don't know the
+        # parent function statically, copy it dynamically.
+        base_type = scope.parent_type.base_type
+        if base_type.typeptr_cname:
+            src = '%s->%s' % (base_type.typeptr_cname, self.slot_name)
+        elif base_type.is_extension_type and base_type.typeobj_cname:
+            src = '%s.%s' % (base_type.typeobj_cname, self.slot_name)
+        else:
+            return
+
+        self.generate_set_slot_code(src, scope, code)
+
+
+class SyntheticSlot(InternalMethodSlot):
+    #  Type slot descriptor for a synthesized method which
+    #  dispatches to one or more user-defined methods depending
+    #  on its arguments. If none of the relevant methods are
+    #  defined, the method will not be synthesized and an
+    #  alternative default value will be placed in the type
+    #  slot.
+
+    def __init__(self, slot_name, user_methods, default_value, **kargs):
+        InternalMethodSlot.__init__(self, slot_name, **kargs)
+        self.user_methods = user_methods
+        self.default_value = default_value
+
+    def slot_code(self, scope):
+        if scope.defines_any_special(self.user_methods):
+            return InternalMethodSlot.slot_code(self, scope)
+        else:
+            return self.default_value
+
+    def spec_value(self, scope):
+        return self.slot_code(scope)
+
+
+class BinopSlot(SyntheticSlot):
+    def __init__(self, signature, slot_name, left_method, method_name_to_slot, **kargs):
+        assert left_method.startswith('__')
+        right_method = '__r' + left_method[2:]
+        SyntheticSlot.__init__(
+                self, slot_name, [left_method, right_method], "0", is_binop=True, **kargs)
+        # MethodSlot causes special method registration.
+        self.left_slot = MethodSlot(signature, "", left_method, method_name_to_slot, **kargs)
+        self.right_slot = MethodSlot(signature, "", right_method, method_name_to_slot, **kargs)
+
+
+class RichcmpSlot(MethodSlot):
+    def slot_code(self, scope):
+        entry = scope.lookup_here(self.method_name)
+        if entry and entry.is_special and entry.func_cname:
+            return entry.func_cname
+        elif scope.defines_any_special(richcmp_special_methods):
+            return scope.mangle_internal(self.slot_name)
+        else:
+            return "0"
+
+
+class TypeFlagsSlot(SlotDescriptor):
+    #  Descriptor for the type flags slot.
+
+    def slot_code(self, scope):
+        value = "Py_TPFLAGS_DEFAULT"
+        if scope.directives['type_version_tag']:
+            # it's not in 'Py_TPFLAGS_DEFAULT' in Py2
+            value += "|Py_TPFLAGS_HAVE_VERSION_TAG"
+        else:
+            # it's enabled in 'Py_TPFLAGS_DEFAULT' in Py3
+            value = "(%s&~Py_TPFLAGS_HAVE_VERSION_TAG)" % value
+        value += "|Py_TPFLAGS_CHECKTYPES|Py_TPFLAGS_HAVE_NEWBUFFER"
+        if not scope.parent_type.is_final_type:
+            value += "|Py_TPFLAGS_BASETYPE"
+        if scope.needs_gc():
+            value += "|Py_TPFLAGS_HAVE_GC"
+        if scope.may_have_finalize():
+            value += "|Py_TPFLAGS_HAVE_FINALIZE"
+        if scope.parent_type.has_sequence_flag:
+            value += "|Py_TPFLAGS_SEQUENCE"
+        return value
+
+    def generate_spec(self, scope, code):
+        # Flags are stored in the PyType_Spec, not in a PyType_Slot.
+        return
+
+
+class DocStringSlot(SlotDescriptor):
+    #  Descriptor for the docstring slot.
+
+    def slot_code(self, scope):
+        doc = scope.doc
+        if doc is None:
+            return "0"
+        if doc.is_unicode:
+            doc = doc.as_utf8_string()
+        return "PyDoc_STR(%s)" % doc.as_c_string_literal()
+
+
+class SuiteSlot(SlotDescriptor):
+    #  Descriptor for a substructure of the type object.
+    #
+    #  sub_slots   [SlotDescriptor]
+
+    def __init__(self, sub_slots, slot_type, slot_name, substructures, ifdef=None):
+        SlotDescriptor.__init__(self, slot_name, ifdef=ifdef)
+        self.sub_slots = sub_slots
+        self.slot_type = slot_type
+        substructures.append(self)
+
+    def is_empty(self, scope):
+        for slot in self.sub_slots:
+            if slot.slot_code(scope) != "0":
+                return False
+        return True
+
+    def substructure_cname(self, scope):
+        return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name)
+
+    def slot_code(self, scope):
+        if not self.is_empty(scope):
+            return "&%s" % self.substructure_cname(scope)
+        return "0"
+
+    def generate_substructure(self, scope, code):
+        if not self.is_empty(scope):
+            code.putln("")
+            if self.ifdef:
+                code.putln("#if %s" % self.ifdef)
+            code.putln(
+                "static %s %s = {" % (
+                    self.slot_type,
+                    self.substructure_cname(scope)))
+            for slot in self.sub_slots:
+                slot.generate(scope, code)
+            code.putln("};")
+            if self.ifdef:
+                code.putln("#endif")
+
+    def generate_spec(self, scope, code):
+        for slot in self.sub_slots:
+            slot.generate_spec(scope, code)
+
+class MethodTableSlot(SlotDescriptor):
+    #  Slot descriptor for the method table.
+
+    def slot_code(self, scope):
+        if scope.pyfunc_entries:
+            return scope.method_table_cname
+        else:
+            return "0"
+
+
+class MemberTableSlot(SlotDescriptor):
+    #  Slot descriptor for the table of Python-accessible attributes.
+
+    def slot_code(self, scope):
+        # Only used in specs.
+        return "0"
+
+    def get_member_specs(self, scope):
+        return [
+            get_slot_by_name("tp_dictoffset", scope.directives).members_slot_value(scope),
+            #get_slot_by_name("tp_weaklistoffset").spec_value(scope),
+        ]
+
+    def is_empty(self, scope):
+        for member_entry in self.get_member_specs(scope):
+            if member_entry:
+                return False
+        return True
+
+    def substructure_cname(self, scope):
+        return "%s%s_%s" % (Naming.pyrex_prefix, self.slot_name, scope.class_name)
+
+    def generate_substructure_spec(self, scope, code):
+        if self.is_empty(scope):
+            return
+        from .Code import UtilityCode
+        code.globalstate.use_utility_code(UtilityCode.load_cached("IncludeStructmemberH", "ModuleSetupCode.c"))
+
+        code.putln("static struct PyMemberDef %s[] = {" % self.substructure_cname(scope))
+        for member_entry in self.get_member_specs(scope):
+            if member_entry:
+                code.putln(member_entry)
+        code.putln("{NULL, 0, 0, 0, NULL}")
+        code.putln("};")
+
+    def spec_value(self, scope):
+        if self.is_empty(scope):
+            return "0"
+        return self.substructure_cname(scope)
+
+
+class GetSetSlot(SlotDescriptor):
+    #  Slot descriptor for the table of attribute get & set methods.
+
+    def slot_code(self, scope):
+        if scope.property_entries:
+            return scope.getset_table_cname
+        else:
+            return "0"
+
+
+class BaseClassSlot(SlotDescriptor):
+    #  Slot descriptor for the base class slot.
+
+    def __init__(self, name):
+        SlotDescriptor.__init__(self, name, dynamic=True)
+
+    def generate_dynamic_init_code(self, scope, code):
+        base_type = scope.parent_type.base_type
+        if base_type:
+            code.putln("%s->%s = %s;" % (
+                scope.parent_type.typeptr_cname,
+                self.slot_name,
+                base_type.typeptr_cname))
+
+
+class DictOffsetSlot(SlotDescriptor):
+    #  Slot descriptor for a class' dict offset, for dynamic attributes.
+
+    def slot_code(self, scope):
+        dict_entry = scope.lookup_here("__dict__") if not scope.is_closure_class_scope else None
+        if dict_entry and dict_entry.is_variable:
+            if getattr(dict_entry.type, 'cname', None) != 'PyDict_Type':
+                error(dict_entry.pos, "__dict__ slot must be of type 'dict'")
+                return "0"
+            type = scope.parent_type
+            if type.typedef_flag:
+                objstruct = type.objstruct_cname
+            else:
+                objstruct = "struct %s" % type.objstruct_cname
+            return ("offsetof(%s, %s)" % (
+                        objstruct,
+                        dict_entry.cname))
+        else:
+            return "0"
+
+    def members_slot_value(self, scope):
+        dict_offset = self.slot_code(scope)
+        if dict_offset == "0":
+            return None
+        return '{"__dictoffset__", T_PYSSIZET, %s, READONLY, NULL},' % dict_offset
+
+## The following slots are (or could be) initialised with an
+## extern function pointer.
+#
+#slots_initialised_from_extern = (
+#    "tp_free",
+#)
+
+#------------------------------------------------------------------------------------------
+#
+#  Utility functions for accessing slot table data structures
+#
+#------------------------------------------------------------------------------------------
+
+
+def get_property_accessor_signature(name):
+    #  Return signature of accessor for an extension type
+    #  property, else None.
+    return property_accessor_signatures.get(name)
+
+
+def get_base_slot_function(scope, slot):
+    #  Returns the function implementing this slot in the baseclass.
+    #  This is useful for enabling the compiler to optimize calls
+    #  that recursively climb the class hierarchy.
+    base_type = scope.parent_type.base_type
+    if base_type and scope.parent_scope is base_type.scope.parent_scope:
+        parent_slot = slot.slot_code(base_type.scope)
+        if parent_slot != '0':
+            entry = scope.parent_scope.lookup_here(scope.parent_type.base_type.name)
+            if entry.visibility != 'extern':
+                return parent_slot
+    return None
+
+
+def get_slot_function(scope, slot):
+    #  Returns the function implementing this slot in the class of
+    #  'scope' itself, or None if the slot is empty ('0') or the type
+    #  is declared 'extern'.
+    #  This is useful for enabling the compiler to optimize calls
+    #  that recursively climb the class hierarchy.
+    slot_code = slot.slot_code(scope)
+    if slot_code != '0':
+        entry = scope.parent_scope.lookup_here(scope.parent_type.name)
+        if entry.visibility != 'extern':
+            return slot_code
+    return None
+
+
+def get_slot_by_name(slot_name, compiler_directives):
+    # For now, only search the type struct, no referenced sub-structs.
+    for slot in get_slot_table(compiler_directives).slot_table:
+        if slot.slot_name == slot_name:
+            return slot
+    assert False, "Slot not found: %s" % slot_name
+
+
+def get_slot_code_by_name(scope, slot_name):
+    slot = get_slot_by_name(slot_name, scope.directives)
+    return slot.slot_code(scope)
+
+def is_reverse_number_slot(name):
+    """
+    Tries to identify __radd__ and friends (so the METH_COEXIST flag can be applied).
+
+    There's no great consequence if it inadvertently identifies a few other methods
+    so just use a simple rule rather than an exact list.
+    """
+    if name.startswith("__r") and name.endswith("__"):
+        forward_name = name.replace("r", "", 1)
+        for meth in get_slot_table(None).PyNumberMethods:
+            if hasattr(meth, "right_slot"):
+                return True
+    return False
+
+
+#------------------------------------------------------------------------------------------
+#
+#  Signatures for generic Python functions and methods.
+#
+#------------------------------------------------------------------------------------------
+
+pyfunction_signature = Signature("-*", "O")
+pymethod_signature = Signature("T*", "O")
+
+#------------------------------------------------------------------------------------------
+#
+#  Signatures for simple Python functions.
+#
+#------------------------------------------------------------------------------------------
+
+pyfunction_noargs = Signature("-", "O")
+pyfunction_onearg = Signature("-O", "O")
+
+#------------------------------------------------------------------------------------------
+#
+#  Signatures for the various kinds of function that
+#  can appear in the type object and its substructures.
+#
+#------------------------------------------------------------------------------------------
+# NOTE(review): judging by the typedefs quoted alongside, "T" is the self argument, "O" an object, "i" int, "z" Py_ssize_t and 'r' an int status result -- confirm against Signature.
+unaryfunc = Signature("T", "O")            # typedef PyObject * (*unaryfunc)(PyObject *);
+binaryfunc = Signature("OO", "O")          # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *);
+ibinaryfunc = Signature("TO", "O")         # typedef PyObject * (*binaryfunc)(PyObject *, PyObject *); -- same typedef as binaryfunc, but "T" for the first argument
+powternaryfunc = Signature("OO?", "O")     # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
+ipowternaryfunc = Signature("TO?", "O")    # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *); -- same typedef as powternaryfunc, but "T" for the first argument
+callfunc = Signature("T*", "O")            # typedef PyObject * (*ternaryfunc)(PyObject *, PyObject *, PyObject *);
+inquiry = Signature("T", "i")              # typedef int (*inquiry)(PyObject *);
+lenfunc = Signature("T", "z")              # typedef Py_ssize_t (*lenfunc)(PyObject *);
+
+                                           # typedef int (*coercion)(PyObject **, PyObject **);
+intargfunc = Signature("Ti", "O")          # typedef PyObject *(*intargfunc)(PyObject *, int);
+ssizeargfunc = Signature("Tz", "O")        # typedef PyObject *(*ssizeargfunc)(PyObject *, Py_ssize_t);
+intintargfunc = Signature("Tii", "O")      # typedef PyObject *(*intintargfunc)(PyObject *, int, int);
+ssizessizeargfunc = Signature("Tzz", "O")  # typedef PyObject *(*ssizessizeargfunc)(PyObject *, Py_ssize_t, Py_ssize_t);
+intobjargproc = Signature("TiO", 'r')      # typedef int(*intobjargproc)(PyObject *, int, PyObject *);
+ssizeobjargproc = Signature("TzO", 'r')    # typedef int(*ssizeobjargproc)(PyObject *, Py_ssize_t, PyObject *);
+intintobjargproc = Signature("TiiO", 'r')  # typedef int(*intintobjargproc)(PyObject *, int, int, PyObject *);
+ssizessizeobjargproc = Signature("TzzO", 'r')  # typedef int(*ssizessizeobjargproc)(PyObject *, Py_ssize_t, Py_ssize_t, PyObject *);
+
+intintargproc = Signature("Tii", 'r')      # no typedef quoted; presumably intintobjargproc without the trailing object argument -- TODO confirm
+ssizessizeargproc = Signature("Tzz", 'r')  # no typedef quoted; presumably ssizessizeobjargproc without the trailing object argument -- TODO confirm
+objargfunc = Signature("TO", "O")          # no typedef quoted; same format strings as getattrofunc below
+objobjargproc = Signature("TOO", 'r')      # typedef int (*objobjargproc)(PyObject *, PyObject *, PyObject *);
+readbufferproc = Signature("TzP", "z")     # typedef Py_ssize_t (*readbufferproc)(PyObject *, Py_ssize_t, void **);
+writebufferproc = Signature("TzP", "z")    # typedef Py_ssize_t (*writebufferproc)(PyObject *, Py_ssize_t, void **);
+segcountproc = Signature("TZ", "z")        # typedef Py_ssize_t (*segcountproc)(PyObject *, Py_ssize_t *);
+charbufferproc = Signature("TzS", "z")     # typedef Py_ssize_t (*charbufferproc)(PyObject *, Py_ssize_t, char **);
+objargproc = Signature("TO", 'r')          # typedef int (*objobjproc)(PyObject *, PyObject *);
+                                           # typedef int (*visitproc)(PyObject *, void *);
+                                           # typedef int (*traverseproc)(PyObject *, visitproc, void *);
+
+destructor = Signature("T", "v")           # typedef void (*destructor)(PyObject *);
+# printfunc = Signature("TFi", 'r')        # typedef int (*printfunc)(PyObject *, FILE *, int);
+                                           # typedef PyObject *(*getattrfunc)(PyObject *, char *);
+getattrofunc = Signature("TO", "O")        # typedef PyObject *(*getattrofunc)(PyObject *, PyObject *);
+                                           # typedef int (*setattrfunc)(PyObject *, char *, PyObject *);
+setattrofunc = Signature("TOO", 'r')       # typedef int (*setattrofunc)(PyObject *, PyObject *, PyObject *);
+delattrofunc = Signature("TO", 'r')        # no typedef quoted; presumably setattrofunc without the value argument -- TODO confirm
+cmpfunc = Signature("TO", "i")             # typedef int (*cmpfunc)(PyObject *, PyObject *);
+reprfunc = Signature("T", "O")             # typedef PyObject *(*reprfunc)(PyObject *);
+hashfunc = Signature("T", "h")             # typedef Py_hash_t (*hashfunc)(PyObject *);
+richcmpfunc = Signature("TOi", "O")        # typedef PyObject *(*richcmpfunc) (PyObject *, PyObject *, int);
+getiterfunc = Signature("T", "O")          # typedef PyObject *(*getiterfunc) (PyObject *);
+iternextfunc = Signature("T", "O")         # typedef PyObject *(*iternextfunc) (PyObject *);
+descrgetfunc = Signature("TOO", "O")       # typedef PyObject *(*descrgetfunc) (PyObject *, PyObject *, PyObject *);
+descrsetfunc = Signature("TOO", 'r')       # typedef int (*descrsetfunc) (PyObject *, PyObject *, PyObject *);
+descrdelfunc = Signature("TO", 'r')        # no typedef quoted; presumably descrsetfunc without the value argument -- TODO confirm
+initproc = Signature("T*", 'r')            # typedef int (*initproc)(PyObject *, PyObject *, PyObject *);
+                                           # typedef PyObject *(*newfunc)(struct _typeobject *, PyObject *, PyObject *);
+                                           # typedef PyObject *(*allocfunc)(struct _typeobject *, int);
+
+getbufferproc = Signature("TBi", "r")      # typedef int (*getbufferproc)(PyObject *, Py_buffer *, int);
+releasebufferproc = Signature("TB", "v")   # typedef void (*releasebufferproc)(PyObject *, Py_buffer *);
+
+
+#------------------------------------------------------------------------------------------
+#
+#  Signatures for accessor methods of properties.
+#
+#------------------------------------------------------------------------------------------
+
+property_accessor_signatures = {  # keyed by the name of the accessor method
+    '__get__': Signature("T", "O"),
+    '__set__': Signature("TO", 'r'),
+    '__del__': Signature("T", 'r')
+}
+
+# C preprocessor condition, passed as ifdef= to the slots of the number table that are marked Py2-only.
+PyNumberMethods_Py2only_GUARD = "PY_MAJOR_VERSION < 3 || (CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX < 0x03050000)"
+
+#------------------------------------------------------------------------------------------
+#
+#  The main slot table. This table contains descriptors for all the
+#  top-level type slots, beginning with tp_dealloc, in the order they
+#  appear in the type object.
+#
+# It depends on some compiler directives (currently c_api_binop_methods), so the
+# slot tables for each set of compiler directives are generated lazily and put in
+# the _slot_table_dict
+#
+#------------------------------------------------------------------------------------------
+
+class SlotTable(object):  # slot descriptor tables for one value of old_binops (the c_api_binop_methods directive)
+    def __init__(self, old_binops):
+        # The following dictionary maps __xxx__ method names to slot descriptors.
+        method_name_to_slot = {}
+        self._get_slot_by_method_name = method_name_to_slot.get
+        self.substructures = []   # List of all SuiteSlot instances
+        # old_binops selects the "OO..." signatures (binaryfunc/powternaryfunc) over the "TO..." ones for the binop slots.
+        bf = binaryfunc if old_binops else ibinaryfunc
+        ptf = powternaryfunc if old_binops else ipowternaryfunc
+
+        #  Descriptor tables for the slots of the various type object
+        #  substructures, in the order they appear in the structure.
+        self.PyNumberMethods = (
+            BinopSlot(bf, "nb_add", "__add__", method_name_to_slot),
+            BinopSlot(bf, "nb_subtract", "__sub__", method_name_to_slot),
+            BinopSlot(bf, "nb_multiply", "__mul__", method_name_to_slot),
+            BinopSlot(bf, "nb_divide", "__div__", method_name_to_slot,
+                      ifdef = PyNumberMethods_Py2only_GUARD),
+            BinopSlot(bf, "nb_remainder", "__mod__", method_name_to_slot),
+            BinopSlot(bf, "nb_divmod", "__divmod__", method_name_to_slot),
+            BinopSlot(ptf, "nb_power", "__pow__", method_name_to_slot),
+            MethodSlot(unaryfunc, "nb_negative", "__neg__", method_name_to_slot),
+            MethodSlot(unaryfunc, "nb_positive", "__pos__", method_name_to_slot),
+            MethodSlot(unaryfunc, "nb_absolute", "__abs__", method_name_to_slot),
+            MethodSlot(inquiry, "nb_bool", "__bool__", method_name_to_slot,
+                       py2 = ("nb_nonzero", "__nonzero__")),
+            MethodSlot(unaryfunc, "nb_invert", "__invert__", method_name_to_slot),
+            BinopSlot(bf, "nb_lshift", "__lshift__", method_name_to_slot),
+            BinopSlot(bf, "nb_rshift", "__rshift__", method_name_to_slot),
+            BinopSlot(bf, "nb_and", "__and__", method_name_to_slot),
+            BinopSlot(bf, "nb_xor", "__xor__", method_name_to_slot),
+            BinopSlot(bf, "nb_or", "__or__", method_name_to_slot),
+            EmptySlot("nb_coerce", ifdef = PyNumberMethods_Py2only_GUARD),
+            MethodSlot(unaryfunc, "nb_int", "__int__", method_name_to_slot, fallback="__long__"),
+            MethodSlot(unaryfunc, "nb_long", "__long__", method_name_to_slot,
+                       fallback="__int__", py3 = "<RESERVED>"),
+            MethodSlot(unaryfunc, "nb_float", "__float__", method_name_to_slot),
+            MethodSlot(unaryfunc, "nb_oct", "__oct__", method_name_to_slot,
+                       ifdef = PyNumberMethods_Py2only_GUARD),
+            MethodSlot(unaryfunc, "nb_hex", "__hex__", method_name_to_slot,
+                       ifdef = PyNumberMethods_Py2only_GUARD),
+
+            # Added in release 2.0
+            MethodSlot(ibinaryfunc, "nb_inplace_add", "__iadd__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_subtract", "__isub__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_multiply", "__imul__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_divide", "__idiv__", method_name_to_slot,
+                       ifdef = PyNumberMethods_Py2only_GUARD),
+            MethodSlot(ibinaryfunc, "nb_inplace_remainder", "__imod__", method_name_to_slot),
+            MethodSlot(ptf, "nb_inplace_power", "__ipow__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_lshift", "__ilshift__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_rshift", "__irshift__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_and", "__iand__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_xor", "__ixor__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_or", "__ior__", method_name_to_slot),
+
+            # Added in release 2.2
+            # The following require the Py_TPFLAGS_HAVE_CLASS flag
+            BinopSlot(bf, "nb_floor_divide", "__floordiv__", method_name_to_slot),
+            BinopSlot(bf, "nb_true_divide", "__truediv__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_floor_divide", "__ifloordiv__", method_name_to_slot),
+            MethodSlot(ibinaryfunc, "nb_inplace_true_divide", "__itruediv__", method_name_to_slot),
+
+            # Added in release 2.5
+            MethodSlot(unaryfunc, "nb_index", "__index__", method_name_to_slot),
+
+            # Added in release 3.5
+            BinopSlot(bf, "nb_matrix_multiply", "__matmul__", method_name_to_slot,
+                      ifdef="PY_VERSION_HEX >= 0x03050000"),
+            MethodSlot(ibinaryfunc, "nb_inplace_matrix_multiply", "__imatmul__", method_name_to_slot,
+                       ifdef="PY_VERSION_HEX >= 0x03050000"),
+        )
+
+        self.PySequenceMethods = (
+            MethodSlot(lenfunc, "sq_length", "__len__", method_name_to_slot),
+            EmptySlot("sq_concat"),  # nb_add used instead
+            EmptySlot("sq_repeat"),  # nb_multiply used instead
+            SyntheticSlot("sq_item", ["__getitem__"], "0"),    #EmptySlot("sq_item"),   # mp_subscript used instead
+            MethodSlot(ssizessizeargfunc, "sq_slice", "__getslice__", method_name_to_slot),
+            EmptySlot("sq_ass_item"),  # mp_ass_subscript used instead
+            SyntheticSlot("sq_ass_slice", ["__setslice__", "__delslice__"], "0"),
+            MethodSlot(cmpfunc, "sq_contains", "__contains__", method_name_to_slot),
+            EmptySlot("sq_inplace_concat"),  # nb_inplace_add used instead
+            EmptySlot("sq_inplace_repeat"),  # nb_inplace_multiply used instead
+        )
+
+        self.PyMappingMethods = (
+            MethodSlot(lenfunc, "mp_length", "__len__", method_name_to_slot),
+            MethodSlot(objargfunc, "mp_subscript", "__getitem__", method_name_to_slot),
+            SyntheticSlot("mp_ass_subscript", ["__setitem__", "__delitem__"], "0"),
+        )
+
+        self.PyBufferProcs = (
+            MethodSlot(readbufferproc, "bf_getreadbuffer", "__getreadbuffer__", method_name_to_slot,
+                       py3 = False),
+            MethodSlot(writebufferproc, "bf_getwritebuffer", "__getwritebuffer__", method_name_to_slot,
+                       py3 = False),
+            MethodSlot(segcountproc, "bf_getsegcount", "__getsegcount__", method_name_to_slot,
+                       py3 = False),
+            MethodSlot(charbufferproc, "bf_getcharbuffer", "__getcharbuffer__", method_name_to_slot,
+                       py3 = False),
+
+            MethodSlot(getbufferproc, "bf_getbuffer", "__getbuffer__", method_name_to_slot),
+            MethodSlot(releasebufferproc, "bf_releasebuffer", "__releasebuffer__", method_name_to_slot)
+        )
+
+        self.PyAsyncMethods = (
+            MethodSlot(unaryfunc, "am_await", "__await__", method_name_to_slot),
+            MethodSlot(unaryfunc, "am_aiter", "__aiter__", method_name_to_slot),
+            MethodSlot(unaryfunc, "am_anext", "__anext__", method_name_to_slot),
+            EmptySlot("am_send", ifdef="PY_VERSION_HEX >= 0x030A00A3"),
+        )
+
+        self.slot_table = (
+            ConstructorSlot("tp_dealloc", '__dealloc__'),
+            EmptySlot("tp_print", ifdef="PY_VERSION_HEX < 0x030800b4"),
+            EmptySlot("tp_vectorcall_offset", ifdef="PY_VERSION_HEX >= 0x030800b4"),
+            EmptySlot("tp_getattr"),
+            EmptySlot("tp_setattr"),
+
+            # tp_compare (Py2) / tp_reserved (Py3<3.5) / tp_as_async (Py3.5+) is always used as tp_as_async in Py3
+            MethodSlot(cmpfunc, "tp_compare", "__cmp__", method_name_to_slot, ifdef="PY_MAJOR_VERSION < 3"),
+            SuiteSlot(self. PyAsyncMethods, "__Pyx_PyAsyncMethodsStruct", "tp_as_async",
+                      self.substructures, ifdef="PY_MAJOR_VERSION >= 3"),
+
+            MethodSlot(reprfunc, "tp_repr", "__repr__", method_name_to_slot),
+
+            SuiteSlot(self.PyNumberMethods, "PyNumberMethods", "tp_as_number", self.substructures),
+            SuiteSlot(self.PySequenceMethods, "PySequenceMethods", "tp_as_sequence", self.substructures),
+            SuiteSlot(self.PyMappingMethods, "PyMappingMethods", "tp_as_mapping", self.substructures),
+
+            MethodSlot(hashfunc, "tp_hash", "__hash__", method_name_to_slot,
+                       inherited=False),    # Py3 checks for __richcmp__
+            MethodSlot(callfunc, "tp_call", "__call__", method_name_to_slot),
+            MethodSlot(reprfunc, "tp_str", "__str__", method_name_to_slot),
+
+            SyntheticSlot("tp_getattro", ["__getattr__","__getattribute__"], "0"),  #"PyObject_GenericGetAttr"),
+            SyntheticSlot("tp_setattro", ["__setattr__", "__delattr__"], "0"),  #"PyObject_GenericSetAttr"),
+
+            SuiteSlot(self.PyBufferProcs, "PyBufferProcs", "tp_as_buffer", self.substructures),
+
+            TypeFlagsSlot("tp_flags"),
+            DocStringSlot("tp_doc"),
+
+            GCDependentSlot("tp_traverse"),
+            GCClearReferencesSlot("tp_clear"),
+
+            RichcmpSlot(richcmpfunc, "tp_richcompare", "__richcmp__", method_name_to_slot,
+                        inherited=False),  # Py3 checks for __hash__
+
+            EmptySlot("tp_weaklistoffset"),
+
+            MethodSlot(getiterfunc, "tp_iter", "__iter__", method_name_to_slot),
+            MethodSlot(iternextfunc, "tp_iternext", "__next__", method_name_to_slot),
+
+            MethodTableSlot("tp_methods"),
+            MemberTableSlot("tp_members"),
+            GetSetSlot("tp_getset"),
+
+            BaseClassSlot("tp_base"),  #EmptySlot("tp_base"),
+            EmptySlot("tp_dict"),
+
+            SyntheticSlot("tp_descr_get", ["__get__"], "0"),
+            SyntheticSlot("tp_descr_set", ["__set__", "__delete__"], "0"),
+
+            DictOffsetSlot("tp_dictoffset", ifdef="!CYTHON_USE_TYPE_SPECS"),  # otherwise set via "__dictoffset__" member
+
+            MethodSlot(initproc, "tp_init", "__init__", method_name_to_slot),
+            EmptySlot("tp_alloc"),  #FixedSlot("tp_alloc", "PyType_GenericAlloc"),
+            ConstructorSlot("tp_new", "__cinit__"),
+            EmptySlot("tp_free"),
+
+            EmptySlot("tp_is_gc"),
+            EmptySlot("tp_bases"),
+            EmptySlot("tp_mro"),
+            EmptySlot("tp_cache"),
+            EmptySlot("tp_subclasses"),
+            EmptySlot("tp_weaklist"),
+            EmptySlot("tp_del"),
+            EmptySlot("tp_version_tag"),
+            SyntheticSlot("tp_finalize", ["__del__"], "0", ifdef="PY_VERSION_HEX >= 0x030400a1",
+                          used_ifdef="CYTHON_USE_TP_FINALIZE"),
+            EmptySlot("tp_vectorcall", ifdef="PY_VERSION_HEX >= 0x030800b1 && (!CYTHON_COMPILING_IN_PYPY || PYPY_VERSION_NUM >= 0x07030800)"),
+            EmptySlot("tp_print", ifdef="__PYX_NEED_TP_PRINT_SLOT == 1"),
+            EmptySlot("tp_watched", ifdef="PY_VERSION_HEX >= 0x030C0000"),
+            EmptySlot("tp_versions_used", ifdef="PY_VERSION_HEX >= 0x030d00A4"),
+            # PyPy specific extension - only here to avoid C compiler warnings.
+            EmptySlot("tp_pypy_flags", ifdef="CYTHON_COMPILING_IN_PYPY && PY_VERSION_HEX >= 0x03090000 && PY_VERSION_HEX < 0x030a0000"),
+        )
+
+        #------------------------------------------------------------------------------------------
+        #
+        #  Descriptors for special methods which don't appear directly
+        #  in the type object or its substructures. These methods are
+        #  called from slot functions synthesized by Cython.
+        #
+        #------------------------------------------------------------------------------------------
+        # The instances below are not kept; presumably MethodSlot registers itself in method_name_to_slot -- TODO confirm.
+        MethodSlot(initproc, "", "__cinit__", method_name_to_slot)
+        MethodSlot(destructor, "", "__dealloc__", method_name_to_slot)
+        MethodSlot(destructor, "", "__del__", method_name_to_slot)
+        MethodSlot(objobjargproc, "", "__setitem__", method_name_to_slot)
+        MethodSlot(objargproc, "", "__delitem__", method_name_to_slot)
+        MethodSlot(ssizessizeobjargproc, "", "__setslice__", method_name_to_slot)
+        MethodSlot(ssizessizeargproc, "", "__delslice__", method_name_to_slot)
+        MethodSlot(getattrofunc, "", "__getattr__", method_name_to_slot)
+        MethodSlot(getattrofunc, "", "__getattribute__", method_name_to_slot)
+        MethodSlot(setattrofunc, "", "__setattr__", method_name_to_slot)
+        MethodSlot(delattrofunc, "", "__delattr__", method_name_to_slot)
+        MethodSlot(descrgetfunc, "", "__get__", method_name_to_slot)
+        MethodSlot(descrsetfunc, "", "__set__", method_name_to_slot)
+        MethodSlot(descrdelfunc, "", "__delete__", method_name_to_slot)
+
+    def get_special_method_signature(self, name):
+        #  Given a method name, return the signature of its slot if one is registered,
+        #  ibinaryfunc if the name is in richcmp_special_methods, else None.
+        slot = self._get_slot_by_method_name(name)
+        if slot:
+            return slot.signature
+        elif name in richcmp_special_methods:
+            return ibinaryfunc
+        else:
+            return None
+
+    def get_slot_by_method_name(self, method_name):
+        # For now, only search the type struct, no referenced sub-structs.
+        return self._get_slot_by_method_name(method_name)
+
+    def __iter__(self):
+        # make it easier to iterate over all the slots (yields the entries of self.slot_table)
+        return iter(self.slot_table)
+
+
+_slot_table_dict = {}  # SlotTable cache, keyed by a tuple of the directive values a table depends on
+
+def get_slot_table(compiler_directives):
+    # Look up the SlotTable for these directives, building and caching it on first use.
+    if not compiler_directives:
+        # builtin type classes don't have directives set, so fetch the defaults here
+        from .Options import get_directive_defaults
+        compiler_directives = get_directive_defaults()
+
+    cache_key = (compiler_directives['c_api_binop_methods'],)
+    table = _slot_table_dict.get(cache_key)
+    if table is None:
+        table = _slot_table_dict[cache_key] = SlotTable(old_binops=cache_key[0])
+    return table
+
+
+# Populate "special_method_names" based on the default directives (so it can always be accessed quickly).
+special_method_names = set(get_slot_table(compiler_directives=None))
+# NOTE(review): iterating a SlotTable yields the entries of its slot_table (see SlotTable.__iter__), so this set holds those slot objects -- confirm that method name strings were not intended.
+
+# Method flags for python-exposed methods (kept here as strings naming the C-level flags).
+
+method_noargs   = "METH_NOARGS"
+method_onearg   = "METH_O"
+method_varargs  = "METH_VARARGS"
+method_fastcall = "__Pyx_METH_FASTCALL"  # Actually VARARGS on versions < 3.7
+method_keywords = "METH_KEYWORDS"
+method_coexist  = "METH_COEXIST"
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UFuncs.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UFuncs.py
new file mode 100644
index 0000000000000000000000000000000000000000..5e641d785bb963dadace6043304c53795923dea3
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UFuncs.py
@@ -0,0 +1,286 @@
+from . import (
+    Nodes,
+    ExprNodes,
+    FusedNode,
+    TreeFragment,
+    Pipeline,
+    ParseTreeTransforms,
+    Naming,
+    UtilNodes,
+)
+from .Errors import error
+from . import PyrexTypes
+from .UtilityCode import CythonUtilityCode
+from .Code import TempitaUtilityCode, UtilityCode
+from .Visitor import PrintTree, TreeVisitor, VisitorTransform
+
+# names of the numpy dtype constants for the signed integer types
+numpy_int_types = [
+    "NPY_BYTE", "NPY_INT8",
+    "NPY_SHORT", "NPY_INT16",
+    "NPY_INT", "NPY_INT32",
+    "NPY_LONG",
+    "NPY_LONGLONG",
+    "NPY_INT64",
+]
+# unsigned counterparts: the same names with a "U" right after the "NPY_" prefix
+numpy_uint_types = ["NPY_U" + tp[len("NPY_"):] for tp in numpy_int_types]
+
+# every supported numeric constant: signed ints, then unsigned ints, then floats
+# note: half float type is deliberately omitted
+numpy_numeric_types = list(numpy_int_types)
+numpy_numeric_types += numpy_uint_types
+# the floating point constants go last
+numpy_numeric_types += [
+    "NPY_FLOAT",
+    "NPY_FLOAT32",
+    "NPY_DOUBLE",
+    "NPY_FLOAT64",
+    "NPY_LONGDOUBLE",
+]
+
+
+def _get_type_constant(pos, type_):
+    # Name of the numpy dtype constant matching type_; unsupported types are reported via error().
+    if type_.is_complex:
+        # '==' rather than 'is': identity checks don't seem to work for complex types
+        for complex_type, constant in (
+                (PyrexTypes.c_float_complex_type, "NPY_CFLOAT"),
+                (PyrexTypes.c_double_complex_type, "NPY_CDOUBLE"),
+                (PyrexTypes.c_longdouble_complex_type, "NPY_CLONGDOUBLE")):
+            if type_ == complex_type:
+                return constant
+    elif type_.is_numeric:
+        candidate = "NPY_" + type_.empty_declaration_code().upper().replace(" ", "")
+        if candidate in numpy_numeric_types:
+            return candidate
+    elif type_.is_pyobject:
+        return "NPY_OBJECT"
+    # TODO possible NPY_BOOL to bint but it needs a cast?
+    # TODO NPY_DATETIME, NPY_TIMEDELTA, NPY_STRING, NPY_UNICODE and maybe NPY_VOID might be handleable
+    error(pos, "Type '%s' cannot be used as a ufunc argument" % type_)
+
+
+class _FindCFuncDefNode(TreeVisitor):
+    """
+    Tree visitor that locates a CFuncDefNode.
+
+    Assumes that the tree holds a single CFuncDefNode; once one has been
+    recorded, other nodes are no longer descended into.
+    """
+
+    found_node = None
+
+    def visit_Node(self, node):
+        # keep descending only while nothing has been found yet
+        if not self.found_node:
+            self.visitchildren(node)
+
+    def visit_CFuncDefNode(self, node):
+        self.found_node = node
+
+    def __call__(self, tree):
+        self.visit(tree)
+        return self.found_node
+
+
+def get_cfunc_from_tree(tree):  # runs a fresh _FindCFuncDefNode over tree; the result is its found_node (None if nothing was found)
+    return _FindCFuncDefNode()(tree)
+
+
+class _ArgumentInfo(object):
+    """
+    Describes one input or output argument of a ufunc.
+
+    type           - the PyrexType of the argument
+    type_constant  - name of the matching numpy dtype constant, e.g. "NPY_INT8"
+    """
+
+    def __init__(self, type, type_constant):
+        # plain record: both values are stored exactly as given
+        self.type, self.type_constant = type, type_constant
+
+
+class UFuncConversion(object):
+    # Collects, for a single C function node, the argument and return type
+    # information and the utility code used to wrap it as a ufunc.
+    def __init__(self, node):
+        self.node = node
+        self.global_scope = node.local_scope.global_scope()
+
+        self.in_definitions = self.get_in_type_info()
+        self.out_definitions = self.get_out_type_info()
+
+    def get_in_type_info(self):
+        # one _ArgumentInfo per function argument, in order
+        return [
+            _ArgumentInfo(arg.type, _get_type_constant(self.node.pos, arg.type))
+            for arg in self.node.args
+        ]
+
+    def get_out_type_info(self):
+        # a ctuple return type contributes one output per component,
+        # any other return type is the single output
+        return_type = self.node.return_type
+        if return_type.is_ctuple:
+            components = return_type.components
+        else:
+            components = [return_type]
+        return [
+            _ArgumentInfo(component, _get_type_constant(self.node.pos, component))
+            for component in components
+        ]
+
+    def generate_cy_utility_code(self):
+        # Loads the "UFuncDefinition" utility code for this function and returns its tree.
+        entry = self.node.entry
+        arg_types = [info.type for info in self.in_definitions]
+        out_types = [info.type for info in self.out_definitions]
+        inline_func_decl = entry.type.declaration_code(entry.cname, pyrex=True)
+        entry.used = True
+
+        ufunc_cname = self.global_scope.next_id(entry.name + "_ufunc_def")
+
+        # true only if neither the arguments nor the outputs are Python objects
+        will_be_called_without_gil = not any(
+            t.is_pyobject for t in arg_types + out_types
+        )
+
+        context = {
+            "func_cname": ufunc_cname,
+            "in_types": arg_types,
+            "out_types": out_types,
+            "inline_func_call": entry.cname,
+            "inline_func_declaration": inline_func_decl,
+            "nogil": entry.type.nogil,
+            "will_be_called_without_gil": will_be_called_without_gil,
+        }
+        utility_code = CythonUtilityCode.load(
+            "UFuncDefinition", "UFuncs.pyx",
+            context=context, outer_module_scope=self.global_scope,
+        )
+        return utility_code.get_tree(entries_only=True)
+
+    def use_generic_utility_code(self):
+        # use the invariant C utility code
+        generic_utilities = (
+            ("UFuncsInit", "UFuncs_C.c"),
+            ("NumpyImportUFunc", "NumpyImportArray.c"),
+        )
+        for util_name, util_file in generic_utilities:
+            self.global_scope.use_utility_code(UtilityCode.load_cached(util_name, util_file))
+
+
+def convert_to_ufunc(node):
+    # Returns [node] followed by the generated ufunc statements, or node itself
+    # when it cannot be converted (in which case an error is reported).
+    if isinstance(node, Nodes.CFuncDefNode):
+        original_node, is_fused = node, False
+    elif isinstance(node, FusedNode.FusedCFuncDefNode) and isinstance(
+            node.node, Nodes.CFuncDefNode):
+        original_node, is_fused = node.node, True
+    else:
+        error(node.pos, "Only C functions can be converted to a ufunc")
+        return node
+
+    if original_node.local_scope.parent_scope.is_c_class_scope:
+        error(node.pos, "Methods cannot currently be converted to a ufunc")
+        return node
+
+    # a fused function gets one converter per entry of node.nodes
+    converters = [UFuncConversion(n) for n in (node.nodes if is_fused else [node])]
+    if not converters:
+        return  # this path probably shouldn't happen
+
+    primary = converters[0]
+    del primary.global_scope.entries[original_node.entry.name]
+    # the generic utility code is generic, so there's no reason to do it multiple times
+    primary.use_generic_utility_code()
+    return [node] + _generate_stats_from_converters(converters, original_node)
+
+
+def generate_ufunc_initialization(converters, cfunc_nodes, original_node):
+    # Returns the assignment node "<func_name> = PyUFunc_FromFuncAndData(...)" declared in
+    # the global scope, after requesting the "UFuncConsts" utility code with the generated names.
+    global_scope = converters[0].global_scope
+    ufunc_funcs_name = global_scope.next_id(Naming.pyrex_prefix + "funcs")
+    ufunc_types_name = global_scope.next_id(Naming.pyrex_prefix + "types")
+    ufunc_data_name = global_scope.next_id(Naming.pyrex_prefix + "data")
+
+    # flat list of type constants: inputs then outputs of each converter in turn;
+    # all converters have to agree on the number of inputs and on the number of outputs
+    type_constants = []
+    narg_in = narg_out = None
+    for converter in converters:
+        in_const = [d.type_constant for d in converter.in_definitions]
+        if narg_in is None:
+            narg_in = len(in_const)
+        assert narg_in == len(in_const)
+        type_constants += in_const
+
+        out_const = [d.type_constant for d in converter.out_definitions]
+        if narg_out is None:
+            narg_out = len(out_const)
+        assert narg_out == len(out_const)
+        type_constants += out_const
+
+    func_cnames = [cfnode.entry.cname for cfnode in cfunc_nodes]
+
+    consts_context = {
+        "ufunc_funcs_name": ufunc_funcs_name,
+        "func_cnames": func_cnames,
+        "ufunc_types_name": ufunc_types_name,
+        "type_constants": type_constants,
+        "ufunc_data_name": ufunc_data_name,
+    }
+    global_scope.use_utility_code(
+        TempitaUtilityCode.load("UFuncConsts", "UFuncs_C.c", context=consts_context)
+    )
+
+    pos = original_node.pos
+    func_name = original_node.entry.name
+    docstr = original_node.doc
+    doc_literal = docstr.as_c_string_literal() if docstr else "NULL"
+    # C source text of the complete argument list of the call
+    args_to_func = '%s(), %s, %s(), %s, %s, %s, PyUFunc_None, "%s", %s, 0' % (
+        ufunc_funcs_name,
+        ufunc_data_name,
+        ufunc_types_name,
+        len(func_cnames),
+        narg_in,
+        narg_out,
+        func_name,
+        doc_literal,
+    )
+
+    # use a dummy type because it's honestly too fiddly
+    dummy_func_type = PyrexTypes.CFuncType(
+        PyrexTypes.py_object_type,
+        [PyrexTypes.CFuncTypeArg("dummy", PyrexTypes.c_void_ptr_type, None)],
+    )
+    # the pre-formatted argument text is handed over as a single constant
+    raw_args = ExprNodes.ConstNode(pos, type=PyrexTypes.c_void_ptr_type, value=args_to_func)
+    call_node = ExprNodes.PythonCapiCallNode(
+        pos,
+        function_name="PyUFunc_FromFuncAndData",
+        func_type=dummy_func_type,
+        args=[raw_args],
+    )
+
+    lhs_entry = global_scope.declare_var(func_name, PyrexTypes.py_object_type, pos)
+    target = ExprNodes.NameNode(
+        pos, name=func_name, type=PyrexTypes.py_object_type, entry=lhs_entry)
+    return Nodes.SingleAssignmentNode(pos, lhs=target, rhs=call_node)
+
+
+def _generate_stats_from_converters(converters, node):
+    # One generated ufunc function node per converter, followed by the ufunc initialisation assignment.
+    ufunc_nodes = []
+    for conv in converters:
+        utility_tree = conv.generate_cy_utility_code()
+        ufunc_nodes.append(get_cfunc_from_tree(utility_tree))
+        # merge in any utility code
+        conv.global_scope.utility_code_list.extend(utility_tree.scope.utility_code_list)
+    init_stat = generate_ufunc_initialization(converters, ufunc_nodes, node)
+    ufunc_nodes.append(init_stat)
+    return ufunc_nodes
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UtilNodes.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UtilNodes.py
new file mode 100644
index 0000000000000000000000000000000000000000..81d3038ead414e8cb19b1cd1b0af674080a6b545
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/UtilNodes.py
@@ -0,0 +1,388 @@
+#
+# Nodes used as utilities and support for transforms etc.
+# These often make up sets including both Nodes and ExprNodes
+# so it is convenient to have them in a separate module.
+#
+
+from __future__ import absolute_import
+
+from . import Nodes
+from . import ExprNodes
+from .Nodes import Node
+from .ExprNodes import AtomicExprNode
+from .PyrexTypes import c_ptr_type, c_bint_type
+
+
+class TempHandle(object):
+    # DEPRECATED: use LetRefNode instead.  Describes one temp requested from a TempsBlockNode.
+    temp = None  # name of the allocated temp; assigned by TempsBlockNode.generate_execution_code
+    needs_xdecref = False  # when true, TempsBlockNode cleans up with put_xdecref_clear instead of put_decref_clear
+    def __init__(self, type, needs_cleanup=None):
+        self.type = type
+        if needs_cleanup is None:
+            self.needs_cleanup = type.is_pyobject  # default: clean up exactly when the type is a Python object
+        else:
+            self.needs_cleanup = needs_cleanup  # explicit caller choice wins
+
+    def ref(self, pos):
+        return TempRefNode(pos, handle=self, type=self.type)  # new reference node at `pos`, bound to this handle and its type
+
+
+class TempRefNode(AtomicExprNode):
+    # DEPRECATED: use LetRefNode instead.  Expression node that reads/writes the temp of a TempHandle.
+    # handle   TempHandle
+
+    def analyse_types(self, env):
+        assert self.type == self.handle.type  # the type was fixed at construction; only check it still matches the handle
+        return self
+
+    def analyse_target_types(self, env):
+        assert self.type == self.handle.type  # same consistency check when used as an assignment target
+        return self
+
+    def analyse_target_declaration(self, env):
+        pass  # nothing to declare
+
+    def calculate_result_code(self):
+        result = self.handle.temp  # None until a TempsBlockNode has allocated the temp
+        if result is None: result = "<error>"  # placeholder; might be called and overwritten
+        return result
+
+    def generate_result_code(self, code):
+        pass  # emits no code of its own
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
+        if self.type.is_pyobject:
+            rhs.make_owned_reference(code)
+            # TODO: analyse control flow to see if this is necessary
+            code.put_xdecref(self.result(), self.ctype())  # drop the value currently stored in the temp
+        code.putln('%s = %s;' % (
+            self.result(),
+            rhs.result() if overloaded_assignment else rhs.result_as(self.ctype()),  # raw rhs result for overloaded assignment, else rhs.result_as() this node's ctype
+        ))
+        rhs.generate_post_assignment_code(code)
+        rhs.free_temps(code)
+
+
+class TempsBlockNode(Node):
+    # THIS IS DEPRECATED, USE LetNode instead
+
+    """
+    Creates a block which allocates temporary variables.
+    This is used by transforms to output constructs that need
+    to make use of a temporary variable. Simply pass the types
+    of the needed temporaries to the constructor.
+
+    The variables can be referred to using a TempRefNode
+    (which can be constructed by calling get_ref_node).
+    """
+
+    # temps   [TempHandle]
+    # body    StatNode
+
+    child_attrs = ["body"]  # only the body is a child node; the temps list is not listed here
+
+    def generate_execution_code(self, code):
+        for handle in self.temps:  # step 1: allocate every temp and record its name on the handle
+            handle.temp = code.funcstate.allocate_temp(
+                handle.type, manage_ref=handle.needs_cleanup)
+        self.body.generate_execution_code(code)  # step 2: emit the body
+        for handle in self.temps:  # step 3: clean up (if requested) and release each temp
+            if handle.needs_cleanup:
+                if handle.needs_xdecref:
+                    code.put_xdecref_clear(handle.temp, handle.type)
+                else:
+                    code.put_decref_clear(handle.temp, handle.type)
+            code.funcstate.release_temp(handle.temp)
+
+    def analyse_declarations(self, env):
+        self.body.analyse_declarations(env)  # forwarded to the body
+
+    def analyse_expressions(self, env):
+        self.body = self.body.analyse_expressions(env)  # store the returned node as the new body
+        return self
+
+    def generate_function_definitions(self, env, code):
+        self.body.generate_function_definitions(env, code)  # forwarded to the body
+
+    def annotate(self, code):
+        self.body.annotate(code)  # forwarded to the body
+
+
+class ResultRefNode(AtomicExprNode):
+    # A reference to the result of an expression.  The result_code
+    # must be set externally (usually a temp name); see result() for the fallback.
+
+    subexprs = []
+    lhs_of_first_assignment = False  # when true, generate_assignment_code skips the decref of the old value
+
+    def __init__(self, expression=None, pos=None, type=None, may_hold_none=True, is_temp=False):
+        self.expression = expression
+        self.pos = None
+        self.may_hold_none = may_hold_none
+        if expression is not None:
+            self.pos = expression.pos  # defaults taken from the wrapped expression
+            self.type = getattr(expression, "type", None)
+        if pos is not None:
+            self.pos = pos  # explicit pos overrides the expression's
+        if type is not None:
+            self.type = type  # explicit type overrides the expression's
+        if is_temp:
+            self.is_temp = True
+        assert self.pos is not None  # a position must come from either the expression or the pos argument
+
+    def clone_node(self):
+        # cloning returns this same node, not a copy
+        return self
+
+    def type_dependencies(self, env):
+        if self.expression:
+            return self.expression.type_dependencies(env)
+        else:
+            return ()
+
+    def update_expression(self, expression):
+        self.expression = expression
+        type = getattr(expression, "type", None)
+        if type:
+            self.type = type  # keep the old type when the new expression has none
+
+    def analyse_target_declaration(self, env):
+        pass  # OK - we can assign to this
+
+    def analyse_types(self, env):
+        if self.expression is not None:
+            if not self.expression.type:
+                self.expression = self.expression.analyse_types(env)  # analyse only expressions that have no type yet
+            self.type = self.expression.type
+        return self
+
+    def infer_type(self, env):
+        if self.type is not None:
+            return self.type  # a known type on this node takes precedence
+        if self.expression is not None:
+            if self.expression.type is not None:
+                return self.expression.type
+            return self.expression.infer_type(env)
+        assert False, "cannot infer type of ResultRefNode"
+
+    def may_be_none(self):
+        if not self.type.is_pyobject:
+            return False
+        return self.may_hold_none  # value of the constructor flag
+
+    def _DISABLED_may_be_none(self):
+        # not sure if this is safe - the expression may not be the
+        # only value that gets assigned
+        if self.expression is not None:
+            return self.expression.may_be_none()
+        if self.type is not None:
+            return self.type.is_pyobject
+        return True  # play it safe
+
+    def is_simple(self):
+        return True
+
+    def result(self):
+        try:
+            return self.result_code
+        except AttributeError:
+            if self.expression is not None:
+                self.result_code = self.expression.result()  # fall back to the wrapped expression's result and cache it
+        return self.result_code  # NOTE(review): raises AttributeError when neither result_code nor an expression is set
+
+    def generate_evaluation_code(self, code):
+        pass  # emits nothing
+
+    def generate_result_code(self, code):
+        pass  # emits nothing
+
+    def generate_disposal_code(self, code):
+        pass  # emits nothing
+
+    def generate_assignment_code(self, rhs, code, overloaded_assignment=False):
+        if self.type.is_pyobject:
+            rhs.make_owned_reference(code)
+            if not self.lhs_of_first_assignment:
+                code.put_decref(self.result(), self.ctype())  # drop the previously stored value
+        code.putln('%s = %s;' % (
+            self.result(),
+            rhs.result() if overloaded_assignment else rhs.result_as(self.ctype()),  # raw rhs result for overloaded assignment, else rhs.result_as() this node's ctype
+        ))
+        rhs.generate_post_assignment_code(code)
+        rhs.free_temps(code)
+
+    def allocate_temps(self, env):
+        pass  # no-op
+
+    def release_temp(self, env):
+        pass  # no-op
+
+    def free_temps(self, code):
+        pass  # no-op
+
+
+class LetNodeMixin:
+    def set_temp_expr(self, lazy_temp):
+        self.lazy_temp = lazy_temp  # the reference node whose result_code gets set in setup_temp_expr
+        self.temp_expression = lazy_temp.expression  # the expression whose value is held
+
+    def setup_temp_expr(self, code):
+        self.temp_expression.generate_evaluation_code(code)
+        self.temp_type = self.temp_expression.type
+        if self.temp_type.is_array:
+            self.temp_type = c_ptr_type(self.temp_type.base_type)  # array types are replaced by a pointer to their base type
+        self._result_in_temp = self.temp_expression.result_in_temp()  # remembered so teardown_temp_expr takes the matching branch
+        if self._result_in_temp:
+            self.temp = self.temp_expression.result()  # reuse the expression's own result
+        else:
+            if self.temp_type.is_memoryviewslice:
+                self.temp_expression.make_owned_memoryviewslice(code)
+            else:
+                self.temp_expression.make_owned_reference(code)
+            self.temp = code.funcstate.allocate_temp(
+                self.temp_type, manage_ref=True)
+            code.putln("%s = %s;" % (self.temp, self.temp_expression.result()))  # copy the value into the new temp
+            self.temp_expression.generate_disposal_code(code)
+            self.temp_expression.free_temps(code)
+        self.lazy_temp.result_code = self.temp  # publish the temp name through the reference node
+
+    def teardown_temp_expr(self, code):
+        if self._result_in_temp:
+            self.temp_expression.generate_disposal_code(code)  # disposal was deferred while the expression's result was in use
+            self.temp_expression.free_temps(code)
+        else:
+            if self.temp_type.needs_refcounting:
+                code.put_decref_clear(self.temp, self.temp_type)
+            code.funcstate.release_temp(self.temp)  # give back the temp allocated in setup_temp_expr
+
+
+class EvalWithTempExprNode(ExprNodes.ExprNode, LetNodeMixin):
+    # A wrapper around a subexpression that moves an expression into a
+    # temp variable and provides it to the subexpression.
+
+    subexprs = ['temp_expression', 'subexpression']
+
+    def __init__(self, lazy_temp, subexpression):
+        self.set_temp_expr(lazy_temp)  # sets self.lazy_temp and self.temp_expression
+        self.pos = subexpression.pos
+        self.subexpression = subexpression
+        # if called after type analysis, we already know the type here
+        self.type = self.subexpression.type
+
+    def infer_type(self, env):
+        return self.subexpression.infer_type(env)  # delegated to the subexpression
+
+    def may_be_none(self):
+        return self.subexpression.may_be_none()  # delegated to the subexpression
+
+    def result(self):
+        return self.subexpression.result()  # this node's result is the subexpression's result
+
+    def analyse_types(self, env):
+        self.temp_expression = self.temp_expression.analyse_types(env)
+        self.lazy_temp.update_expression(self.temp_expression)  # overwrite in case it changed
+        self.subexpression = self.subexpression.analyse_types(env)
+        self.type = self.subexpression.type
+        return self
+
+    def free_subexpr_temps(self, code):
+        self.subexpression.free_temps(code)  # only the subexpression is handled here, not temp_expression
+
+    def generate_subexpr_disposal_code(self, code):
+        self.subexpression.generate_disposal_code(code)  # only the subexpression is handled here, not temp_expression
+
+    def generate_evaluation_code(self, code):
+        self.setup_temp_expr(code)  # evaluate the temp expression and publish its temp name
+        self.subexpression.generate_evaluation_code(code)
+        self.teardown_temp_expr(code)  # dispose of / release the temp after the subexpression was evaluated
+
+
+LetRefNode = ResultRefNode  # alias: both names refer to the same class
+
+
+class LetNode(Nodes.StatNode, LetNodeMixin):
+    # Implements a local temporary variable scope. Imagine this
+    # syntax being present:
+    # let temp = VALUE:
+    #     BLOCK (can modify temp)
+    #     if temp is an object, decref
+    #
+    # Usually used after analysis phase, but forwards analysis methods
+    # to its children
+
+    child_attrs = ['temp_expression', 'body']
+
+    def __init__(self, lazy_temp, body):
+        self.set_temp_expr(lazy_temp)  # sets self.lazy_temp and self.temp_expression
+        self.pos = body.pos
+        self.body = body
+
+    def analyse_declarations(self, env):
+        self.temp_expression.analyse_declarations(env)
+        self.body.analyse_declarations(env)
+
+    def analyse_expressions(self, env):
+        self.temp_expression = self.temp_expression.analyse_expressions(env)  # NOTE(review): unlike EvalWithTempExprNode.analyse_types, lazy_temp is not updated here
+        self.body = self.body.analyse_expressions(env)
+        return self
+
+    def generate_execution_code(self, code):
+        self.setup_temp_expr(code)  # evaluate the temp expression and publish its temp name
+        self.body.generate_execution_code(code)
+        self.teardown_temp_expr(code)  # dispose of / release the temp after the body
+
+    def generate_function_definitions(self, env, code):
+        self.temp_expression.generate_function_definitions(env, code)
+        self.body.generate_function_definitions(env, code)
+
+
+class TempResultFromStatNode(ExprNodes.ExprNode):
+    # An ExprNode wrapper around a StatNode that executes the StatNode
+    # body.  Requires a ResultRefNode that it sets up to refer to its
+    # own temp result.  The StatNode must assign a value to the result
+    # node, which then becomes the result of this node.
+
+    subexprs = []
+    child_attrs = ['body']
+
+    def __init__(self, result_ref, body):
+        self.result_ref = result_ref
+        self.pos = body.pos
+        self.body = body
+        self.type = result_ref.type  # this node has the type of the reference node
+        self.is_temp = 1
+
+    def analyse_declarations(self, env):
+        self.body.analyse_declarations(env)
+
+    def analyse_types(self, env):
+        self.body = self.body.analyse_expressions(env)  # the body is a statement, so it uses analyse_expressions
+        return self
+
+    def may_be_none(self):
+        return self.result_ref.may_be_none()
+
+    def generate_result_code(self, code):
+        self.result_ref.result_code = self.result()  # point the reference node at this node's result before emitting the body
+        self.body.generate_execution_code(code)
+
+    def generate_function_definitions(self, env, code):
+        self.body.generate_function_definitions(env, code)
+
+
+class HasGilNode(AtomicExprNode):
+    """
+    Simple node that evaluates to 0 or 1 depending on whether we're
+    in a nogil context
+    """
+    type = c_bint_type
+
+    def analyse_types(self, env):
+        return self  # type is fixed on the class, nothing to analyse
+
+    def generate_result_code(self, code):
+        self.has_gil = code.funcstate.gil_owned  # recorded here; calculate_result_code reads it
+
+    def calculate_result_code(self):
+        return "1" if self.has_gil else "0"  # the literal "1" when has_gil is true, otherwise "0"
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.py
new file mode 100644
index 0000000000000000000000000000000000000000..92e2eb9c0d304612df1843a9421ba4b858bff6ad
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/Visitor.py
@@ -0,0 +1,871 @@
+# cython: infer_types=True
+# cython: language_level=3str
+# cython: auto_pickle=False
+
+#
+#   Tree visitor and transform framework
+#
+
+from __future__ import absolute_import, print_function
+
+import sys
+import inspect
+
+from . import TypeSlots
+from . import Builtin
+from . import Nodes
+from . import ExprNodes
+from . import Errors
+from . import DebugFlags
+from . import Future
+
+import cython
+
+
+cython.declare(_PRINTABLE=tuple)  # declares _PRINTABLE as a tuple for Cython
+
+if sys.version_info[0] >= 3:
+    _PRINTABLE = (bytes, str, int, float)  # attribute value types that TreeVisitor.dump_node prints via repr()
+else:
+    _PRINTABLE = (str, unicode, long, int, float)  # Python 2 branch: includes unicode and long
+
+
+class TreeVisitor(object):
+    """
+    Base class for writing visitors for a Cython tree, contains utilities for
+    recursing such trees using visitors. Each node is
+    expected to have a child_attrs iterable containing the names of attributes
+    containing child nodes or lists of child nodes. Lists are not considered
+    part of the tree structure (i.e. contained nodes are considered direct
+    children of the parent node).
+
+    visit_children visits each of the children of a given node (see the visit_children
+    documentation). When recursing the tree using visit_children, an attribute
+    access_path is maintained which gives information about the current location
+    in the tree as a stack of tuples: (parent_node, attrname, index), representing
+    the node, attribute and optional list index that was taken in each step in the path to
+    the current node.
+
+    Example:
+
+    >>> class SampleNode(object):
+    ...     child_attrs = ["head", "body"]
+    ...     def __init__(self, value, head=None, body=None):
+    ...         self.value = value
+    ...         self.head = head
+    ...         self.body = body
+    ...     def __repr__(self): return "SampleNode(%s)" % self.value
+    ...
+    >>> tree = SampleNode(0, SampleNode(1), [SampleNode(2), SampleNode(3)])
+    >>> class MyVisitor(TreeVisitor):
+    ...     def visit_SampleNode(self, node):
+    ...         print("in %s %s" % (node.value, self.access_path))
+    ...         self.visitchildren(node)
+    ...         print("out %s" % node.value)
+    ...
+    >>> MyVisitor().visit(tree)
+    in 0 []
+    in 1 [(SampleNode(0), 'head', None)]
+    out 1
+    in 2 [(SampleNode(0), 'body', 0)]
+    out 2
+    in 3 [(SampleNode(0), 'body', 1)]
+    out 3
+    out 0
+    """
+    def __init__(self):
+        super(TreeVisitor, self).__init__()
+        self.dispatch_table = {}  # type(obj) -> handler method, filled lazily by _visit
+        self.access_path = []  # stack of (parent, attrname, index) tuples pushed by _visitchild
+
+    def dump_node(self, node):
+        ignored = list(node.child_attrs or []) + [
+            'child_attrs', 'pos', 'gil_message', 'cpp_message', 'subexprs']  # child attributes and these names are never printed
+        values = []
+        pos = getattr(node, 'pos', None)
+        if pos:
+            source = pos[0]
+            if source:
+                import os.path
+                source = os.path.basename(source.get_description())
+            values.append(u'%s:%s:%s' % (source, pos[1], pos[2]))  # first entry: the three pos items, with the source shown as a base name
+        attribute_names = dir(node)
+        for attr in attribute_names:
+            if attr in ignored:
+                continue
+            if attr.startswith('_') or attr.endswith('_'):
+                continue  # skip names that begin or end with an underscore
+            try:
+                value = getattr(node, attr)
+            except AttributeError:
+                continue  # listed by dir() but not readable
+            if value is None or value == 0:
+                continue  # omit None and anything that compares equal to 0
+            elif isinstance(value, list):
+                value = u'[...]/%d' % len(value)  # lists are summarised by their length only
+            elif not isinstance(value, _PRINTABLE):
+                continue  # other non-printable types are omitted
+            else:
+                value = repr(value)
+            values.append(u'%s = %s' % (attr, value))
+        return u'%s(%s)' % (node.__class__.__name__, u',\n    '.join(values))
+
+    def _find_node_path(self, stacktrace):
+        import os.path
+        last_traceback = stacktrace
+        nodes = []  # (node, method_name, (file base name, line number)) per matching frame
+        while hasattr(stacktrace, 'tb_frame'):  # walk the tb_next chain until it ends
+            frame = stacktrace.tb_frame
+            node = frame.f_locals.get('self')
+            if isinstance(node, Nodes.Node):  # only frames whose local 'self' is a tree node are recorded
+                code = frame.f_code
+                method_name = code.co_name
+                pos = (os.path.basename(code.co_filename),
+                       frame.f_lineno)
+                nodes.append((node, method_name, pos))
+                last_traceback = stacktrace  # remember the last traceback entry that belonged to a node
+            stacktrace = stacktrace.tb_next
+        return (last_traceback, nodes)
+
+    def _raise_compiler_error(self, child, e):
+        trace = ['']
+        for parent, attribute, index in self.access_path:  # first part of the trace: the tree path taken so far
+            node = getattr(parent, attribute)
+            if index is None:
+                index = ''
+            else:
+                node = node[index]  # a recorded index means the attribute is indexed to reach the node
+                index = u'[%d]' % index
+            trace.append(u'%s.%s%s = %s' % (
+                parent.__class__.__name__, attribute, index,
+                self.dump_node(node)))
+        stacktrace, called_nodes = self._find_node_path(sys.exc_info()[2])  # second part: node methods found in the current traceback
+        last_node = child
+        for node, method_name, pos in called_nodes:
+            last_node = node  # ends up as the last node found in the traceback, or `child` if none was found
+            trace.append(u"File '%s', line %d, in %s: %s" % (
+                pos[0], pos[1], method_name, self.dump_node(node)))
+        raise Errors.CompilerCrash(
+            getattr(last_node, 'pos', None), self.__class__.__name__,
+            u'\n'.join(trace), e, stacktrace)
+
+    @cython.final
+    def find_handler(self, obj):
+        # resolve the handler by trying "visit_<ClassName>" along the entire class hierarchy (MRO)
+        cls = type(obj)
+        mro = inspect.getmro(cls)
+        for mro_cls in mro:
+            handler_method = getattr(self, "visit_" + mro_cls.__name__, None)
+            if handler_method is not None:
+                return handler_method  # first match in MRO order wins
+
+        print(type(self), cls)  # no handler found: print debugging details before raising
+        if self.access_path:
+            print(self.access_path)
+            print(self.access_path[-1][0].pos)
+            print(self.access_path[-1][0].__dict__)
+        raise RuntimeError("Visitor %r does not accept object: %s" % (self, obj))
+
+    def visit(self, obj):
+        # generic def entry point for calls from Python subclasses
+        return self._visit(obj)
+
+    @cython.final
+    def _visit(self, obj):
+        # fast cdef entry point for calls from Cython subclasses
+        try:
+            try:
+                handler_method = self.dispatch_table[type(obj)]
+            except KeyError:
+                handler_method = self.find_handler(obj)
+                self.dispatch_table[type(obj)] = handler_method  # cache the handler for this exact type
+            return handler_method(obj)
+        except Errors.CompileError:
+            raise  # passed through unchanged
+        except Errors.AbortError:
+            raise  # passed through unchanged
+        except Exception as e:
+            if DebugFlags.debug_no_exception_intercept:
+                raise  # debug flag set: keep the original exception
+            self._raise_compiler_error(obj, e)  # otherwise re-raise as Errors.CompilerCrash with a tree trace
+
+    @cython.final
+    def _visitchild(self, child, parent, attrname, idx):
+        # fast cdef entry point for calls from Cython subclasses
+        self.access_path.append((parent, attrname, idx))
+        result = self._visit(child)
+        self.access_path.pop()  # skipped if _visit raises (there is no try/finally here)
+        return result
+
+    def visitchildren(self, parent, attrs=None, exclude=None):
+        # generic def entry point for calls from Python subclasses
+        return self._visitchildren(parent, attrs, exclude)
+
+    @cython.final
+    @cython.locals(idx=cython.Py_ssize_t)
+    def _visitchildren(self, parent, attrs, exclude):
+        # fast cdef entry point for calls from Cython subclasses
+        """
+        Visits the children of the given parent. If parent is None, returns
+        immediately (returning None).
+
+        The return value is a dictionary giving the results for each
+        child (mapping the attribute name to either the return value
+        or a list of return values (in the case of multiple children
+        in an attribute)).
+        """
+        if parent is None: return None
+        result = {}
+        for attr in parent.child_attrs:
+            if attrs is not None and attr not in attrs: continue  # `attrs` restricts which attributes are visited
+            if exclude is not None and attr in exclude: continue  # `exclude` removes attributes from the visit
+            child = getattr(parent, attr)
+            if child is not None:  # attributes set to None get no entry in the result
+                if type(child) is list:
+                    childretval = [self._visitchild(x, parent, attr, idx) for idx, x in enumerate(child)]
+                else:
+                    childretval = self._visitchild(child, parent, attr, None)
+                    assert not isinstance(childretval, list), 'Cannot insert list here: %s in %r' % (attr, parent)
+                result[attr] = childretval
+        return result
+
+
+class VisitorTransform(TreeVisitor):
+    """
+    A tree transform is a base class for visitors that wants to do stream
+    processing of the structure (rather than attributes etc.) of a tree.
+
+    It implements __call__ to simply visit the argument node.
+
+    It requires the visitor methods to return the nodes which should take
+    the place of the visited node in the result tree (which can be the same
+    or one or more replacement). Specifically, if the return value from
+    a visitor method is:
+
+    - [] or None; the visited node will be removed (set to None if an attribute and
+    removed if in a list)
+    - A single node; the visited node will be replaced by the returned node.
+    - A list of nodes; the visited nodes will be replaced by all the nodes in the
+    list. This will only work if the node was already a member of a list; if it
+    was not, an exception will be raised. (Typically you want to ensure that you
+    are within a StatListNode or similar before doing this.)
+    """
+    def visitchildren(self, parent, attrs=None, exclude=None):
+        # generic def entry point for calls from Python subclasses
+        return self._process_children(parent, attrs, exclude)
+
+    @cython.final
+    def _process_children(self, parent, attrs=None, exclude=None):
+        # fast cdef entry point for calls from Cython subclasses
+        result = self._visitchildren(parent, attrs, exclude)  # NOTE(review): _visitchildren returns None for a None parent, which .items() below would not accept
+        for attr, newnode in result.items():
+            if type(newnode) is list:
+                newnode = self._flatten_list(newnode)
+            setattr(parent, attr, newnode)  # write the visit result back onto the parent
+        return result  # the dict as returned by _visitchildren; list values in it are not the flattened ones
+
+    @cython.final
+    def _flatten_list(self, orig_list):
+        # Flatten the list one level and remove any None
+        newlist = []
+        for x in orig_list:
+            if x is not None:
+                if type(x) is list:
+                    newlist.extend(x)  # splice the items of a nested list in place
+                else:
+                    newlist.append(x)
+        return newlist
+
+    def visitchild(self, parent, attr, idx=0):
+        # Helper to visit specific children from Python subclasses
+        child = getattr(parent, attr)
+        if child is not None:
+            node = self._visitchild(child, parent, attr, idx)  # NOTE(review): idx defaults to 0, not None; _raise_compiler_error indexes the attribute for any non-None index - confirm
+            if node is not child:
+                setattr(parent, attr, node)  # store the replacement only when the node actually changed
+            child = node
+        return child  # the (possibly replaced) child, or None if the attribute was None
+
+    def recurse_to_children(self, node):
+        self._process_children(node)
+        return node
+
+    def __call__(self, root):
+        return self._visit(root)
+
+
+class CythonTransform(VisitorTransform):
+    """
+    Certain common conventions and utilities for Cython transforms.
+
+     - Sets up the context of the pipeline in self.context
+     - Tracks directives in effect in self.current_directives
+    """
+    def __init__(self, context):
+        super(CythonTransform, self).__init__()
+        self.context = context
+
+    def __call__(self, node):
+        from .ModuleNode import ModuleNode  # imported inside the method rather than at module level
+        if isinstance(node, ModuleNode):
+            self.current_directives = node.directives  # only set here when the root is a ModuleNode
+        return super(CythonTransform, self).__call__(node)
+
+    def visit_CompilerDirectivesNode(self, node):
+        old = self.current_directives  # saved so it can be restored after the children were processed
+        self.current_directives = node.directives
+        self._process_children(node)
+        self.current_directives = old  # not restored if processing the children raises (no try/finally)
+        return node
+
+    def visit_Node(self, node):
+        self._process_children(node)  # handler named for Node: process the children and keep the node itself
+        return node
+
+
+class ScopeTrackingTransform(CythonTransform):
+    # Keeps track of the type of the enclosing scope while visiting.
+    #scope_type: can be either of 'module', 'function', 'cclass', 'pyclass', 'struct'
+    #scope_node: the node that owns the current scope
+
+    def visit_ModuleNode(self, node):
+        self.scope_type = 'module'  # the module handler sets the scope and does not restore a previous one
+        self.scope_node = node
+        self._process_children(node)
+        return node
+
+    def visit_scope(self, node, scope_type):
+        prev = self.scope_type, self.scope_node  # remember the enclosing scope
+        self.scope_type = scope_type
+        self.scope_node = node
+        self._process_children(node)
+        self.scope_type, self.scope_node = prev  # restore it after the children were processed
+        return node
+
+    def visit_CClassDefNode(self, node):
+        return self.visit_scope(node, 'cclass')
+
+    def visit_PyClassDefNode(self, node):
+        return self.visit_scope(node, 'pyclass')
+
+    def visit_FuncDefNode(self, node):
+        return self.visit_scope(node, 'function')
+
+    def visit_CStructOrUnionDefNode(self, node):
+        return self.visit_scope(node, 'struct')
+
+
+class EnvTransform(CythonTransform):
+    """
+    This transformation keeps a stack of the environments.
+    """
+    def __call__(self, root):
+        self.env_stack = []  # stack of (node, scope) pairs, reset on every call
+        self.enter_scope(root, root.scope)  # the root's scope is the bottom entry
+        return super(EnvTransform, self).__call__(root)
+
+    def current_env(self):
+        return self.env_stack[-1][1]  # scope of the top-most entry
+
+    def current_scope_node(self):
+        return self.env_stack[-1][0]  # node of the top-most entry
+
+    def global_scope(self):
+        return self.current_env().global_scope()
+
+    def enter_scope(self, node, scope):
+        self.env_stack.append((node, scope))
+
+    def exit_scope(self):
+        self.env_stack.pop()
+
+    def visit_FuncDefNode(self, node):
+        self.visit_func_outer_attrs(node)  # outer_attrs are visited before the function's local scope is entered
+        self.enter_scope(node, node.local_scope)
+        self.visitchildren(node, attrs=None, exclude=node.outer_attrs)  # everything except outer_attrs is visited inside the local scope
+        self.exit_scope()
+        return node
+
+    def visit_func_outer_attrs(self, node):
+        self.visitchildren(node, attrs=node.outer_attrs)
+
+    def visit_GeneratorBodyDefNode(self, node):
+        self._process_children(node)  # no scope is entered for this node type
+        return node
+
+    def visit_ClassDefNode(self, node):
+        self.enter_scope(node, node.scope)
+        self._process_children(node)
+        self.exit_scope()
+        return node
+
+    def visit_CStructOrUnionDefNode(self, node):
+        self.enter_scope(node, node.scope)
+        self._process_children(node)
+        self.exit_scope()
+        return node
+
+    def visit_ScopedExprNode(self, node):
+        if node.expr_scope:
+            self.enter_scope(node, node.expr_scope)  # only nodes with an expr_scope push a stack entry
+            self._process_children(node)
+            self.exit_scope()
+        else:
+            self._process_children(node)
+        return node
+
+    def visit_CArgDeclNode(self, node):
+        # default arguments are evaluated in the outer scope
+        if node.default:
+            attrs = [attr for attr in node.child_attrs if attr != 'default']  # all child attributes except the default value
+            self._process_children(node, attrs)
+            self.enter_scope(node, self.current_env().outer_scope)  # visit the default with the outer scope on top of the stack
+            self.visitchildren(node, ('default',))
+            self.exit_scope()
+        else:
+            self._process_children(node)
+        return node
+
+
+class NodeRefCleanupMixin(object):
+    """
+    Clean up references to nodes that were replaced.
+
+    NOTE: this implementation assumes that the replacement is
+    done first, before hitting any further references during
+    normal tree traversal.  This needs to be arranged by calling
+    "self.visitchildren()" at a proper place in the transform
+    and by ordering the "child_attrs" of nodes appropriately.
+    """
+    def __init__(self, *args):
+        super(NodeRefCleanupMixin, self).__init__(*args)
+        self._replacements = {}  # original node -> replacement node, recorded by replace()
+
+    def visit_CloneNode(self, node):
+        arg = node.arg
+        if arg not in self._replacements:
+            self.visitchildren(arg)  # no replacement recorded yet: visit the children of the referenced node
+        node.arg = self._replacements.get(arg, arg)  # use the replacement if there is one, else keep the original
+        return node
+
+    def visit_ResultRefNode(self, node):
+        expr = node.expression
+        if expr is None or expr not in self._replacements:
+            self.visitchildren(node)
+            expr = node.expression  # re-read the attribute after visiting the node's children
+        if expr is not None:
+            node.expression = self._replacements.get(expr, expr)  # use the replacement if there is one, else keep the original
+        return node
+
+    def replace(self, node, replacement):
+        self._replacements[node] = replacement  # record the mapping for later reference clean-up
+        return replacement
+
+
+find_special_method_for_binary_operator = {  # binary operator string -> special method name
+    '<':  '__lt__',
+    '<=': '__le__',
+    '==': '__eq__',
+    '!=': '__ne__',
+    '>=': '__ge__',
+    '>':  '__gt__',
+    '+':  '__add__',
+    '&':  '__and__',
+    '/':  '__div__',  # _visit_binop_node switches this to '__truediv__' when Future.division is in the future directives
+    '//': '__floordiv__',
+    '<<': '__lshift__',
+    '%':  '__mod__',
+    '*':  '__mul__',
+    '|':  '__or__',
+    '**': '__pow__',
+    '>>': '__rshift__',
+    '-':  '__sub__',
+    '^':  '__xor__',
+    'in': '__contains__',  # _visit_binop_node swaps the two operands for this one
+}.get  # bound dict.get: returns None for operators that are not listed
+
+
+find_special_method_for_unary_operator = {  # unary operator string -> special method name
+    'not': '__not__',
+    '~':   '__inv__',
+    '-':   '__neg__',
+    '+':   '__pos__',
+}.get  # bound dict.get: returns None for operators that are not listed
+
+
+class MethodDispatcherTransform(EnvTransform):
+    """
+    Base class for transformations that want to intercept on specific
+    builtin functions or methods of builtin types, including special
+    methods triggered by Python operators.  Must run after declaration
+    analysis when entries were assigned.
+
+    Naming pattern for handler methods is as follows:
+
+    * builtin functions: _handle_(general|simple|any)_function_NAME
+    * builtin methods: _handle_(general|simple|any)_method_TYPENAME_METHODNAME
+    * special methods without a type specific handler: _handle_(general|simple|any)_slot__NAME__
+    """
+    # only visit call nodes and Python operations
+    def visit_GeneralCallNode(self, node):  # calls that may carry keyword arguments
+        self._process_children(node)
+        function = node.function
+        if not function.type.is_pyobject:
+            return node  # only Python object callees are dispatched from here
+        arg_tuple = node.positional_args
+        if not isinstance(arg_tuple, ExprNodes.TupleNode):
+            return node  # positional arguments are not a TupleNode => leave the call untouched
+        keyword_args = node.keyword_args
+        if keyword_args and not isinstance(keyword_args, ExprNodes.DictNode):
+            # can't handle **kwargs
+            return node
+        args = arg_tuple.args
+        return self._dispatch_to_handler(node, function, args, keyword_args)
+
+    def visit_SimpleCallNode(self, node):  # calls dispatched without keyword arguments (kwargs=None below)
+        self._process_children(node)
+        function = node.function
+        if function.type.is_pyobject:
+            arg_tuple = node.arg_tuple  # Python callee: arguments are taken from the arg tuple node
+            if not isinstance(arg_tuple, ExprNodes.TupleNode):
+                return node
+            args = arg_tuple.args
+        else:
+            args = node.args  # non-Python callee: use the node's own argument list
+        return self._dispatch_to_handler(node, function, args, None)
+
+    def visit_PrimaryCmpNode(self, node):  # comparisons; cascaded ones are only traversed, not dispatched
+        if node.cascade:
+            # not currently handled below
+            self._process_children(node)
+            return node
+        return self._visit_binop_node(node)
+
+    def visit_BinopNode(self, node):
+        return self._visit_binop_node(node)
+
+    def _visit_binop_node(self, node):  # shared by non-cascaded comparisons and binary operators
+        self._process_children(node)
+        # FIXME: could special case 'not_in'
+        special_method_name = find_special_method_for_binary_operator(node.operator)
+        if special_method_name:  # None for operators that are missing from the lookup table
+            operand1, operand2 = node.operand1, node.operand2
+            if special_method_name == '__contains__':
+                operand1, operand2 = operand2, operand1  # 'x in c' => c.__contains__(x)
+            elif special_method_name == '__div__':
+                if Future.division in self.current_env().global_scope().context.future_directives:
+                    special_method_name = '__truediv__'  # the division future directive is active
+            obj_type = operand1.type  # the (possibly swapped) first operand selects the handler type
+            if obj_type.is_builtin_type:
+                type_name = obj_type.name
+            else:
+                type_name = "object"  # safety measure
+            node = self._dispatch_to_method_handler(
+                special_method_name, None, False, type_name,
+                node, None, [operand1, operand2], None)  # self_arg, function and kwargs are None here
+        return node
+
+    def visit_UnopNode(self, node):  # unary operators, same scheme as for binary operators
+        self._process_children(node)
+        special_method_name = find_special_method_for_unary_operator(node.operator)
+        if special_method_name:  # None for operators that are missing from the lookup table
+            operand = node.operand
+            obj_type = operand.type
+            if obj_type.is_builtin_type:
+                type_name = obj_type.name
+            else:
+                type_name = "object"  # safety measure
+            node = self._dispatch_to_method_handler(
+                special_method_name, None, False, type_name,
+                node, None, [operand], None)  # self_arg, function and kwargs are None here
+        return node
+
+    ### dispatch to specific handlers
+
+    def _find_handler(self, match_name, has_kwargs):  # returns the matching handler method or None
+        try:
+            match_name.encode('ascii')
+        except UnicodeEncodeError:
+            # specifically when running the Cython compiler under Python 2
+            #  getattr can't take a unicode string.
+            #  Classes with unicode names won't have specific handlers and thus it
+            #  should be OK to return None.
+            # Doing the test here ensures that the same code gets run on
+            # Python 2 and 3
+            return None
+
+        call_type = 'general' if has_kwargs else 'simple'  # 'general' handlers are for calls with kwargs
+        handler = getattr(self, '_handle_%s_%s' % (call_type, match_name), None)
+        if handler is None:
+            handler = getattr(self, '_handle_any_%s' % match_name, None)  # call type independent fallback
+        return handler
+
+    def _delegate_to_assigned_value(self, node, function, arg_list, kwargs):  # retry dispatch on the assigned value
+        assignment = function.cf_state[0]  # the caller checked cf_state.is_single before calling
+        value = assignment.rhs
+        if value.is_name:
+            if not value.entry or len(value.entry.cf_assignments) > 1:
+                # the variable might have been reassigned => play safe
+                return node
+        elif value.is_attribute and value.obj.is_name:
+            if not value.obj.entry or len(value.obj.entry.cf_assignments) > 1:
+                # the underlying variable might have been reassigned => play safe
+                return node
+        else:
+            return node  # neither a plain name nor an attribute of a plain name
+        return self._dispatch_to_handler(
+            node, value, arg_list, kwargs)
+
+    def _dispatch_to_handler(self, node, function, arg_list, kwargs):  # route a call by the kind of its callee
+        if function.is_name:
+            # we only consider functions that are either builtin
+            # Python functions or builtins that were already replaced
+            # into a C function call (defined in the builtin scope)
+            if not function.entry:
+                return node
+            entry = function.entry
+            is_builtin = (
+                entry.is_builtin or
+                entry is self.current_env().builtin_scope().lookup_here(function.name))
+            if not is_builtin:
+                if function.cf_state and function.cf_state.is_single:
+                    # we know the value of the variable
+                    # => see if it's usable instead
+                    return self._delegate_to_assigned_value(
+                        node, function, arg_list, kwargs)
+                if arg_list and entry.is_cmethod and entry.scope and entry.scope.parent_type.is_builtin_type:
+                    if entry.scope.parent_type is arg_list[0].type:
+                        # Optimised (unbound) method of a builtin type => try to "de-optimise".
+                        return self._dispatch_to_method_handler(
+                            entry.name, self_arg=None, is_unbound_method=True,
+                            type_name=entry.scope.parent_type.name,
+                            node=node, function=function, arg_list=arg_list, kwargs=kwargs)
+                return node
+            function_handler = self._find_handler(
+                "function_%s" % function.name, kwargs)
+            if function_handler is None:
+                return self._handle_function(node, function.name, function, arg_list, kwargs)
+            if kwargs:
+                return function_handler(node, function, arg_list, kwargs)
+            else:
+                return function_handler(node, function, arg_list)  # kwargs are only passed when there are any
+        elif function.is_attribute:
+            attr_name = function.attribute
+            if function.type.is_pyobject:
+                self_arg = function.obj
+            elif node.self and function.entry:
+                entry = function.entry.as_variable
+                if not entry or not entry.is_builtin:
+                    return node
+                # C implementation of a Python builtin method - see if we find further matches
+                self_arg = node.self
+                arg_list = arg_list[1:]  # drop CloneNode of self argument
+            else:
+                return node
+            obj_type = self_arg.type
+            is_unbound_method = False
+            if obj_type.is_builtin_type:
+                if obj_type is Builtin.type_type and self_arg.is_name and arg_list and arg_list[0].type.is_pyobject:
+                    # calling an unbound method like 'list.append(L,x)'
+                    # (ignoring 'type.mro()' here ...)
+                    type_name = self_arg.name
+                    self_arg = None  # the instance is already the first entry of arg_list
+                    is_unbound_method = True
+                else:
+                    type_name = obj_type.name
+            else:
+                type_name = "object"  # safety measure
+            return self._dispatch_to_method_handler(
+                attr_name, self_arg, is_unbound_method, type_name,
+                node, function, arg_list, kwargs)
+        else:
+            return node  # callee is neither a name nor an attribute => nothing to dispatch on
+
+    def _dispatch_to_method_handler(self, attr_name, self_arg,
+                                    is_unbound_method, type_name,
+                                    node, function, arg_list, kwargs):
+        method_handler = self._find_handler(
+            "method_%s_%s" % (type_name, attr_name), kwargs)  # type specific handler first
+        if method_handler is None:
+            if (attr_name in TypeSlots.special_method_names
+                    or attr_name in ['__new__', '__class__']):
+                method_handler = self._find_handler(
+                    "slot%s" % attr_name, kwargs)  # e.g. "slot__add__" => _handle_simple_slot__add__
+            if method_handler is None:
+                return self._handle_method(  # note: arg_list is passed without self_arg prepended here
+                    node, type_name, attr_name, function,
+                    arg_list, is_unbound_method, kwargs)
+        if self_arg is not None:
+            arg_list = [self_arg] + list(arg_list)  # specific handlers receive the self object first
+        if kwargs:
+            result = method_handler(
+                node, function, arg_list, is_unbound_method, kwargs)
+        else:
+            result = method_handler(
+                node, function, arg_list, is_unbound_method)  # kwargs are only passed when there are any
+        return result
+
+    def _handle_function(self, node, function_name, function, arg_list, kwargs):
+        """Fallback handler for functions without a specific handler: returns the node unchanged."""
+        return node
+
+    def _handle_method(self, node, type_name, attr_name, function,
+                       arg_list, is_unbound_method, kwargs):
+        """Fallback handler for methods without a specific handler: returns the node unchanged."""
+        return node
+
+
+class RecursiveNodeReplacer(VisitorTransform):
+    """
+    Tree transform that swaps every occurrence of one specific node
+    (matched by identity) in a subtree for a replacement node.
+    """
+    def __init__(self, orig_node, new_node):
+        super(RecursiveNodeReplacer, self).__init__()
+        self.orig_node = orig_node
+        self.new_node = new_node
+
+    def visit_CloneNode(self, node):
+        target = self.orig_node
+        if node is target:
+            return self.new_node
+        if node.arg is target:
+            node.arg = self.new_node  # patch the wrapped node in place, children are not visited
+        return node
+
+    def visit_Node(self, node):
+        self._process_children(node)
+        # children are processed before the node itself is compared
+        return self.new_node if node is self.orig_node else node
+
+def recursively_replace_node(tree, old_node, new_node):
+    # build the replacing transform and run it over 'tree'; nothing is returned
+    RecursiveNodeReplacer(old_node, new_node)(tree)
+
+
+class NodeFinder(TreeVisitor):
+    """
+    Visitor that records in 'found' whether a given node (matched by
+    identity) occurs in the visited subtree.
+    """
+    def __init__(self, node):
+        super(NodeFinder, self).__init__()
+        self.node, self.found = node, False
+
+    def visit_Node(self, node):
+        if self.found:
+            return  # short-circuit
+        if node is self.node:
+            self.found = True
+            return
+        self._visitchildren(node, None, None)
+
+def tree_contains(tree, node):
+    search = NodeFinder(node)  # identity based search for 'node'
+    search.visit(tree)
+    return search.found
+
+
+# Utils
+def replace_node(ptr, value):
+    """Store 'value' at the tree location described by 'ptr', a triple
+    (parent, attrname, listidx|None) as used on the access path stack.
+    """
+    owner, field, index = ptr
+    if index is not None:
+        getattr(owner, field)[index] = value  # attribute holds a list => replace one slot
+    else:
+        setattr(owner, field, value)
+
+
+class PrintTree(TreeVisitor):
+    """Prints a representation of the tree to standard output, one node
+    per line, indented by nesting depth.  Subclass and override repr_of
+    to provide more information about nodes. """
+    def __init__(self, start=None, end=None):  # optional inclusive line range that limits what gets printed
+        TreeVisitor.__init__(self)
+        self._indent = ""
+        if start is not None or end is not None:
+            self._line_range = (start or 0, end or 2**30)  # a missing bound defaults to 0 / 2**30
+        else:
+            self._line_range = None  # no filtering
+
+    def indent(self):
+        self._indent += "  "  # one nesting level is two spaces
+
+    def unindent(self):
+        self._indent = self._indent[:-2]
+
+    def __call__(self, tree, phase=None):  # prints a header line, dumps the tree, returns the same tree object
+        print("Parse tree dump at phase '%s'" % phase)
+        self.visit(tree)
+        return tree
+
+    # Don't do anything about process_list, the defaults gives
+    # nice-looking name[idx] nodes which will visually appear
+    # under the parent-node, not displaying the list itself in
+    # the hierarchy.
+    def visit_Node(self, node):
+        self._print_node(node)
+        self.indent()  # children are printed one level deeper
+        self.visitchildren(node)
+        self.unindent()
+        return node
+
+    def visit_CloneNode(self, node):  # additionally shows the wrapped 'arg' node and its children
+        self._print_node(node)
+        self.indent()
+        line = node.pos[1]
+        if self._line_range is None or self._line_range[0] <= line <= self._line_range[1]:
+            print("%s- %s: %s" % (self._indent, 'arg', self.repr_of(node.arg)))
+        self.indent()
+        self.visitchildren(node.arg)  # descends into the children of 'arg', not of the clone node
+        self.unindent()
+        self.unindent()
+        return node
+
+    def _print_node(self, node):  # prints one line for 'node' unless it lies outside the line range
+        line = node.pos[1]  # pos[1] is what gets compared against the line range
+        if self._line_range is None or self._line_range[0] <= line <= self._line_range[1]:
+            if len(self.access_path) == 0:
+                name = "(root)"
+            else:
+                parent, attr, idx = self.access_path[-1]  # last entry says how this node was reached
+                if idx is not None:
+                    name = "%s[%d]" % (attr, idx)
+                else:
+                    name = attr
+            print("%s- %s: %s" % (self._indent, name, self.repr_of(node)))
+
+    def repr_of(self, node):  # one-line description of a node; override to show more details
+        if node is None:
+            return "(none)"
+        else:
+            result = node.__class__.__name__
+            if isinstance(node, ExprNodes.NameNode):
+                result += "(type=%s, name=\"%s\")" % (repr(node.type), node.name)
+            elif isinstance(node, Nodes.DefNode):
+                result += "(name=\"%s\")" % node.name
+            elif isinstance(node, Nodes.CFuncDefNode):
+                result += "(name=\"%s\")" % node.declared_name()
+            elif isinstance(node, ExprNodes.AttributeNode):
+                result += "(type=%s, attribute=\"%s\")" % (repr(node.type), node.attribute)
+            elif isinstance(node, (ExprNodes.ConstNode, ExprNodes.PyConstNode)):
+                result += "(type=%s, value=%r)" % (repr(node.type), node.value)
+            elif isinstance(node, ExprNodes.ExprNode):
+                t = node.type
+                result += "(type=%s)" % repr(t)
+            elif node.pos:  # any other node that has a position: show where it comes from
+                pos = node.pos
+                path = pos[0].get_description()
+                if '/' in path:
+                    path = path.split('/')[-1]  # keep only the last path component
+                if '\\' in path:
+                    path = path.split('\\')[-1]  # same for backslash separated paths
+                result += "(pos=(%s:%s:%s))" % (path, pos[1], pos[2])
+
+            return result
+
+if __name__ == "__main__":
+    import doctest  # run the doctests of this module when it is executed as a script
+    doctest.testmod()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CmdLine.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CmdLine.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..dff1ba5b7f29fee160d5e34a05dc45eebbce2f31
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CmdLine.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CodeGeneration.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CodeGeneration.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0d545cc36b33d5307b9c6108912599fed068fc22
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/CodeGeneration.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/DebugFlags.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/DebugFlags.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bd6262c81a829cbac0b7ce2486ea0ecf8ec35f58
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/DebugFlags.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/FlowControl.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/FlowControl.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..bfc5e7b7f1cd0d1aed9b20180d63da0d93d73445
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/FlowControl.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Interpreter.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Interpreter.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..48912b89b307e15c167b7c59f8e9176203e7e36e
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/Interpreter.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/StringEncoding.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/StringEncoding.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..53433bba6777b18b6b1ae1e531f2fec4b893e1ae
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/StringEncoding.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/TypeSlots.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/TypeSlots.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..e97f98bdc27867b474a41c6bfbc8dcf0bad010cd
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/TypeSlots.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..abb4a450195ebee078d98483ac2f920a45093cf0
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Compiler/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..fa81adaff68e06d8e915a6afa375f62f7e5a8fad
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__init__.py
@@ -0,0 +1 @@
+# empty file
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__pycache__/TestJediTyper.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__pycache__/TestJediTyper.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..91ea588221cee73db8f8179f6d4ca622aa4f8478
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/Cython/Tests/__pycache__/TestJediTyper.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/INSTALLER b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/LICENSE.rst b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/LICENSE.rst
new file mode 100644
index 0000000000000000000000000000000000000000..9d227a0cc43c3268d15722b763bd94ad298645a1
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/LICENSE.rst
@@ -0,0 +1,28 @@
+Copyright 2010 Pallets
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+1.  Redistributions of source code must retain the above copyright
+    notice, this list of conditions and the following disclaimer.
+
+2.  Redistributions in binary form must reproduce the above copyright
+    notice, this list of conditions and the following disclaimer in the
+    documentation and/or other materials provided with the distribution.
+
+3.  Neither the name of the copyright holder nor the names of its
+    contributors may be used to endorse or promote products derived from
+    this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED
+TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/METADATA b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/METADATA
new file mode 100644
index 0000000000000000000000000000000000000000..dfe37d52dfbbfdffc5b3181923e51c610046ff12
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/METADATA
@@ -0,0 +1,93 @@
+Metadata-Version: 2.1
+Name: MarkupSafe
+Version: 2.1.5
+Summary: Safely add untrusted strings to HTML/XML markup.
+Home-page: https://palletsprojects.com/p/markupsafe/
+Maintainer: Pallets
+Maintainer-email: contact@palletsprojects.com
+License: BSD-3-Clause
+Project-URL: Donate, https://palletsprojects.com/donate
+Project-URL: Documentation, https://markupsafe.palletsprojects.com/
+Project-URL: Changes, https://markupsafe.palletsprojects.com/changes/
+Project-URL: Source Code, https://github.com/pallets/markupsafe/
+Project-URL: Issue Tracker, https://github.com/pallets/markupsafe/issues/
+Project-URL: Chat, https://discord.gg/pallets
+Classifier: Development Status :: 5 - Production/Stable
+Classifier: Environment :: Web Environment
+Classifier: Intended Audience :: Developers
+Classifier: License :: OSI Approved :: BSD License
+Classifier: Operating System :: OS Independent
+Classifier: Programming Language :: Python
+Classifier: Topic :: Internet :: WWW/HTTP :: Dynamic Content
+Classifier: Topic :: Text Processing :: Markup :: HTML
+Requires-Python: >=3.7
+Description-Content-Type: text/x-rst
+License-File: LICENSE.rst
+
+MarkupSafe
+==========
+
+MarkupSafe implements a text object that escapes characters so it is
+safe to use in HTML and XML. Characters that have special meanings are
+replaced so that they display as the actual characters. This mitigates
+injection attacks, meaning untrusted user input can safely be displayed
+on a page.
+
+
+Installing
+----------
+
+Install and update using `pip`_:
+
+.. code-block:: text
+
+    pip install -U MarkupSafe
+
+.. _pip: https://pip.pypa.io/en/stable/getting-started/
+
+
+Examples
+--------
+
+.. code-block:: pycon
+
+    >>> from markupsafe import Markup, escape
+
+    >>> # escape replaces special characters and wraps in Markup
+    >>> escape("<script>alert(document.cookie);</script>")
+    Markup('&lt;script&gt;alert(document.cookie);&lt;/script&gt;')
+
+    >>> # wrap in Markup to mark text "safe" and prevent escaping
+    >>> Markup("<strong>Hello</strong>")
+    Markup('<strong>Hello</strong>')
+
+    >>> escape(Markup("<strong>Hello</strong>"))
+    Markup('<strong>Hello</strong>')
+
+    >>> # Markup is a str subclass
+    >>> # methods and operators escape their arguments
+    >>> template = Markup("Hello <em>{name}</em>")
+    >>> template.format(name='"World"')
+    Markup('Hello <em>&#34;World&#34;</em>')
+
+
+Donate
+------
+
+The Pallets organization develops and supports MarkupSafe and other
+popular packages. In order to grow the community of contributors and
+users, and allow the maintainers to devote more time to the projects,
+`please donate today`_.
+
+.. _please donate today: https://palletsprojects.com/donate
+
+
+Links
+-----
+
+-   Documentation: https://markupsafe.palletsprojects.com/
+-   Changes: https://markupsafe.palletsprojects.com/changes/
+-   PyPI Releases: https://pypi.org/project/MarkupSafe/
+-   Source Code: https://github.com/pallets/markupsafe/
+-   Issue Tracker: https://github.com/pallets/markupsafe/issues/
+-   Chat: https://discord.gg/pallets
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/WHEEL b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/WHEEL
new file mode 100644
index 0000000000000000000000000000000000000000..4497ba5739a9af5b8409a27f2985495d6ff6068f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/WHEEL
@@ -0,0 +1,6 @@
+Wheel-Version: 1.0
+Generator: bdist_wheel (0.42.0)
+Root-Is-Purelib: false
+Tag: cp311-cp311-manylinux_2_17_x86_64
+Tag: cp311-cp311-manylinux2014_x86_64
+
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/top_level.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..75bf729258f9daef77370b6df1a57940f90fc23f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/MarkupSafe-2.1.5.dist-info/top_level.txt
@@ -0,0 +1 @@
+markupsafe
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/tests/gl1.gpickle.bz2 b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/tests/gl1.gpickle.bz2
new file mode 100644
index 0000000000000000000000000000000000000000..5e9291ea7aa77204bbaab28651e6a4d4f47a4bea
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/networkx/algorithms/flow/tests/gl1.gpickle.bz2
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:cf8f81ceb5eaaee1621aa60b892d83e596a6173f6f6517359b679ff3daa1b0f8
+size 44623
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/INSTALLER b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/INSTALLER
new file mode 100644
index 0000000000000000000000000000000000000000..a1b589e38a32041e49332e5e81c2d363dc418d68
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/INSTALLER
@@ -0,0 +1 @@
+pip
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/RECORD b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/RECORD
new file mode 100644
index 0000000000000000000000000000000000000000..c42c6a8da7940687588e4c6cc8c4585514c8185f
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/RECORD
@@ -0,0 +1,23 @@
+nvidia/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nvidia/__pycache__/__init__.cpython-311.pyc,,
+nvidia/cublas/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nvidia/cublas/__pycache__/__init__.cpython-311.pyc,,
+nvidia/cublas/include/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nvidia/cublas/include/__pycache__/__init__.cpython-311.pyc,,
+nvidia/cublas/include/cublas.h,sha256=a0lLqy-k47NuwyDjuueC3W0Mpc908MTU7o5sMJqE-1w,41246
+nvidia/cublas/include/cublasLt.h,sha256=jr9DyHiX6fzzlbnvBZ4kbMEDpr1eAurHZlnHjsnNi-8,79035
+nvidia/cublas/include/cublasXt.h,sha256=CW9dyXYGSUW1wEXrVVyhU6OxBK1PUvMoYdVGlQT7L9A,37380
+nvidia/cublas/include/cublas_api.h,sha256=aJ4b8tkMtQfGgBQ32Nz2q4OaXxayynpCUCDUVMXVOpA,220681
+nvidia/cublas/include/cublas_v2.h,sha256=DrT-TOKePZcfL_ld1ECGv2F30_9KznXxj5WXoABe2v4,8811
+nvidia/cublas/include/nvblas.h,sha256=dXCLR-2oUiJFzLsDtIAK09m42ct4G0HWdYzBUuDPXpc,23341
+nvidia/cublas/lib/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
+nvidia/cublas/lib/__pycache__/__init__.cpython-311.pyc,,
+nvidia/cublas/lib/libcublas.so.11,sha256=O4HRcM1hPPnuJNMLSD97bYFw1tMqA1T8IH0JyUOuP2I,94729912
+nvidia/cublas/lib/libcublasLt.so.11,sha256=ttFKdGGz7BmVQn_RebmLkVyfyBBXNmq3v6RoFFGw0wg,574565016
+nvidia/cublas/lib/libnvblas.so.11,sha256=ZHNz0AIKU8cL1E0pUPgfbF7ewgaJmFWACnaqvhrifgI,745240
+nvidia_cublas_cu11-11.11.3.6.dist-info/INSTALLER,sha256=zuuue4knoyJ-UwPPXg8fezS7VCrXJQrAP7zeNuwvFQg,4
+nvidia_cublas_cu11-11.11.3.6.dist-info/License.txt,sha256=rW9YU_ugyg0VnQ9Y1JrkmDDC-Mk_epJki5zpCttMbM0,59262
+nvidia_cublas_cu11-11.11.3.6.dist-info/METADATA,sha256=mA43HfZFHjjDd8elta6JbTa_ysfzaRIG3x5ynq8HBhw,1506
+nvidia_cublas_cu11-11.11.3.6.dist-info/RECORD,,
+nvidia_cublas_cu11-11.11.3.6.dist-info/WHEEL,sha256=-kQi_VMfvRQozZJT7HUPMfY-5vLo0LVTmAylNJ3Ft98,106
+nvidia_cublas_cu11-11.11.3.6.dist-info/top_level.txt,sha256=fTkAtiFuL16nUrB9ytDDtpytz2t0B4NvYTnRzwAhO14,7
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/top_level.txt b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/top_level.txt
new file mode 100644
index 0000000000000000000000000000000000000000..862f7abf232cdfbb928609856247292e81c9decb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/nvidia_cublas_cu11-11.11.3.6.dist-info/top_level.txt
@@ -0,0 +1 @@
+nvidia
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/utils.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/utils.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..fbf5a573edffa844c6e526d6f4d0fc77c57da937
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/__pycache__/utils.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__init__.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__init__.py
new file mode 100644
index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/__init__.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/__init__.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..8c747fa4c33dd399659cf9ba8f54045b56a6fc79
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/__init__.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/meta.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/meta.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..0ec652e593df245657f9809897a6545c54839070
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/api/__pycache__/meta.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/ATen/templates/DispatchKeyNativeFunctions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/ATen/templates/DispatchKeyNativeFunctions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..7647f459a744b2eacfac6aaea4f49b86babbb234
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/ATen/templates/DispatchKeyNativeFunctions.cpp
@@ -0,0 +1,13 @@
+// ${generated_comment}
+${includes}
+${native_functions_include}
+
+namespace {
+${helper_fns}
+} // namespace
+
+${namespace_prologue}
+
+${native_function_definitions}
+
+${namespace_epilogue}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/README.md b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..bfa43899cc590959c2bfd74e38662ec03aaee3d6
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/README.md
@@ -0,0 +1,3 @@
+If you add a file to this directory, you **MUST** update
+`torch/CMakeLists.txt` and add the file as a dependency to
+the `add_custom_command` call.
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_annotated_fn_args.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_annotated_fn_args.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..acb42b89fab240523ea312ebe23d1a0cd39d5ba9
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_annotated_fn_args.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_view_funcs.cpython-311.pyc b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_view_funcs.cpython-311.pyc
new file mode 100644
index 0000000000000000000000000000000000000000..42de7baa21ca21c9f5d69d3c49a8381923c3b9fc
Binary files /dev/null and b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/__pycache__/gen_view_funcs.cpython-311.pyc differ
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/derivatives.yaml b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/derivatives.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..d5862d3f242606d64f29c053bfdd50b4a03f97f6
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/derivatives.yaml
@@ -0,0 +1,3140 @@
+# Defines derivative formulas and Python signatures of methods on Variable
+#
+# Note about possibly confusing nomenclature: An 'output gradient' is the
+# gradient of an output of a forward function. Output gradients are used as
+# the inputs to backward functions. `grads` is a vector of output gradients,
+# and `grad == grads[0]`, in all the derivative formulas in this file.
+# An 'input gradient' is the gradient of an input to a forward function.
+# Input gradients are the outputs of backward functions, corresponding to the
+# input names included in the derivative formulas defined in this file.
+# Also, every time we talk about computing a "gradient" we actually mean computing
+# the vector-Jacobian product using the given 'output gradient' as the vector.
+#
+# Each entry consists of:
+#   - A 'name', which specifies the ATen name of the function you
+#     are defining derivatives for, and an argument specification.
+#   - An optional 'dispatch' entry which can be used to specify
+#     per-autograd dispatch key derivatives. If this entry is not
+#     specified, then the gradient entries will be taken as the
+#     default gradients (i.e. registered for every backward dispatch
+#     key). (See _test_autograd_multiple_dispatch for an example
+#     of how to register separate derivatives for different dispatch keys.)
+#     The list of allowed dispatch keys (in addition to 'Default' which
+#     represents the Autograd alias key) is torchgen/model.py:AUTOGRAD_KEYS.
+#   - One or more gradients entries, mapping differentiable input
+#     names to a formula specifying how to compute its gradient.
+#     Note that a single gradient entry can specify the gradient
+#     formula for multiple input names, by specifying a key
+#     "input1, input2" (see atan2 for an example).
+#   - An argument can be flagged as 'non_differentiable'.
+#   - Optional entry with key 'output_differentiability' and value a list of the
+#     same length as the number of outputs from the forward function. The list
+#     should contain only booleans, specifying whether each of the output Tensors
+#     is differentiable.
+#     If it is not specified for a function that returns multiple elements but
+#     uses `grad` instead of `grads[idx]`, then all but the first output will
+#     be marked as non-differentiable.
+#     If none of the outputs is differentiable, you can also add the function
+#     name to `gen_variable_type.py`'s `DONT_REQUIRE_DERIVATIVE` list.
+#
+# There are two cases for Tensor and TensorList arguments here:
+#   - If that argument is differentiable, in the sense that a gradient with respect
+#     to that argument could exist, you should either:
+#       - Specify the formula for that gradient
+#       - Specify not_implemented("function_name") as a formula to say that this is not
+#         implemented yet (but might be in the future and the user can request that on an issue)
+#   - If that argument is not differentiable, for example because it is not a floating
+#     point dtype or the function is not differentiable with respect to that argument,
+#     you should either:
+#       - Do not specify any formula for this argument
+#       - Specify explicitly that this argument is "non_differentiable". Note that in this case,
+#         we trust you that this argument will never have requires_grad=True and it will be silently
+#         ignored if it does.
+#
+# If a function has out-of-place and in-place variants, then the derivative
+# definition for the in-place variant is optional. It will default to the
+# definition for the out-of-place variant. Note that _out variants are never
+# differentiable.
+#
+# Gradient expressions are standard C++ expressions operating on ATen
+# variables.  In a gradient expression, the following variables/functions
+# are in scope:
+#
+#   - 'grad', the gradient of the output (often spelled grad_output
+#     in Python) which we are going to left-multiply.
+#
+#     When a function returns multiple *differentiable* outputs,
+#     you can refer to the gradient of each output using 'grads',
+#     e.g., 'grads[0]', 'grads[1]'.
+#
+#     When a function returns multiple *differentiable* outputs that
+#     are named, you can refer to the gradient of each output using
+#     'grad_{name}', e.g., 'grad_x', 'grad_y'.
+#
+#     When a function returns *one* differentiable output (the
+#     first output) and some more nondifferentiable outputs,
+#     you MUST refer to the gradient of the differentiable output with
+#     'grad' (this case is special-cased in our code generation).
+#
+#     Note that the number of differentiable outputs can be modified by the
+#     'output_differentiability' entry (see above).
+#
+#     Across a differentiable function's derivatives set, it is not
+#     permitted to mix the use of "grad", "grads", and
+#     "grad_{name}". You must be consistent for that differentiable
+#     function.
+#
+#   - Any of the input arguments, tensor or non-tensor, including
+#     argument names that only appear in Declarations.yaml, e.g. 'output'.
+#
+#   - 'result', representing the result of evaluating the forward
+#     expression for ATen native function declarations. If the forward
+#     expression outputs a tuple, use 'resultX' instead to access the
+#     X-th entry (e.g. 'result0', 'result1').
+#
+#   - 'grad_input_mask', a std::array<bool, n>, specifies which input
+#     gradients are actually needed.  For example, in the entry
+#     `input0, input1: foo(grad_input_mask)`, `grad_input_mask` is a size
+#     two array, where `grad_input_mask[0]` is true if `input0` requires
+#     grad, and `grad_input_mask[1]` is true if `input1` requires grad.
+#
+#     (NB: if your function computes gradient for a list of tensors,
+#     the `grad_input_mask` will only have a single entry for the list
+#     specifying if either zero or at least one tensor from the list requires
+#     grad.  If we want to support more fine-grained signalling,
+#     we'll need some alternate variable which is not a std::array)
+#
+#   - 'retain_variables', a bool which is true if a user has specified
+#     that saved variables should be retained in case the backwards is
+#     run again later.  This allows an optimization where we can
+#     destroy saved buffers if we know variables are not going to be retained,
+#     e.g., it is used by _cudnn_rnn
+#
+#   - `wrap_opt_if` is a 2-argument function that accepts a tensor
+#     variable and a boolean condition that dictates whether to save that
+#     variable in a graph. The result of this function is `c10::optional<Tensor>`,
+#     and it is `c10::nullopt` when the condition evaluates to `false`,
+#     otherwise it is the variable wrapped in `c10::optional<Tensor>`.
+#     For example, wrap_opt_if(var_0, grad_input_mask[1] || grad_input_mask[2])
+#     would mean that `var_0` is saved as long as the second (grad_input_mask[1])
+#     or the third (grad_input_mask[2]) argument requires gradients.
+#     Another interpretation of this expression would read as `var_0` is needed
+#     in the backward computation of the second or the third argument.
+#     NOTE: the usage of `var_i.requires_grad()` in the conditional expression
+#     is not supported, use `grad_input_mask[i]` instead.
+#     NOTE: `wrap_opt_if` could be used to prevent saving redundant variables
+#     with multi-output backward formulas.
+#     See https://github.com/pytorch/pytorch/issues/97575 for more details
+#     on the issue.
+#
+# If you need a complex expression, e.g., with local variables,
+# write a _backward function in torch/csrc/autograd/FunctionsManual.cpp
+# and invoke it from here.  By the way, go read
+# https://github.com/zdevito/ATen/issues/163; this describes an
+# important hazard that occurs when porting backwards from Python to C++
+#
+# Double backwards gradient expressions can be somewhat confusing;
+# the most important thing to remember is: (1) you need to define a
+# derivative formula for every input, including inputs named things
+# like 'grad_output', and (2) the gradient to multiply with is always
+# called 'grad' (even though it really is a grad-grad).
+#
+# You can also add a forward derivative definition by defining a formula for
+# a returned value (in general "result" if the name is not specified). This
+# formula works the same way as the backward one and advanced implementations
+# should also be placed in the FunctionsManual file.
+# This formula should compute a single Jacobian vector product using the (primal)
+# value of the argument "foo_p", its forward grad "foo_t" and the result of the
+# function as "result".
+# Note that the forward derivative can be automatically generated in two cases:
+#     - if your function is linear (NOT affine or multi-linear), then you can
+#       specify so by just using the string "auto_linear" for the formula.
+#     - if your function is applied element wise (and has a single input), you
+#       can specify so by just using the string "auto_element_wise" for the formula.
+#
+# Note that to avoid unpacking overhead, functions taking TensorList as inputs
+# will always have their forward grad formula called. That formula is responsible
+# for checking whether any computation is needed and should return an undefined Tensor
+# when there is nothing to do. See "cat_jvp" (used by the `cat` entry) for a full example.
+#
+# NB: There are a number of gradient definitions in here which are bogus
+# (implemented using zeros_like).  These gradients are (hopefully) not
+# used by our frontend.  You MUST check the frontend code; search for
+# OpName.apply to see if it's still using a legacy Python style API.
+#
+# Note: Returning views.
+# The following cases exist:
+#     - If a function returns no view, it can have arbitrary outputs.
+#     - If a function returns at least one Tensor that is a differentiable view
+#       of one of its inputs:
+#         - If there is only one differentiable output, this Tensor is marked as a
+#           differentiable view (alias or transpose, for example).
+#         - If there is more than one differentiable output, by default all the views are
+#           marked as differentiable views and created with allow_rebase_history=false,
+#           meaning that any inplace operation on them will raise an error (unbind, for example).
+#
+#  Notes about undefined output gradients:
+#     All backward functions must support all combinations of undefined output
+#     gradient Tensors, where `grads[i].defined() == false`. Depending on the
+#     number of input and output grads your derivative formula uses, code
+#     generation may automatically add some level of undefined grad support,
+#     according to these three cases:
+#
+#       * 1 input grad and 1 output grad:
+#           Complete undefined grad support is automatically added, so you
+#           shouldn't have to think about it, unless there is a bug in the code
+#           generation.
+#
+#       * 1 input grad and multiple output grads:
+#           Undefined grad support is automatically added ONLY in the case where
+#           all output grads are undefined. You will have to add explicit support
+#           for cases where a subset of output grads is undefined.
+#
+#       * multiple input grads:
+#           No automatic support, so you will need to add it.
+#
+#     If your derivative formula uses more than one output grad, it is usually
+#     preferable to add undefined grad support in the backward function itself
+#     (if you're using one), rather than in the derivative formula in this file.
+#
+#     Undefined Tensors are created with the default constructor `at::Tensor()`.
+#     It is an efficient way to represent a Tensor filled with zeros because
+#     the Tensor holds no sizing information and no Storage data is allocated.
+#     As a consequence, Tensor operations cannot be performed on them.
+#     Therefore, your backward function should treat an undefined output grad as
+#     a zero, and it needs to be handled as a special case.
+#
+#     If all output grads are undefined, then it should be correct for the
+#     backward function to return undefined input grads. Since we use the chain
+#     rule, output grads equal to zero should result in input grads equal to zero,
+#     unless there is some rare special case.
+#
+#     If a subset of output grads is undefined, then it may be acceptable for
+#     the backward function to return undefined input grads--it depends on the
+#     specific function, so you'll have to determine that yourself. If returning
+#     an undefined Tensor is correct for a given input grad, it is also logically
+#     correct to return a defined grad full of zeros, but that would not be
+#     preferable since it would be less efficient.
+#
+# NB: The parameter names here MUST be consistent with the parameter names
+# in native_functions.yaml
+- name: abs(Tensor self) -> Tensor
+  self: grad * self.sgn()
+  result: handle_r_to_c(result.scalar_type(), self_t.conj() * self_p.sgn())
+
+- name: acos(Tensor self) -> Tensor
+  self: grad * -((-self * self + 1).rsqrt()).conj()
+  result: auto_element_wise
+
+- name: add.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  other: handle_r_to_c(other.scalar_type(), maybe_multiply(grad, alpha.conj()))
+  result: self_t + maybe_multiply(other_t, alpha)
+
+- name: add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  result: self_t.clone()
+
+- name: addbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta.conj())
+  batch1: maybe_multiply(grad.unsqueeze(0).expand_symint({ batch1.sym_size(0), batch1.sym_size(1), batch2.sym_size(2) }).bmm(batch2.transpose(1, 2).conj()), alpha.conj())
+  batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad.unsqueeze(0).expand_symint({ batch1.sym_size(0), batch1.sym_size(1), batch2.sym_size(2) })), alpha.conj())
+  result: maybe_multiply(self_t, beta) + maybe_multiply(batch1_t.bmm(batch2_p).sum(0), alpha) + maybe_multiply(batch1_p.bmm(batch2_t).sum(0), alpha)
+
+- name: addcdiv(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (value / tensor2).conj())
+  tensor2: handle_r_to_c(tensor2.scalar_type(), -grad * (value * tensor1 / (tensor2 * tensor2)).conj())
+  result: self_t + maybe_multiply(tensor1_t / tensor2_p, value) - maybe_multiply(tensor2_t * (tensor1_p / tensor2_p) / tensor2_p, value)
+
+- name: addcmul(Tensor self, Tensor tensor1, Tensor tensor2, *, Scalar value=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  tensor1: handle_r_to_c(tensor1.scalar_type(), grad * (tensor2 * value).conj())
+  tensor2: handle_r_to_c(tensor2.scalar_type(), grad * (tensor1 * value).conj())
+  result: self_t + maybe_multiply(tensor1_t * tensor2_p, value) + maybe_multiply(tensor2_t * tensor1_p, value)
+
+- name: addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta.conj())
+  mat1: mm_mat1_backward(grad, mat2, mat1.sym_sizes(), mat1.sym_strides(), mat1.layout(), alpha)
+  mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
+  result: maybe_multiply(self_t, beta) + maybe_multiply(mat1_t.mm(mat2_p), alpha) + maybe_multiply(mat1_p.mm(mat2_t), alpha)
+
+- name: _sparse_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta)
+  mat1: mm_mat1_sparse_backward(grad, mat1, mat2, alpha)
+  mat2: mm_mat2_backward(grad, mat1, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), alpha)
+
+- name: addmv(Tensor self, Tensor mat, Tensor vec, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta.conj())
+  mat: maybe_multiply(grad.ger(vec.conj()), alpha.conj())
+  vec: maybe_multiply(mat.t().conj().mv(grad), alpha.conj())
+  result: maybe_multiply(self_t, beta) + maybe_multiply(mat_t.mv(vec_p), alpha) + maybe_multiply(mat_p.mv(vec_t), alpha)
+
+- name: addr(Tensor self, Tensor vec1, Tensor vec2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta.conj())
+  vec1: maybe_multiply(grad.mv(vec2.conj()), alpha.conj())
+  vec2: maybe_multiply(grad.t().mv(vec1.conj()), alpha.conj())
+  result: maybe_multiply(self_t, beta) + maybe_multiply(vec1_t.outer(vec2_p), alpha) + maybe_multiply(vec1_p.outer(vec2_t), alpha)
+
+- name: affine_grid_generator(Tensor theta, SymInt[] size, bool align_corners) -> Tensor
+  theta: affine_grid_generator_backward_symint(grad, size, align_corners)
+
+- name: alias(Tensor(a) self) -> Tensor(a)
+  self: grad
+  result: self_t
+
+- name: angle(Tensor self) -> Tensor
+  self: angle_backward(grad, self)
+  result: handle_r_to_c(result.scalar_type(), angle_backward(self_t.conj(), self_p).conj())
+
+# The any()/all() entries below (and their variants) are necessary because TensorIterator
+# doesn't work on Variables (codegen does not unwrap the input Tensor for all() and any() ).
+- name: any(Tensor self) -> Tensor
+  output_differentiability: [False]
+
+- name: any.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
+  output_differentiability: [False]
+
+- name: any.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  output_differentiability: [False]
+
+- name: _is_all_true(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: _is_any_true(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: all(Tensor self) -> Tensor
+  output_differentiability: [False]
+
+- name: all.dim(Tensor self, int dim, bool keepdim=False) -> Tensor
+  output_differentiability: [False]
+
+- name: all.dims(Tensor self, int[]? dim=None, bool keepdim=False) -> Tensor
+  output_differentiability: [False]
+
+- name: acosh(Tensor self) -> Tensor
+# Save one rsqrt in the real case by using the identity sqrt(x*y) = sqrt(x)*sqrt(y), which holds for real, positive x and y (but not in the complex case)
+  self: "self.is_complex() ? grad * ((self + 1).rsqrt() * (self - 1).rsqrt()).conj() : grad * (self * self - 1).rsqrt()"
+  result: auto_element_wise
+
+- name: acosh_(Tensor(a!) self) -> Tensor(a!)
+  self: not_implemented("inplace version of acosh")
+
+- name: asinh(Tensor self) -> Tensor
+  self: grad * (self.pow(2) + 1).rsqrt().conj()
+  result: auto_element_wise
+
+- name: asinh_(Tensor(a!) self) -> Tensor(a!)
+  self: not_implemented("inplace version of asinh")
+
+- name: atanh(Tensor self) -> Tensor
+  self: grad * 1 / (1 - self.pow(2)).conj()
+  result: auto_element_wise
+
+- name: atanh_(Tensor(a!) self) -> Tensor(a!)
+  self: not_implemented("inplace version of atanh")
+
+- name: as_strided(Tensor(a) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a)
+  self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
+  result: auto_linear
+
+- name: as_strided_(Tensor(a!) self, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor(a!)
+  self: as_strided_backward(grad, TensorGeometry(self), size, stride, storage_offset)
+  result: auto_linear
+
+- name: asin(Tensor self) -> Tensor
+  self: grad * (-self * self + 1).rsqrt().conj()
+  result: auto_element_wise
+
+- name: atan(Tensor self) -> Tensor
+  self: grad / (self * self + 1).conj()
+  result: auto_element_wise
+
+- name: atan2(Tensor self, Tensor other) -> Tensor
+  self, other: atan2_backward(grad, self, other, grad_input_mask)
+  result: (-self_p * other_t + other_p * self_t) / (self_p.pow(2) + other_p.pow(2))
+
+- name: baddbmm(Tensor self, Tensor batch1, Tensor batch2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self: maybe_multiply(grad, beta.conj())
+  batch1: maybe_multiply(grad.bmm(batch2.transpose(1, 2).conj()), alpha.conj())
+  batch2: maybe_multiply(batch1.transpose(1, 2).conj().bmm(grad), alpha.conj())
+  result: maybe_multiply(self_t, beta) + maybe_multiply(batch1_t.bmm(batch2_p), alpha) + maybe_multiply(batch1_p.bmm(batch2_t), alpha)
+
+- name: bernoulli(Tensor self, *, Generator? generator=None) -> Tensor
+  self: zeros_like(grad)
+  result: auto_element_wise
+
+- name: bernoulli_.Tensor(Tensor(a!) self, Tensor p, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  p: zeros_like(p)
+  result: self_t.zero_()
+
+- name: bernoulli_.float(Tensor(a!) self, float p=0.5, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.zero_()
+
+- name: bmm(Tensor self, Tensor mat2) -> Tensor
+  self: grad.bmm(mat2.transpose(1, 2).conj())
+  mat2: self.transpose(1, 2).conj().bmm(grad)
+  result: self_t.bmm(mat2_p) + self_p.bmm(mat2_t)
+
+- name: matmul(Tensor self, Tensor other) -> Tensor
+  self, other: matmul_backward(grad, self, other, grad_input_mask)
+
+- name: cat(Tensor[] tensors, int dim=0) -> Tensor
+  tensors: cat_tensors_backward(grad, to_args_sizes_symint(tensors), to_args_scalartypes(tensors), dim)
+  result: cat_jvp(tensors, dim)
+
+- name: cauchy_(Tensor(a!) self, float median=0, float sigma=1, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.zero_()
+
+- name: ceil(Tensor self) -> Tensor
+  self: zeros_like(grad)
+  result: auto_element_wise
+
+- name: cholesky(Tensor self, bool upper=False) -> Tensor
+  self: cholesky_backward(grad, upper, result)
+
+- name: linalg_cholesky_ex(Tensor self, *, bool upper=False, bool check_errors=False) -> (Tensor L, Tensor info)
+  self: cholesky_backward(grad, upper, L)
+  L: cholesky_jvp(self_t, L, upper)
+
+- name: cholesky_solve(Tensor self, Tensor input2, bool upper=False) -> Tensor
+  self, input2: cholesky_solve_backward(grad, self, input2, result, upper, grad_input_mask)
+  result: cholesky_solve_jvp(result, input2_p, input2_t, self_t, upper)
+
+- name: cholesky_inverse(Tensor self, bool upper=False) -> Tensor
+  self: cholesky_inverse_backward(grad, self, upper, result)
+  result: cholesky_inverse_jvp(self_p, self_t, result, upper)
+
+# For clamp, gradient is not defined at the boundaries. But empirically it's helpful
+# to be able to get gradient on min and max, so we return the subgradient 1 for these cases.
+- name: clamp.Tensor(Tensor self, Tensor? min=None, Tensor? max=None) -> Tensor
+  self: clamp_backward(grad, self, min, max)
+  min, max: clamp_backward_min_max(grad, self, min, max, grad_input_mask)
+  result: clamp_jvp(self_p, self_t, min_p, min_t, max_p, max_t)
+
+- name: clamp(Tensor self, Scalar? min=None, Scalar? max=None) -> Tensor
+  self: clamp_backward(grad, self, min, max)
+  result: auto_element_wise
+
+- name: clamp_min(Tensor self, Scalar min) -> Tensor
+  self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
+  result: auto_element_wise
+
+- name: clamp_min.Tensor(Tensor self, Tensor min) -> Tensor
+  self: where(self >= min, grad, at::scalar_tensor(0., grad.options()))
+  min: where(self < min, grad, at::scalar_tensor(0., grad.options()))
+  result: where(self_p >= min_p, self_t, min_t)
+
+- name: clamp_max(Tensor self, Scalar max) -> Tensor
+  self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
+  result: auto_element_wise
+
+- name: clamp_max.Tensor(Tensor self, Tensor max) -> Tensor
+  self: where(self <= max, grad, at::scalar_tensor(0., grad.options()))
+  max: where(self > max, grad, at::scalar_tensor(0., grad.options()))
+  result: where(self_p <= max_p, self_t, max_t)
+
+- name: clone(Tensor self, *, MemoryFormat? memory_format=None) -> Tensor
+  self: grad
+  result: auto_linear
+
+- name: _lazy_clone(Tensor self) -> Tensor
+  self: grad
+  result: auto_linear
+
+- name: _to_copy(Tensor self, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None, bool non_blocking=False, MemoryFormat? memory_format=None) -> Tensor
+  self: _to_copy_backward(grad, self.options())
+  result: _to_copy(self_t, dtype, layout, device, pin_memory, non_blocking, memory_format)
+  # The condition is: if dtype is not nullopt, then isDifferentiableType(*dtype)
+  # (If dtype IS nullopt, we rely on the regular check that any input requires grad).
+  output_differentiability: ["!dtype || isDifferentiableType(*dtype)"]
+
+- name: _coalesce(Tensor self) -> Tensor
+  self: grad
+
+- name: complex(Tensor real, Tensor imag) -> Tensor
+  real: at::real(grad)
+  imag: at::imag(grad)
+  result: at::complex(real_t, imag_t)
+
+- name: polar(Tensor abs, Tensor angle) -> Tensor
+  abs, angle: polar_backward(grad, result)
+  result: at::complex(abs_t*angle_p.cos() - angle_t*abs_p*angle_p.sin(), abs_t*angle_p.sin() + angle_t*abs_p*angle_p.cos())
+
+- name: _conj(Tensor(a) self) -> Tensor(a)
+  self: grad.conj()
+  result: self_t.conj()
+
+- name: _neg_view(Tensor(a) self) -> Tensor(a)
+  self: grad.neg()
+  result: self_t._neg_view()
+
+- name: _conj_physical(Tensor self) -> Tensor
+  self: grad.conj_physical()
+  result: self_t.conj_physical()
+
+- name: conj_physical_(Tensor(a!) self) -> Tensor(a!)
+  self: grad.conj_physical()
+  result: self_t.conj_physical_()
+
+- name: copysign.Tensor(Tensor self, Tensor other) -> Tensor
+  self: copysign_tensor_self_backward(grad, self, result)
+  other: zeros_like(other)
+  result: copysign_tensor_self_backward(self_t, self_p, result)
+
+- name: copysign.Scalar(Tensor self, Scalar other) -> Tensor
+  self: copysign_tensor_self_backward(grad, self, result)
+  result: auto_element_wise
+
+- name: cos(Tensor self) -> Tensor
+  self: grad * -self.sin().conj()
+  result: auto_element_wise
+
+- name: cosh(Tensor self) -> Tensor
+  self: grad * self.sinh().conj()
+  result: auto_element_wise
+
+- name: count_nonzero.dim_IntList(Tensor self, int[] dim) -> Tensor
+  output_differentiability: [False]
+
+- name: count_nonzero(Tensor self, int? dim=None) -> Tensor
+  output_differentiability: [False]
+
+- name: linalg_cross(Tensor self, Tensor other, *, int dim=-1) -> Tensor
+  self: at::linalg_cross(other.conj(), grad, dim)
+  other: at::linalg_cross(grad, self.conj(), dim)
+  result: "at::linalg_cross(self_t, other_p, dim) + at::linalg_cross(self_p, other_t, dim)"
+
+- name: logcumsumexp(Tensor self, int dim) -> Tensor
+  self: logcumsumexp_backward(grad, self, result, dim)
+  result: logcumsumexp_jvp(self_p, self_t, dim)
+
+- name: cumprod(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
+  self: cumprod_backward(grad.to(self.scalar_type()), self, dim, result)
+  result: "cumprod_jvp(self_t, self_p, result, dim).to(dtype.has_value() ? *dtype : self_p.scalar_type())"
+
+- name: cumsum(Tensor self, int dim, *, ScalarType? dtype=None) -> Tensor
+  self: cumsum_backward(grad.to(self.scalar_type()), dim)
+  result: auto_linear
+
+- name: cummax(Tensor self, int dim) -> (Tensor values, Tensor indices)
+  self: cummaxmin_backward(grad, self, indices, dim)
+  values: self_t.gather(dim, indices)
+
+- name: cummin(Tensor self, int dim) -> (Tensor values, Tensor indices)
+  self: cummaxmin_backward(grad, self, indices, dim)
+  values: self_t.gather(dim, indices)
+
+- name: conv_tbc(Tensor self, Tensor weight, Tensor bias, int pad=0) -> Tensor
+  self, weight, bias: "grad.defined() ? conv_tbc_backward(grad, self, weight, bias, pad) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: _ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
+  log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, result1, blank, zero_infinity)
+
+- name: _ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank=0, bool zero_infinity=False) -> (Tensor, Tensor)
+  log_probs: _ctc_loss_backward(grad, log_probs, targets, input_lengths, target_lengths, result0, result1, blank, zero_infinity)
+
+- name: deg2rad(Tensor self) -> Tensor
+  self: deg2rad_backward(grad)
+  result: auto_element_wise
+
+- name: _linalg_det(Tensor A) -> (Tensor result, Tensor LU, Tensor pivots)
+  A: linalg_det_backward(grad, result, A, LU, pivots)
+  result: linalg_det_jvp(A_t, result, LU, pivots, A_p.is_contiguous() && !A_p.is_complex())
+  output_differentiability: [True, False, False]
+
+- name: _linalg_slogdet(Tensor A) -> (Tensor sign, Tensor logabsdet, Tensor LU, Tensor pivots)
+  A: slogdet_backward(grad_sign, grad_logabsdet, A, sign, LU, pivots)
+  sign, logabsdet: slogdet_jvp(LU, pivots, A_t, sign, A_p.is_contiguous() && !A_p.is_complex())
+  output_differentiability: [True, True, False, False]
+
+- name: block_diag(Tensor[] tensors) -> Tensor
+  tensors: block_diag_backward(grad, to_args_sizes(tensors), to_args_scalartypes(tensors))
+  result: block_diag_jvp(tensors)
+
+- name: diag_embed(Tensor self, int offset=0, int dim1=-2, int dim2=-1) -> Tensor
+  self: grad.diagonal(offset, dim1, dim2)
+  result: auto_linear
+
+- name: diagonal(Tensor(a) self, int offset=0, int dim1=0, int dim2=1) -> Tensor(a)
+  self: diagonal_backward_symint(grad, self.sym_sizes(), offset, dim1, dim2)
+  result: auto_linear
+
+- name: diagonal_backward(Tensor grad_output, SymInt[] input_sizes, int offset, int dim1, int dim2) -> Tensor
+  grad_output: grad.diagonal(offset, dim1, dim2)
+  result: auto_linear
+
+- name: dist(Tensor self, Tensor other, Scalar p=2) -> Tensor
+  self: norm_backward(grad, self - other, p, result)
+  other: -norm_backward(grad, self - other, p, result)
+  result: norm_jvp(self_p - other_p, self_t - other_t, p, result, {}, false)
+
+# The backward formula is done in this order to improve numerical stability
+# of the higher order derivatives, see https://github.com/pytorch/pytorch/issues/43414
+# Note that the self/other formulas don't use "result" because saving it would be BC-breaking when it is used in an inplace operation later
+- name: div.Tensor(Tensor self, Tensor other) -> Tensor
+  self: div_tensor_self_backward(grad, other, self.scalar_type())
+  other: div_tensor_other_backward(grad, self, other)
+  result: (self_t - other_t * result) / other_p
+
+# Scalar divisor: only self has a formula.
+- name: div.Scalar(Tensor self, Scalar other) -> Tensor
+  self: div_tensor_self_backward(grad, other, self.scalar_type())
+  result: self_t / other
+
+# rounding_mode overloads: when rounding_mode is set, the result formula is zeros shaped like result.
+- name: div.Tensor_mode(Tensor self, Tensor other, *, str? rounding_mode) -> Tensor
+  self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
+  other: div_tensor_other_backward(grad, self, other, rounding_mode)
+  result: "rounding_mode.has_value() ? result.new_zeros_symint(result.sym_sizes()) : self_t / other_p - other_t * (self_p / other_p) / other_p"
+
+- name: div.Scalar_mode(Tensor self, Scalar other, *, str? rounding_mode) -> Tensor
+  self: div_tensor_self_backward(grad, other, self.scalar_type(), rounding_mode)
+  result: "rounding_mode.has_value() ? result.new_zeros_symint(result.sym_sizes()) : self_t / other"
+
+# Each input's formula is grad times the conjugate of the other input; result sums the two mixed _t/_p dot products.
+- name: dot(Tensor self, Tensor tensor) -> Tensor
+  self: grad * tensor.conj()
+  tensor: grad * self.conj()
+  result: at::dot(self_t, tensor_p) + at::dot(self_p, tensor_t)
+
+# Unlike dot, the self formula conjugates grad and neither formula conjugates the other input.
+- name: vdot(Tensor self, Tensor other) -> Tensor
+  self: grad.conj() * other
+  other: grad * self
+  result: at::vdot(self_t, other_p) + at::vdot(self_p, other_t)
+
+# self formula uses the second output (result1) together with p; no result formula is listed.
+- name: _fused_dropout(Tensor self, float p, Generator? generator=None) -> (Tensor, Tensor)
+  self: _fused_dropout_backward(grad, result1, p)
+
+# input formula: GradMode::is_enabled() selects the infinitely_differentiable_ variant; both branches pass the
+# same scale expression (1 when train is unset or false, 0.0 when p == 1, otherwise 1.0 / (1.0 - p)).
+# NOTE(review): with train unset the input formula uses scale 1 while result0 takes the scaled branch
+# (its condition is !train.has_value() || train.value()) - confirm this asymmetry is intended.
+- name: native_dropout(Tensor input, float p, bool? train) -> (Tensor, Tensor)
+  input: "GradMode::is_enabled() ? infinitely_differentiable_native_dropout_backward(grad, result1, (!train.has_value() || !train.value() ? 1 : (p == 1 ? 0.0 : 1.0 / (1.0 - p)))) : native_dropout_backward(grad, result1, (!train.has_value() || !train.value() ? 1 : (p == 1 ? 0.0 : 1.0 / (1.0 - p))))"
+  result0: "(!train.has_value() || train.value()) ? (p == 1 ? 0.0 : 1.0 / (1.0 - p)) * input_t * result1 : input_t"
+
+# grad_output goes through native_dropout_double_backward; mask's formula is a not_implemented marker.
+- name: native_dropout_backward(Tensor grad_output, Tensor mask, float scale) -> Tensor
+  grad_output: "native_dropout_double_backward(grad, grad_output, mask, scale)"
+  mask: 'not_implemented("native_dropout_backward: mask")'
+
+# In-place eq_: every tensor input's formula is zeros_like of that input, and result zeroes self_t in place.
+- name: eq_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)
+  result: self_t.zero_()
+
+- name: eq_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+# erf and erfc self formulas differ only in sign.
+- name: erf(Tensor self) -> Tensor
+  self: 2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
+  result: auto_element_wise
+
+- name: erfc(Tensor self) -> Tensor
+  self: -2.0 / sqrt(M_PI) * exp(-(self.pow(2))) * grad
+  result: auto_element_wise
+
+# special_erfcx's self formula reads the forward output (result).
+- name: special_erfcx(Tensor self) -> Tensor
+  self: (2.0 * self * result - 2.0 / sqrt(M_PI)) * grad
+  result: auto_element_wise
+
+# erfinv's self formula recomputes self.erfinv() instead of reading result.
+- name: erfinv(Tensor self) -> Tensor
+  self: 0.5 * sqrt(M_PI) * exp(self.erfinv().pow(2)) * grad
+  result: auto_element_wise
+
+# exp / exp2 / expm1 all build the self formula from result.conj(): exp2 multiplies by M_LN2, expm1 adds 1.
+- name: exp(Tensor self) -> Tensor
+  self: grad * result.conj()
+  result: auto_element_wise
+
+- name: exp2(Tensor self) -> Tensor
+  self: grad * result.conj() * M_LN2
+  result: auto_element_wise
+
+- name: expm1(Tensor self) -> Tensor
+  self: grad * (result.conj() + 1)
+  result: auto_element_wise
+
+# TODO: this derivative is not SymInt safe, need sum_to support
+# self formula reduces grad to self's symbolic sizes via at::sum_to; size and implicit are not used by it.
+- name: expand(Tensor(a) self, SymInt[] size, *, bool implicit=False) -> Tensor(a)
+  self: at::sum_to(grad, self.sym_sizes())
+  result: auto_linear
+
+# In-place sampler: self formula is zeros_like(grad) and result zeroes self_t in place.
+- name: exponential_(Tensor(a!) self, float lambd=1, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.zero_()
+
+# cachemask variants: the self formula consumes only grad and the mask output; no other input has a formula.
+- name: fake_quantize_per_tensor_affine_cachemask(Tensor self, float scale, int zero_point, int quant_min, int quant_max) -> (Tensor output, Tensor mask)
+  self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
+
+- name: _fake_quantize_per_tensor_affine_cachemask_tensor_qparams(Tensor self, Tensor scale, Tensor zero_point, Tensor fake_quant_enabled, int quant_min, int quant_max) -> (Tensor output, Tensor mask)
+  self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
+
+# learnable variants: one formula yields the (self, scale, zero_point) tuple; an undefined grad yields a default-constructed tuple.
+- name: _fake_quantize_learnable_per_tensor_affine(Tensor self, Tensor scale, Tensor zero_point, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor
+  self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_tensor_affine_backward(grad, self, scale, zero_point, quant_min, quant_max, grad_factor) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: fake_quantize_per_channel_affine_cachemask(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max) -> (Tensor output, Tensor mask)
+  self: fake_quantize_per_channel_affine_cachemask_backward(grad, mask)
+
+- name: _fake_quantize_learnable_per_channel_affine(Tensor self, Tensor scale, Tensor zero_point, int axis, int quant_min, int quant_max, float grad_factor=1.0) -> Tensor
+  self, scale, zero_point: "grad.defined() ? _fake_quantize_learnable_per_channel_affine_backward(grad, self, scale, zero_point, axis, quant_min, quant_max, grad_factor) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# Uses the per-tensor cachemask backward for self, regardless of per_row_fake_quant.
+- name: _fused_moving_avg_obs_fq_helper(Tensor self, Tensor observer_on, Tensor fake_quant_on, Tensor(a!) running_min, Tensor(b!) running_max, Tensor(c!) scale, Tensor(d!) zero_point, float averaging_const, int quant_min, int quant_max, int ch_axis, bool per_row_fake_quant=False, bool symmetric_quant=False) -> (Tensor output, Tensor mask)
+  self: fake_quantize_per_tensor_affine_cachemask_backward(grad, mask)
+
+# All fill overloads: self formula is zeros_like(grad). Tensor overloads give value grad.sum() and fill the
+# result with value_t; Scalar overloads fill the result with 0.
+- name: fill.Scalar(Tensor self, Scalar value) -> Tensor
+  self: zeros_like(grad)
+  result: at::fill(self_t, 0)
+
+- name: fill.Tensor(Tensor self, Tensor value) -> Tensor
+  self: zeros_like(grad)
+  value: grad.sum()
+  result: at::fill(self_t, value_t)
+
+# In-place counterparts use self_t.fill_ instead of at::fill.
+- name: fill_.Scalar(Tensor(a!) self, Scalar value) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.fill_(0)
+
+- name: fill_.Tensor(Tensor(a!) self, Tensor value) -> Tensor(a!)
+  self: zeros_like(grad)
+  value: grad.sum()
+  result: self_t.fill_(value_t)
+
+# self formula is zeros_like(grad); result is marked auto_element_wise.
+- name: floor(Tensor self) -> Tensor
+  self: zeros_like(grad)
+  result: auto_element_wise
+
+# fmod: self passes grad through unchanged in both overloads.
+- name: fmod.Scalar(Tensor self, Scalar other) -> Tensor
+  self: grad
+  result: auto_element_wise
+
+# Tensor overload: other receives -grad times the "trunc"-rounded quotient self / other.
+- name: fmod.Tensor(Tensor self, Tensor other) -> Tensor
+  self: grad
+  other: -grad * self.div(other, /*rounding_mode=*/"trunc")
+  result: self_t - other_t * self_p.div(other_p, /*rounding_mode=*/"trunc")
+
+# Both formulas are pass-through: self gets grad, result gets self_t.
+- name: frac(Tensor self) -> Tensor
+  self: grad
+  result: self_t
+
+# Both formulas divide by exponent.exp2(); the exponent output has no formula of its own.
+- name: frexp.Tensor(Tensor self) -> (Tensor mantissa, Tensor exponent)
+  self: grad / exponent.exp2()
+  mantissa: self_t / exponent.exp2()
+
+# self goes through gather_backward (which also receives sparse_grad); index is non_differentiable.
+- name: gather(Tensor self, int dim, Tensor index, *, bool sparse_grad=False) -> Tensor
+  self: gather_backward(grad, self, dim, index, sparse_grad)
+  index: non_differentiable
+  result: auto_linear
+
+# In-place ge_: every tensor input's formula is zeros_like of that input, and result zeroes self_t in place.
+- name: ge_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)
+  result: self_t.zero_()
+
+- name: ge_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+# In-place sampler: self formula is zeros_like(grad) and result zeroes self_t in place.
+- name: geometric_(Tensor(a!) self, float p, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.zero_()
+
+# No derivative is provided: the self formula is a not_implemented marker.
+- name: geqrf(Tensor self) -> (Tensor a, Tensor tau)
+  self: not_implemented("geqrf")
+
+# Index accessors: each entry below only declares its single output as not differentiable.
+- name: indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+- name: _indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+- name: crow_indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+- name: col_indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+- name: ccol_indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+- name: row_indices(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+# One formula yields the (input, grid) pair; an undefined grad yields a default-constructed tuple.
+# The 2d and 3d entries pass grad_input_mask to their backward; the CPU fallback entry does not.
+- name: grid_sampler_2d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
+  input, grid: "grad.defined() ? grid_sampler_2d_backward(grad, input, grid, interpolation_mode, padding_mode, align_corners, grad_input_mask) : std::tuple<Tensor, Tensor>()"
+
+- name: grid_sampler_3d(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
+  input, grid: "grad.defined() ? grid_sampler_3d_backward(grad, input, grid, interpolation_mode, padding_mode, align_corners, grad_input_mask) : std::tuple<Tensor, Tensor>()"
+
+# See NOTE [ grid_sample CPU fallback ]
+- name: _grid_sampler_2d_cpu_fallback(Tensor input, Tensor grid, int interpolation_mode, int padding_mode, bool align_corners) -> Tensor
+  input, grid: "grad.defined() ? _grid_sampler_2d_cpu_fallback_backward(grad, input, grid, interpolation_mode, padding_mode, align_corners) : std::tuple<Tensor, Tensor>()"
+
+# In-place gt_: every tensor input's formula is zeros_like of that input, and result zeroes self_t in place.
+- name: gt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)
+  result: self_t.zero_()
+
+- name: gt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+# self is handled by hardsigmoid_backward(grad, self); result is marked auto_element_wise.
+- name: hardsigmoid(Tensor self) -> Tensor
+  self: hardsigmoid_backward(grad, self)
+  result: auto_element_wise
+
+# The single output is declared not differentiable; no formulas are listed.
+- name: histc(Tensor self, int bins=100, Scalar min=0, Scalar max=0) -> Tensor
+  output_differentiability: [False]
+
+# self is handled by hardswish_backward(grad, self); result is marked auto_element_wise.
+- name: hardswish(Tensor self) -> Tensor
+  self: hardswish_backward(grad, self)
+  result: auto_element_wise
+
+# Derivative of the backward op itself: the self formula is grad * grad_output / 3.0 where -3.0 < self < 3.0
+# and a zero scalar tensor elsewhere; the result formula adds the matching term built from self_t.
+- name: hardswish_backward(Tensor grad_output, Tensor self) -> Tensor
+  grad_output: hardswish_backward(grad, self)
+  self: at::where(at::logical_and(-3.0 < self, self < 3.0), grad * grad_output / 3.0, at::zeros({}, self.options()))
+  result: "hardswish_backward(grad_output_t, self_p)
+         + at::where(at::logical_and(-3.0 < self_p, self_p < 3.0), self_t * grad_output_p / 3.0, at::zeros({}, self_p.options()))"
+
+# Each input's formula is grad * input / result; the result formula sums the same ratios weighted by the _t values.
+- name: hypot(Tensor self, Tensor other) -> Tensor
+  self: grad * self / result
+  other: grad * other / result
+  result: self_t * self_p / result + other_t * other_p / result
+
+# i0 and special_i0e express their self formulas via special_i1 / special_i1e.
+- name: i0(Tensor self) -> Tensor
+  self: grad * at::special_i1(self)
+  result: auto_element_wise
+
+- name: special_i0e(Tensor self) -> Tensor
+  self: grad * (at::special_i1e(self) - self.sgn() * result)
+  result: auto_element_wise
+
+# special_i1 and special_i1e delegate to helper functions that also receive the forward result.
+- name: special_i1(Tensor self) -> Tensor
+  self: i1_backward(grad, self, result)
+  result: auto_element_wise
+
+- name: special_i1e(Tensor self) -> Tensor
+  self: i1e_backward(grad, self, result)
+  result: auto_element_wise
+
+# igamma and igammac: self is a not_implemented marker; the other formulas differ only in sign.
+- name: igamma(Tensor self, Tensor other) -> Tensor
+  self: 'not_implemented("igamma: input")'
+  other: grad * exp((self - 1) * log(other) - other - lgamma(self))
+
+- name: igammac(Tensor self, Tensor other) -> Tensor
+  self: 'not_implemented("igammac: input")'
+  other: -grad * exp((self - 1) * log(other) - other - lgamma(self))
+
+# Both entries build a zero tensor with self's symbolic sizes and options, then write grad into it at indices:
+# index.Tensor via index_backward, _unsafe_index.Tensor via at::_unsafe_index_put with accumulate = true.
+- name: index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
+  self: index_backward(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad)
+  result: auto_linear
+
+- name: _unsafe_index.Tensor(Tensor self, Tensor?[] indices) -> Tensor
+  self: at::_unsafe_index_put(grad.new_zeros_symint(self.sym_sizes(), self.options()), indices, grad, true)
+  result: auto_linear
+
+# self passes grad through; source selects grad along dim at index and is scaled by alpha via maybe_multiply.
+- name: index_add(Tensor self, int dim, Tensor index, Tensor source, *, Scalar alpha=1) -> Tensor
+  self: grad
+  # The case source.dim() == 0 is necessary to support scalar tensors of the form
+  # source.dim() == 0 and index.dim() == 1 and index.size() == (1,).
+  # This is because source is not broadcastable to index, as source.dim() < index.dim()
+  source: "maybe_multiply(source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.index_select(dim, index.squeeze(0)), alpha)"
+  index: non_differentiable
+  result: at::index_add(self_t, dim, index, maybe_multiply(source_t, alpha))
+
+# One index_reduce_backward call yields both self and source; it also receives the forward result. index is non_differentiable.
+- name: index_reduce(Tensor self, int dim, Tensor index, Tensor source, str reduce, *, bool include_self=True) -> Tensor
+  self, source: index_reduce_backward(grad, self, dim, index, source, reduce, include_self, result)
+  index: non_differentiable
+
+# self receives grad with the indexed slices filled with 0; source receives grad selected along dim at index.
+- name: index_copy(Tensor self, int dim, Tensor index, Tensor source) -> Tensor
+  self: grad.index_fill(dim, index, 0)
+  # The case source.dim() == 0 is necessary to support scalar tensors of the form
+  # source.dim() == 0 and index.dim() == 1 and index.size() == (1,).
+  # This is because source is not broadcastable to index, as source.dim() < index.dim()
+  source: "source.dim() > 0 ? grad.index_select(dim, index).expand_as(source) : grad.index_select(dim, index.squeeze(0))"
+  index: non_differentiable
+  result: self_t.index_copy(dim, index, source_t)
+
+# index_fill: self receives grad with the indexed slices filled with 0 in both overloads.
+- name: index_fill.int_Scalar(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
+  self: grad.index_fill(dim, index, 0)
+  index: non_differentiable
+  result: self_t.index_fill(dim, index, 0)
+
+# Tensor value: the value formula selects grad at the unsorted at::_unique(index) entries and sums them.
+- name: index_fill.int_Tensor(Tensor self, int dim, Tensor index, Tensor value) -> Tensor
+  self: grad.index_fill(dim, index, 0)
+  value: grad.index_select(dim, std::get<0>(at::_unique(index, /*sorted=*/false))).sum()
+  index: non_differentiable
+  result: self_t.index_fill(dim, index, value_t)
+
+# index_put family: with accumulate set, self passes grad through; otherwise the positions at indices are
+# overwritten with zeros. values reads grad back at indices.
+- name: index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
+  self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
+  values: grad.index(indices)
+  result: self_t.index_put(indices, values_t, accumulate)
+
+# Same formulas expressed with the at::_unsafe_index / at::_unsafe_index_put functions.
+- name: _unsafe_index_put(Tensor self, Tensor?[] indices, Tensor values, bool accumulate=False) -> Tensor
+  self: "accumulate ? grad : at::_unsafe_index_put(grad, indices, zeros_like(values), false)"
+  values: at::_unsafe_index(grad, indices)
+  result: at::_unsafe_index_put(self_t, indices, values_t, accumulate)
+
+# In-place impl: the result formula forwards the unsafe flag to at::_index_put_impl_.
+- name: _index_put_impl_(Tensor(a!) self, Tensor?[] indices, Tensor values, bool accumulate=False, bool unsafe=False) -> Tensor(a!)
+  self: "accumulate ? grad : grad.index_put(indices, zeros_like(values), false)"
+  values: grad.index(indices)
+  result: at::_index_put_impl_(self_t, indices, values_t, accumulate, unsafe)
+
+# self delegates to index_select_backward_symint with self's symbolic sizes; index is non_differentiable.
+- name: index_select(Tensor self, int dim, Tensor index) -> Tensor
+  self: index_select_backward_symint(grad, self.sym_sizes(), dim, index)
+  index: non_differentiable
+  result: auto_linear
+
+# Only inverse is flagged differentiable (info is not); both formulas are written in terms of the inverse output.
+- name: linalg_inv_ex(Tensor A, *, bool check_errors=False) -> (Tensor inverse, Tensor info)
+  A: -at::matmul(inverse.mH(), at::matmul(grad, inverse.mH()))
+  inverse: -at::matmul(at::matmul(inverse, A_t), inverse)
+  output_differentiability: [True, False]
+
+# Both formulas delegate to helpers (pinv_backward / pinv_jvp); atol, rtol and hermitian are not passed to either.
+- name: linalg_pinv.atol_rtol_tensor(Tensor self, *, Tensor? atol=None, Tensor? rtol=None, bool hermitian=False) -> Tensor
+  self: pinv_backward(grad, result, self)
+  result: pinv_jvp(self_p, result, self_t)
+
+# self is marked non_differentiable.
+- name: isnan(Tensor self) -> Tensor
+  self: non_differentiable
+
+# self routes grad through the returned indices; values gathers self_t at those same indices. k is not used by either formula.
+- name: kthvalue(Tensor self, int k, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+# In-place le_: every tensor input's formula is zeros_like of that input, and result zeroes self_t in place.
+- name: le_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)
+  result: self_t.zero_()
+
+- name: le_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+# Scalar weight: the self formula branches on weight.isComplex() to pick the complex or double conversion of weight.
+- name: lerp.Scalar(Tensor self, Tensor end, Scalar weight) -> Tensor
+  self: "weight.isComplex() ? grad * (1 - weight.conj().toComplexDouble()) : grad * (1 - weight.toDouble())"
+  end: grad * weight.conj()
+  result: at::lerp(self_t, end_t, weight)
+
+# Tensor weight: weight has its own formula, and the result formula adds a weight_t * (end_p - self_p) term.
+- name: lerp.Tensor(Tensor self, Tensor end, Tensor weight) -> Tensor
+  self: grad * (1 - weight).conj()
+  end: grad * weight.conj()
+  weight: grad * (end - self).conj()
+  result: at::lerp(self_t, end_t, weight_p) + weight_t * (end_p - self_p)
+
+# Chain of gamma-function derivatives: lgamma -> digamma -> polygamma(1, .) -> polygamma(n + 1, .).
+- name: lgamma(Tensor self) -> Tensor
+  self: grad * digamma(self)
+  result: auto_element_wise
+
+- name: digamma(Tensor self) -> Tensor
+  self: grad * polygamma(1, self)
+  result: auto_element_wise
+
+- name: polygamma(int n, Tensor self) -> Tensor
+  self: grad * polygamma(n + 1, self)
+  result: auto_element_wise
+
+# In-place variant: the result formula multiplies self_t in place and reads original_self_p rather than self_p.
+- name: polygamma_(Tensor(a!) self, int n) -> Tensor(a!)
+  self: grad * polygamma(n + 1, self)
+  result: self_t.mul_(polygamma(n + 1, original_self_p))
+
+# log / log10 / log2 divide grad by self.conj(); the literals 2.3025850929940456 and 0.6931471805599453
+# are ln(10) and ln(2).
+- name: log(Tensor self) -> Tensor
+  self: grad.div(self.conj())
+  result: auto_element_wise
+
+- name: log10(Tensor self) -> Tensor
+  self: grad / (self.conj() * 2.3025850929940456)
+  result: auto_element_wise
+
+# log1p delegates to the log1p_backward helper.
+- name: log1p(Tensor self) -> Tensor
+  self: log1p_backward(grad, self)
+  result: auto_element_wise
+
+- name: log2(Tensor self) -> Tensor
+  self: grad / (self.conj() * 0.6931471805599453)
+  result: auto_element_wise
+
+# logaddexp conjugates the denominator in the self/other formulas; logaddexp2 uses pow(2, .) and no conj.
+- name: logaddexp(Tensor self, Tensor other) -> Tensor
+  self: grad / (1 + exp(other - self)).conj()
+  other: grad / (1 + exp(self - other)).conj()
+  result: self_t / (1 + exp(other_p - self_p)) + other_t / (1 + exp(self_p - other_p))
+
+- name: logaddexp2(Tensor self, Tensor other) -> Tensor
+  self: grad / (1 + pow(2, other - self))
+  other: grad / (1 + pow(2, self - other))
+  result: self_t / (1 + pow(2, other_p - self_p)) + other_t / (1 + pow(2, self_p - other_p))
+
+# Note [Gradient formula for xlogy at x = 0, y <= 0]
+# x * log(y) is not defined at y <= 0, so we cannot even talk about differentiability.
+# Now, xlogy(0, y) = 0 by definition.
+# This does not make it differentiable, as it's not defined in a neighbourhood of a point
+# (0, y) when y <= 0.
+# Now, when a function is non-differentiable, sometimes we return "a relatively sensible value".
+# In this case, as per the discussion in https://github.com/pytorch/pytorch/issues/80770, we choose
+# this value to be zero, which is the directional derivative along the line {x = 0}.
+- name: xlogy.Tensor(Tensor self, Tensor other) -> Tensor
+  self: at::xlogy(grad, other).masked_fill((self == 0.) & (other <= 0.), 0.)
+  other: grad * self / other
+  result: at::xlogy(self_t, other_p).masked_fill((self_p == 0.) & (other_p <= 0.), 0.) + other_t * self_p / other_p
+
+# Scalar self: only other has a formula.
+- name: xlogy.Scalar_Self(Scalar self, Tensor other) -> Tensor
+  other: grad * self / other
+  result: auto_element_wise
+
+# Scalar other: the zero-masking at self == 0 is applied only when other.toDouble() is not > 0.
+- name: xlogy.Scalar_Other(Tensor self, Scalar other) -> Tensor
+  self: "other.toDouble() > 0.
+          ? at::xlogy(grad,  other)
+          : at::xlogy(grad,  other).masked_fill(self == 0., 0.)"
+  result: auto_element_wise
+
+# See Note [Gradient formula for xlogy at x = 0, y <= 0]
+# Same here but with y <= -1
+- name: special_xlog1py(Tensor self, Tensor other) -> Tensor
+  self: at::special_xlog1py(grad,  other).masked_fill((self == 0.) & (other <= -1.), 0.)
+  other: grad * self / (other + 1)
+  result: at::special_xlog1py(self_t,  other_p).masked_fill((self_p == 0.) & (other_p <= -1.), 0.) + other_t * self_p / (other_p + 1)
+
+# Scalar self: only other has a formula.
+- name: special_xlog1py.self_scalar(Scalar self, Tensor other) -> Tensor
+  other: grad * self / (other + 1)
+  result: auto_element_wise
+
+# Scalar other: the zero-masking at self == 0 is applied only when other.toDouble() is not > -1.
+- name: special_xlog1py.other_scalar(Tensor self, Scalar other) -> Tensor
+  self: "other.toDouble() > -1.
+          ? at::special_xlog1py(grad,  other)
+          : at::special_xlog1py(grad,  other).masked_fill(self == 0., 0.)"
+  result: auto_element_wise
+
+# special_zeta: self is a not_implemented marker wherever it is a Tensor; other is expressed via
+# special_zeta evaluated at self + 1.
+- name: special_zeta(Tensor self, Tensor other) -> Tensor
+  self: not_implemented("zeta")
+  other:  grad * -self * special_zeta(self + 1., other)
+
+# Scalar self is converted with toDouble() before adding 1.
+- name: special_zeta.self_scalar(Scalar self, Tensor other) -> Tensor
+  other:  grad * -self * special_zeta(self.toDouble() + 1., other)
+
+- name: special_zeta.other_scalar(Tensor self, Scalar other) -> Tensor
+  self: not_implemented("zeta")
+
+# In-place sampler: self formula is zeros_like(grad) and result zeroes self_t in place.
+- name: log_normal_(Tensor(a!) self, float mean=1, float std=2, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: self_t.zero_()
+
+# Both formulas delegate to helpers; logsumexp_backward also receives the forward result, logsumexp_jvp does not.
+- name: logsumexp(Tensor self, int[1] dim, bool keepdim=False) -> Tensor
+  self: logsumexp_backward(grad, self, result, dim, keepdim)
+  result: logsumexp_jvp(self_p, self_t, dim, keepdim)
+
+# Only solution is flagged differentiable; one linalg_lstsq_backward call (given grad_input_mask) yields both self and b.
+- name: linalg_lstsq(Tensor self, Tensor b, float? rcond=None, *, str? driver=None) -> (Tensor solution, Tensor residuals, Tensor rank, Tensor singular_values)
+  self, b: linalg_lstsq_backward(grad, self, b, grad_input_mask)
+  solution: linalg_lstsq_jvp(self_p, b_p, self_t, b_t)
+  output_differentiability: [True, False, False, False]
+
+# In-place lt_: every tensor input's formula is zeros_like of that input, and result zeroes self_t in place.
+- name: lt_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)
+  result: self_t.zero_()
+
+- name: lt_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+# Only LU is flagged differentiable; pivots and info are not.
+- name: linalg_lu_factor_ex(Tensor A, *, bool pivot=True, bool check_errors=False) -> (Tensor LU, Tensor pivots, Tensor info)
+  A: lu_factor_ex_backward(grad, LU, pivots, pivot)
+  LU: lu_factor_ex_jvp(A_t, LU, pivots, pivot)
+  output_differentiability: [True, False, False]
+
+# P is flagged non-differentiable; the L and U formulas each call linalg_lu_jvp and take tuple element 0 / 1.
+- name: linalg_lu(Tensor A, *, bool pivot=True) -> (Tensor P, Tensor L, Tensor U)
+  A: linalg_lu_backward(grad_L, grad_U, P, L, U, pivot)
+  L: std::get<0>(linalg_lu_jvp(A_t, P, L, U, pivot))
+  U: std::get<1>(linalg_lu_jvp(A_t, P, L, U, pivot))
+  output_differentiability: [False, True, True]
+
+# The B formula re-solves against grad with the adjoint flag flipped (!adjoint); pivots has no formula.
+- name: linalg_lu_solve(Tensor LU, Tensor pivots, Tensor B, *, bool left=True, bool adjoint=False) -> Tensor
+  LU: linalg_lu_solve_LU(grad, LU, pivots, result, left, adjoint)
+  B: "at::linalg_lu_solve(LU, pivots, grad, left, !adjoint)"
+  result: linalg_lu_solve_jvp(result, LU_p, pivots, LU_t, B_t, left, adjoint)
+
+# P is flagged non-differentiable. The L formula takes tril(-1) and the U formula takes triu() of LU_data_t,
+# narrowing first along the last / second-to-last dim when that dim is the larger of the two.
+- name: lu_unpack(Tensor LU_data, Tensor LU_pivots, bool unpack_data=True, bool unpack_pivots=True) -> (Tensor P, Tensor L, Tensor U)
+  LU_data: lu_unpack_backward(grad_L, grad_U, LU_data.sym_size(-2), LU_data.sym_size(-1))
+  LU_pivots: non_differentiable
+  L: "LU_data_t.sym_size(-2) >= LU_data_t.sym_size(-1) ? LU_data_t.tril(-1) : LU_data_t.narrow_symint(-1, 0, LU_data_t.sym_size(-2)).tril(-1)"
+  U: "LU_data_t.sym_size(-1) >= LU_data_t.sym_size(-2) ? LU_data_t.triu() : LU_data_t.narrow_symint(-2, 0, LU_data_t.sym_size(-1)).triu()"
+  output_differentiability: [False, True, True]
+
+# masked_fill: self receives grad with the masked positions set to 0; mask is non_differentiable.
+- name: masked_fill.Scalar(Tensor self, Tensor mask, Scalar value) -> Tensor
+  self: grad.masked_fill(mask, 0)
+  mask: non_differentiable
+  result: self_t.masked_fill(mask, 0)
+
+# Tensor value: value is handled by masked_fill_backward(grad, mask) and the result is filled with value_t.
+- name: masked_fill.Tensor(Tensor self, Tensor mask, Tensor value) -> Tensor
+  self: grad.masked_fill(mask, 0)
+  value: masked_fill_backward(grad, mask)
+  mask: non_differentiable
+  result: self_t.masked_fill(mask, value_t)
+
+# self receives grad with the masked positions set to 0; source is handled by masked_scatter_backward_symint,
+# which is given source's symbolic sizes. mask is non_differentiable.
+- name: masked_scatter(Tensor self, Tensor mask, Tensor source) -> Tensor
+  self: grad.masked_fill(mask, 0)
+  source: masked_scatter_backward_symint(grad, mask, source.sym_sizes())
+  mask: non_differentiable
+  result: self_t.masked_scatter(mask, source_t)
+
+# grad_output receives grad scattered into zeros at the masked positions; mask is non_differentiable.
+# The result formula re-applies the op to grad_output_t with this op's own `sizes` argument, so the
+# tangent is shaped by the same sizes as the op's output rather than by grad_output_t's shape.
+- name: masked_scatter_backward(Tensor grad_output, Tensor mask, SymInt[] sizes) -> Tensor
+  grad_output: zeros_like(grad_output).masked_scatter(mask, grad)
+  mask: non_differentiable
+  result: masked_scatter_backward_symint(grad_output_t, mask, sizes)
+
+# self is handled by masked_select_backward(grad, self, mask); mask is non_differentiable.
+- name: masked_select(Tensor self, Tensor mask) -> Tensor
+  self: masked_select_backward(grad, self, mask)
+  mask: non_differentiable
+  result: auto_linear
+
+# Both formulas call the same helper; they differ in the second argument (grad vs self_t) and the adjoint flag (true vs false).
+- name: linalg_matrix_exp(Tensor self) -> Tensor
+  self: linalg_matrix_exp_differential(self, grad, /*adjoint*/ true)
+  result: linalg_matrix_exp_differential(self_p, self_t, /*adjoint*/ false)
+
+# Dim reduction: self routes grad through the returned indices; values gathers self_t at those same indices.
+- name: max.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+# Full reduction: delegates to evenly_distribute_backward / evenly_read_jvp, both given the forward result.
+- name: max(Tensor self) -> Tensor
+  self: evenly_distribute_backward(grad, self, result)
+  result: evenly_read_jvp(self_t, self_p, result)
+
+# Where self == other each input receives grad / 2; otherwise the input that is smaller gets 0 and the other gets grad.
+# The result formula uses the matching 0.5 weight on ties.
+- name: maximum(Tensor self, Tensor other) -> Tensor
+  self: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
+  other: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
+  result: other_t + at::where(self_p == other_p, at::scalar_tensor(0.5, result.options()), (self_p > other_p).to(result.scalar_type())) * (self_t - other_t)
+
+# grad flows to self where (self >= other) or other is NaN, and to other everywhere else.
+# The result formula uses the same `>=` mask as the self/other formulas, so ties (self == other)
+# are attributed to self in every formula, mirroring how fmin uses `<=` throughout.
+- name: fmax(Tensor self, Tensor other) -> Tensor
+  self: grad.masked_fill((self >= other).logical_or_(other.isnan()).logical_not_(), 0)
+  other: grad.masked_fill((self >= other).logical_or_(other.isnan()), 0)
+  result: other_t + (self_p >= other_p).logical_or_(other_p.isnan()) * (self_t - other_t)
+
+# Full reduction: grad is expanded to self's symbolic sizes and divided by self's symbolic element count.
+- name: mean(Tensor self, *, ScalarType? dtype=None) -> Tensor
+  self: grad.expand_symint(self.sym_sizes()) / self.sym_numel()
+  result: auto_linear
+
+# Dim reduction: delegates to mean_backward with self's symbolic sizes and element count.
+- name: mean.dim(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  self: mean_backward(grad, self.sym_sizes(), dim, self.sym_numel(), keepdim)
+  result: auto_linear
+
+# Full-reduction median / nanmedian share the same formulas (evenly_distribute_backward / evenly_read_jvp).
+- name: median(Tensor self) -> Tensor
+  self: evenly_distribute_backward(grad, self, result)
+  result: evenly_read_jvp(self_t, self_p, result)
+
+- name: nanmedian(Tensor self) -> Tensor
+  self: evenly_distribute_backward(grad, self, result)
+  result: evenly_read_jvp(self_t, self_p, result)
+
+# Dim-reduction median / nanmedian: self routes grad through the returned indices.
+# This is in theory incorrect in the following case:
+#   sorted list: [..., a, b, b, ..., b, b, c, ...] with median = b and the value
+#                            |                     at middle position of the
+#                            |                     list between two `b`s. E.g.,
+#                            |
+#                            ^the middle position
+# The gradient exists and is essentially 0 in this case.
+#
+# In case where the middle position is at the boundary of `b` range, e.g.,
+#   sorted list: [..., a, b, b, ..., b, b, c, ...]
+#                                       |
+#                                       ^the middle position
+# The backward implementation is correct in the sense that it returns the
+# subgradient on one side.
+- name: median.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+- name: nanmedian.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+# Dim reduction: self routes grad through the returned indices; values gathers self_t at those same indices.
+- name: min.dim(Tensor self, int dim, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+# Full reduction: delegates to evenly_distribute_backward / evenly_read_jvp, both given the forward result.
+- name: min(Tensor self) -> Tensor
+  self: evenly_distribute_backward(grad, self, result)
+  result: evenly_read_jvp(self_t, self_p, result)
+
+# Where self == other each input receives grad / 2; otherwise the input that is larger gets 0 and the other gets grad.
+# The result formula uses the matching 0.5 weight on ties.
+- name: minimum(Tensor self, Tensor other) -> Tensor
+  self: at::where(self == other, grad / 2, grad).masked_fill_(self > other, 0)
+  other: at::where(self == other, grad / 2, grad).masked_fill_(self < other, 0)
+  result: other_t + at::where(self_p == other_p, at::scalar_tensor(0.5, result.options()), (self_p < other_p).to(result.scalar_type())) * (self_t - other_t)
+
+# grad flows to self where (self <= other) or other is NaN, and to other everywhere else;
+# the result formula uses the same `<=` mask.
+- name: fmin(Tensor self, Tensor other) -> Tensor
+  self: grad.masked_fill((self <= other).logical_or_(other.isnan()).logical_not_(), 0)
+  other: grad.masked_fill((self <= other).logical_or_(other.isnan()), 0)
+  result: other_t + (self_p <= other_p).logical_or_(other_p.isnan()) * (self_t - other_t)
+
+# amax and amin use identical formulas: the mask passed to scale_grad_by_count marks positions where self equals
+# the result (with reduced dims restored).
+- name: amax(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
+  self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, dim, keepdim) == self, dim)
+  result: amaxamin_jvp(self_p, self_t, result, dim, keepdim)
+
+- name: amin(Tensor self, int[1] dim=[], bool keepdim=False) -> Tensor
+  self: scale_grad_by_count(restore_reduced_dims(grad, dim, keepdim), restore_reduced_dims(result, dim, keepdim) == self, dim)
+  result: amaxamin_jvp(self_p, self_t, result, dim, keepdim)
+
+# Each operand's helper receives the other operand plus this operand's symbolic sizes, strides and layout,
+# with a literal 1 as the last argument.
+- name: mm(Tensor self, Tensor mat2) -> Tensor
+  self: mm_mat1_backward(grad, mat2, self.sym_sizes(), self.sym_strides(), self.layout(), 1)
+  mat2: mm_mat2_backward(grad, self, mat2.sym_sizes(), mat2.sym_strides(), mat2.layout(), 1)
+  result: at::mm(self_t, mat2_p) + at::mm(self_p, mat2_t)
+
+# self routes grad through the returned indices; values gathers self_t at those same indices.
+- name: mode(Tensor self, int dim=-1, bool keepdim=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), keepdim)
+  values: gather_with_keepdimed_indices(self_t, dim, indices, keepdim)
+
+# Each operand's formula passes the other operand and this operand's scalar type to mul_tensor_backward.
+- name: mul.Tensor(Tensor self, Tensor other) -> Tensor
+  self: mul_tensor_backward(grad, other, self.scalar_type())
+  other: mul_tensor_backward(grad, self, other.scalar_type())
+  result: other_t * self_p + self_t * other_p
+
+# Scalar multiplier: only self has a formula.
+- name: mul.Scalar(Tensor self, Scalar other) -> Tensor
+  self: mul_tensor_backward(grad, other, self.scalar_type())
+  result: self_t * other
+
+# self is grad.ger(vec.conj()); vec is the conjugate-transposed self applied to grad via mv.
+- name: mv(Tensor self, Tensor vec) -> Tensor
+  self: grad.ger(vec.conj())
+  vec: self.conj().t().mv(grad)
+  result: mv(self_t, vec_p) + mv(self_p, vec_t)
+
+# self is handled by mvlgamma_backward(grad, self, p); result is marked auto_element_wise.
+- name: mvlgamma(Tensor self, int p) -> Tensor
+  self: mvlgamma_backward(grad, self, p)
+  result: auto_element_wise
+
+# grad is multiplied by at::isfinite(self); the nan/posinf/neginf arguments are not used by the formula.
+- name: nan_to_num(Tensor self, float? nan=None, float? posinf=None, float? neginf=None) -> Tensor
+  self: grad * at::isfinite(self)
+  result: auto_element_wise
+
+# Batch-norm forward variants. In each, one native_batch_norm_backward call (given grad_input_mask) yields the
+# (input, weight, bias) tuple, an undefined grad yields a default-constructed tuple, and result0 uses
+# batch_norm_jvp. result1 and result2 are passed to both helpers.
+- name: native_batch_norm(Tensor input, Tensor? weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, eps, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, running_mean, running_var, result1, result2, training, eps)
+
+- name: _native_batch_norm_legit(Tensor input, Tensor? weight, Tensor? bias, Tensor(a!) running_mean, Tensor(b!) running_var, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, eps, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, running_mean, running_var, result1, result2, training, eps)
+
+# no_training variant: the training flag is hard-coded to false in both formulas.
+- name: _native_batch_norm_legit_no_training(Tensor input, Tensor? weight, Tensor? bias, Tensor running_mean, Tensor running_var, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, /*training=*/false, eps, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, running_mean, running_var, result1, result2, /*training=*/false, eps)
+
+# no_stats variant: default-constructed Tensor() values stand in for running_mean / running_var.
+- name: _native_batch_norm_legit.no_stats(Tensor input, Tensor? weight, Tensor? bias, bool training, float momentum, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? native_batch_norm_backward(grad, input, weight, Tensor(), Tensor(), result1, result2, training, eps, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, Tensor(), Tensor(), result1, result2, training, eps)
+
+- name: native_batch_norm_backward(Tensor grad_out, Tensor input, Tensor? weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_invstd, bool train, float eps, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  input, weight, grad_out: batchnorm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_out, running_mean, running_var, train, eps, save_mean, save_invstd, grad_input_mask)
+  save_mean: not_implemented("native_batch_norm_backward save_mean")
+  save_invstd: not_implemented("native_batch_norm_backward save_invstd")
+
+- name: native_layer_norm(Tensor input, SymInt[] normalized_shape, Tensor? weight, Tensor? bias, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? native_layer_norm_backward_symint(grad, input, normalized_shape, result1, result2, weight, bias, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"  # an empty tuple is returned when grad is undefined
+  result0: layer_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, result1, result2, normalized_shape)
+
+- name: native_layer_norm_backward(Tensor grad_out, Tensor input, SymInt[] normalized_shape, Tensor mean, Tensor rstd, Tensor? weight, Tensor? bias, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  input, weight, grad_out: layer_norm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_out, mean, rstd, normalized_shape, grad_input_mask)
+  bias: Tensor()  # a default-constructed Tensor() is returned for bias
+  mean: not_implemented("native_layer_norm_backward mean")
+  rstd: not_implemented("native_layer_norm_backward rstd")
+
+- name: native_group_norm(Tensor input, Tensor? weight, Tensor? bias, SymInt N, SymInt C, SymInt HxW, int group, float eps) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "GradMode::is_enabled() || grads[1].defined() || grads[2].defined() ? infinitely_differentiable_native_group_norm_backward(grads[0], grads[1], grads[2], input, result1, result2, weight, N, C, HxW, group, eps, grad_input_mask) : (grads[0].defined() ? native_group_norm_backward_symint(grads[0].device().is_xpu() ? grads[0] : grads[0].contiguous(grads[0].device().is_cpu() ? input.suggest_memory_format() : c10::MemoryFormat::Contiguous), input.device().is_xpu() ? input : input.contiguous(input.device().is_cpu() ? input.suggest_memory_format() : c10::MemoryFormat::Contiguous), result1, result2, weight, N, C, HxW, group, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>())"  # differentiable helper when grad mode is on or grads[1]/grads[2] are defined; otherwise the native backward (operands made contiguous except on XPU), or an empty tuple if grads[0] is undefined
+  result0: group_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, result1, result2, group)
+  result1: group_norm_mean_jvp(input_t, result1, group)
+  result2: group_norm_invstd_jvp(input_p, input_t, result1, result2, group)
+
+- name: ne_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)
+  self: zeros_like(self)  # gradient is all zeros, shaped like self
+  result: self_t.zero_()
+
+- name: ne_.Tensor(Tensor(a!) self, Tensor other) -> Tensor(a!)
+  self: zeros_like(self)  # both inputs receive all-zero gradients
+  other: zeros_like(other)
+  result: self_t.zero_()
+
+- name: neg(Tensor self) -> Tensor
+  self: grad.neg()  # the incoming gradient is negated
+  result: auto_element_wise
+
+- name: nextafter(Tensor self, Tensor other) -> Tensor
+  self: not_implemented("nextafter")  # neither input has a derivative implemented
+  other: not_implemented("nextafter")
+
+- name: norm.Scalar(Tensor self, Scalar p=2) -> Tensor
+  self: norm_backward(grad, self, p, result)
+  result: norm_jvp(self_p, self_t, p, result)
+
+- name: norm.ScalarOpt_dim(Tensor self, Scalar? p, int[1] dim, bool keepdim=False) -> Tensor
+  self: norm_backward(grad, self, p, result, dim, keepdim)  # dim and keepdim are forwarded to the helper
+  result: norm_jvp(self_p, self_t, p, result, dim, keepdim)
+
+- name: norm.ScalarOpt_dtype(Tensor self, Scalar? p, *, ScalarType dtype) -> Tensor
+  self: norm_backward(grad, self.to(grad.scalar_type()), p, result)  # self is converted to grad's scalar type first
+  result: norm_jvp(self_p, self_t, p, result)
+
+- name: norm.ScalarOpt_dim_dtype(Tensor self, Scalar? p, int[1] dim, bool keepdim, *, ScalarType dtype) -> Tensor
+  self: norm_backward(grad, self.to(grad.scalar_type()), p, result, dim, keepdim)  # self is converted to grad's scalar type first
+  result: norm_jvp(self_p, self_t, p, result, dim, keepdim)
+
+- name: linalg_vector_norm(Tensor self, Scalar ord=2, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  self: linalg_vector_norm_backward(grad, self, ord, result, dim, keepdim)  # the dtype argument is not passed to the helper
+  result: linalg_vector_norm_jvp(self_p, self_t, ord, result, dim, keepdim)
+
+- name: _pdist_forward(Tensor self, float p=2) -> Tensor
+  self: _pdist_backward(grad, self, p, result)
+
+- name: _pdist_backward(Tensor grad, Tensor self, float p, Tensor pdist) -> Tensor
+  grad: not_implemented("_pdist_backward")  # no derivative is implemented for any input of this op
+  self: not_implemented("_pdist_backward")
+  pdist: not_implemented("_pdist_backward")
+
+- name: _euclidean_dist(Tensor x1, Tensor x2) -> Tensor
+  x1, x2: _euclidean_dist_backward(grad, x1, x2, result)  # one helper call covers both inputs
+
+- name: _cdist_forward(Tensor x1, Tensor x2, float p, int? compute_mode) -> Tensor
+  x1: _cdist_backward(grad.contiguous(), x1, x2, p, result)  # grad is made contiguous before the call
+  x2: _cdist_backward(grad.mT().contiguous(), x2, x1, p, result.mT().contiguous())  # same helper with x1/x2 swapped and mT() applied to grad and result
+
+- name: _cdist_backward(Tensor grad, Tensor x1, Tensor x2, float p, Tensor cdist) -> Tensor
+  grad: not_implemented("_cdist_backward")  # no derivative is implemented for any input of this op
+  x1: not_implemented("_cdist_backward")
+  x2: not_implemented("_cdist_backward")
+  cdist: not_implemented("_cdist_backward")
+
+- name: normal_(Tensor(a!) self, float mean=0, float std=1, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)  # gradient is all zeros
+  result: self_t.zero_()
+
+- name: normal.Tensor_float(Tensor mean, float std=1, *, Generator? generator=None) -> Tensor
+  mean: at::zeros_symint(mean.sym_sizes(), grad.options())  # zeros with mean's sizes and grad's tensor options
+  result: auto_element_wise
+
+- name: normal.float_Tensor(float mean, Tensor std, *, Generator? generator=None) -> Tensor
+  std: at::zeros_symint(std.sym_sizes(), grad.options())  # zeros with std's sizes and grad's tensor options
+  result: auto_element_wise
+
+- name: normal.Tensor_Tensor(Tensor mean, Tensor std, *, Generator? generator=None) -> Tensor
+  mean: at::zeros_symint(mean.sym_sizes(), grad.options())
+  std: at::zeros_symint(std.sym_sizes(), grad.options())
+  result: zeros_like(mean_t)  # zeros shaped like mean_t
+
+- name: linalg_householder_product(Tensor input, Tensor tau) -> Tensor
+  input, tau: householder_product_backward(grad, result, input, tau)  # one helper call covers both inputs
+  result: householder_product_jvp(input_t, tau_t, result, input_p, tau_p)
+
+- name: ormqr(Tensor self, Tensor input2, Tensor input3, bool left=True, bool transpose=False) -> Tensor
+  self, input2, input3: ormqr_backward(grad, result, self, input2, input3, left, transpose, grad_input_mask)  # one helper call covers all three inputs; grad_input_mask is forwarded
+
+- name: permute(Tensor(a) self, int[] dims) -> Tensor(a)
+  self: permute_backwards(grad, dims)
+  result: auto_linear
+
+- name: poisson(Tensor self, Generator? generator=None) -> Tensor
+  self: zeros_like(self)  # gradient is all zeros, shaped like self
+  result: auto_element_wise
+
+- name: pow.Tensor_Scalar(Tensor self, Scalar exponent) -> Tensor
+  self: pow_backward(grad, self, exponent)
+  result: auto_element_wise
+
+- name: pow.Tensor_Tensor(Tensor self, Tensor exponent) -> Tensor
+  self: pow_backward_self(grad, self, exponent)
+  exponent: pow_backward_exponent(grad, self, exponent, result)
+  result: (pow_backward_self(self_t.conj(), self_p, exponent_p) + pow_backward_exponent(exponent_t.conj(), self_p, exponent_p, result)).conj()  # reuses both backward helpers on conjugated *_t inputs, sums them, and conjugates the sum
+
+- name: pow.Scalar(Scalar self, Tensor exponent) -> Tensor
+  exponent: pow_backward_exponent(grad, self, exponent, result)  # only exponent is a Tensor here, so it is the only input with a formula
+  result: auto_element_wise
+
+- name: prod(Tensor self, *, ScalarType? dtype=None) -> Tensor
+  self: prod_backward(grad, self.to(grad.scalar_type()), result)  # self is converted to grad's scalar type first
+  result: (prod_backward(at::ones({}, result.options()).expand_as(result), self_p.to(result.scalar_type()), result) * self_t.conj()).sum().conj()  # prod_backward is fed a ones tensor expanded like result; the product with conj(self_t) is summed and conjugated
+
+- name: prod.dim_int(Tensor self, int dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  self: prod_backward(grad, self.to(grad.scalar_type()), result, dim, keepdim)  # self is converted to grad's scalar type first
+  result: (prod_backward(at::ones({}, result.options()).expand_as(result), self_p.to(result.scalar_type()), result, dim, keepdim) * self_t.conj()).sum(dim, keepdim).conj()  # same construction as prod, with the sum taken over dim
+
+- name: put(Tensor self, Tensor index, Tensor source, bool accumulate=False) -> Tensor
+  self: "accumulate ? grad : grad.put(index, zeros_like(source), false)"  # with accumulate, grad passes through; otherwise zeros are put into grad at index
+  index: non_differentiable
+  source: grad.take(index).reshape_as(source)  # grad is read at index and reshaped like source
+  result: self_t.put(index, source_t, accumulate)
+
+- name: linalg_qr(Tensor A, str mode='reduced') -> (Tensor Q, Tensor R)
+  A: linalg_qr_backward(grad_Q, grad_R, Q, R, mode)
+  Q, R: linalg_qr_jvp(A_t, Q, R, mode)  # one helper call covers both outputs
+
+- name: rad2deg(Tensor self) -> Tensor
+  self: rad2deg_backward(grad)
+  result: auto_element_wise
+
+- name: random_.from(Tensor(a!) self, int from, int? to, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)  # gradient is all zeros
+  result: self_t.zero_()
+
+- name: random_.to(Tensor(a!) self, int to, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)  # gradient is all zeros
+  result: self_t.zero_()
+
+- name: random_(Tensor(a!) self, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)  # gradient is all zeros
+  result: self_t.zero_()
+
+- name: reciprocal(Tensor self) -> Tensor
+  self: -grad * (result * result).conj()  # -grad * conj(result^2), written in terms of the output
+  result: auto_element_wise
+
+- name: remainder.Scalar(Tensor self, Scalar other) -> Tensor
+  self: grad  # grad passes through unchanged
+  result: auto_element_wise
+
+- name: remainder.Tensor(Tensor self, Tensor other) -> Tensor
+  self: grad  # grad passes through unchanged
+  other: -grad * self.div(other, /*rounding_mode=*/"floor")  # -grad * floor(self / other)
+  result: self_t - other_t * self_p.div(other_p, /*rounding_mode=*/"floor")
+
+- name: renorm(Tensor self, Scalar p, int dim, Scalar maxnorm) -> Tensor
+  self: renorm_backward(grad, self, p, dim, maxnorm)
+  result: renorm_jvp(self_p, self_t, p, dim, maxnorm)
+
+- name: repeat(Tensor self, SymInt[] repeats) -> Tensor
+  self: repeat_backward(grad, repeats, self.sym_sizes())
+  result: auto_linear
+
+- name: special_entr(Tensor self) -> Tensor
+  self: grad * (-(1 + self.log()))  # grad * -(1 + log(self))
+  result: auto_element_wise
+
+- name: special_ndtri(Tensor self) -> Tensor
+  self: grad * std::sqrt(2 * M_PI) * (result.square() / 2).exp()  # grad * sqrt(2*pi) * exp(result^2 / 2)
+  result: auto_element_wise
+
+- name: special_log_ndtr(Tensor self) -> Tensor
+  self: grad / std::sqrt(2 * M_PI) * (result + self.pow(2) / 2).neg().exp()  # grad / sqrt(2*pi) * exp(-(result + self^2 / 2))
+  result: auto_element_wise
+
+# [Note: Sometimes view derivatives]
+# The following situation applies to other operations as well.
+# TODO: This note is currently referenced only by to_dense and to_sparse*.
+# Make it more generic if other operators start referencing it.
+#
+# DO NOT define a backward for reshape!
+# reshape is special in that it sometimes returns a view, and sometimes not.
+# Defining a backward will make codegen spit out the forward call as
+#     as_variable(baseType->reshape(self)),
+# making it impossible (hard) to detect when it is actually a view.
+# - name: reshape(Tensor self, IntArrayRef shape)
+
+- name: _reshape_alias(Tensor(a) self, SymInt[] size, SymInt[] stride) -> Tensor(a)
+  self: grad.reshape_symint(self.sym_sizes())  # grad is reshaped back to self's sizes
+  result: auto_linear
+
+- name: round(Tensor self) -> Tensor
+  self: zeros_like(grad)  # gradient is all zeros
+  result: auto_element_wise
+
+- name: round.decimals(Tensor self, *, int decimals) -> Tensor
+  self: zeros_like(grad)  # gradient is all zeros
+  result: auto_element_wise
+
+- name: rsqrt(Tensor self) -> Tensor
+  self: -0.5 * grad * result.pow(3).conj()  # -0.5 * grad * conj(result^3), written in terms of the output
+  result: auto_element_wise
+
+- name: scatter.src(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
+  self: grad.scatter(dim, index, 0)  # grad with 0 written at the scattered positions
+  index: non_differentiable
+  src: grad.gather(dim, index)  # grad is gathered at index
+  result: self_t.scatter(dim, index, src_t)
+
+- name: scatter.value(Tensor self, int dim, Tensor index, Scalar value) -> Tensor
+  self: grad.scatter(dim, index, 0)  # grad with 0 written at the scattered positions
+  index: non_differentiable
+  result: self_t.scatter(dim, index, 0)
+
+- name: scatter_add(Tensor self, int dim, Tensor index, Tensor src) -> Tensor
+  self: grad  # grad passes through unchanged
+  index: non_differentiable
+  src: grad.gather(dim, index)  # grad is gathered at index
+  result: scatter_add(self_t, dim, index, src_t)
+
+- name: select.int(Tensor(a) self, int dim, SymInt index) -> Tensor(a)
+  dispatch:
+    Default:
+      self: select_backward_symint(grad, self.sym_sizes(), dim, index)
+      result: auto_linear
+    AutogradNestedTensor:
+      self: _nested_select_backward_symint(grad, self, dim, index)  # the nested variant receives self itself rather than its sizes
+
+- name: select_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt index) -> Tensor
+  grad_output: grad.select_symint(dim, index)  # the same select is applied to grad
+  result: auto_linear
+
+- name: sigmoid(Tensor self) -> Tensor
+  self: sigmoid_backward(grad, result)  # the helper takes the output (result), not the input
+  result: auto_element_wise
+
+- name: logit(Tensor self, float? eps=None) -> Tensor
+  self: "GradMode::is_enabled() ? infinitely_differentiable_logit_backward(grad, self, eps) : logit_backward(grad, self, eps)"  # the infinitely-differentiable helper is chosen when grad mode is enabled
+  result: auto_element_wise
+
+- name: sign(Tensor self) -> Tensor
+  self: zeros_like(grad)  # gradient is all zeros
+  result: auto_element_wise
+
+- name: sgn(Tensor self) -> Tensor
+  self: sgn_backward(self, grad, result)
+  # Cannot use auto_element_wise here because the Jacobian is *not* Hermitian (in fact, it is symmetric)
+  # The function is not holomorphic, so there's no reason for its Jacobian to be Hermitian
+  # auto_element_wise has a name that's a bit deceiving in the complex case
+  result: sgn_backward(self_p, self_t, result)  # the same helper is reused, with self_t in place of grad
+
+- name: sin(Tensor self) -> Tensor
+  self: grad * self.cos().conj()  # grad * conj(cos(self))
+  result: auto_element_wise
+
+- name: sinc(Tensor self) -> Tensor
+  self: sinc_backward(grad, self)
+  result: auto_element_wise
+
+- name: sinh(Tensor self) -> Tensor
+  self: grad * self.cosh().conj()  # grad * conj(cosh(self))
+  result: auto_element_wise
+
+- name: slice.Tensor(Tensor(a) self, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  self: slice_backward_wrapper(grad, self.sym_sizes(), dim, start, end, step)
+  result: auto_linear
+
+- name: slice_backward(Tensor grad_output, SymInt[] input_sizes, int dim, SymInt start, SymInt end, SymInt step) -> Tensor
+  grad_output: grad.slice_symint(dim, start, end, step)  # the same slice is taken from grad
+  result: auto_linear
+
+- name: slice_inverse(Tensor(a) self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor(a)
+  self: grad.slice_symint(dim, start, end, step)
+  src: slice_scatter_symint(grad, zeros_like(self), dim, start, end, step)  # zeros shaped like self are scattered into grad's slice
+  result: auto_linear
+
+- name: slice_scatter(Tensor self, Tensor src, int dim=0, SymInt? start=None, SymInt? end=None, SymInt step=1) -> Tensor
+  self: slice_scatter_symint(grad, zeros_like(src), dim, start, end, step)  # zeros shaped like src are scattered into grad's slice
+  src: grad.slice_symint(dim, start, end, step)  # the matching slice of grad
+  result: auto_linear
+
+- name: select_scatter(Tensor self, Tensor src, int dim, SymInt index) -> Tensor
+  self: select_scatter_symint(grad, zeros_like(src), dim, index)  # zeros shaped like src are scattered into grad at the selected index
+  src: grad.select_symint(dim, index)  # the matching selection of grad
+  result: auto_linear
+
+- name: diagonal_scatter(Tensor self, Tensor src, int offset=0, int dim1=0, int dim2=1) -> Tensor
+  self: diagonal_scatter(grad, zeros_like(src), offset, dim1, dim2)  # zeros shaped like src are scattered onto grad's diagonal
+  src: grad.diagonal(offset, dim1, dim2)  # the matching diagonal of grad
+  result: auto_linear
+
+- name: as_strided_scatter(Tensor self, Tensor src, SymInt[] size, SymInt[] stride, SymInt? storage_offset=None) -> Tensor
+  self: as_strided_scatter_backward(grad, TensorGeometry(self), TensorGeometry(src), size, stride, storage_offset)  # the helper receives TensorGeometry wrappers rather than the tensors themselves
+  # See Note [as_strided_scatter backward support]
+  src: grad.contiguous().as_strided_symint(size, stride, storage_offset)  # grad is made contiguous before as_strided is applied
+  result: auto_linear
+
+- name: _linalg_solve_ex(Tensor A, Tensor B, *, bool left=True, bool check_errors=False) -> (Tensor result, Tensor LU, Tensor pivots, Tensor info)
+  A, B: linalg_solve_backward(grad, result, A, LU, pivots, left, grad_input_mask[1])  # only grad_input_mask[1] is forwarded to the helper
+  result: "linalg_solve_jvp(A_t, B_t, result, LU, pivots, left, A_p.is_contiguous() && !A_p.is_complex())"  # the last argument is true only when A_p is contiguous and not complex
+  output_differentiability: [True, False, False, False]  # LU is an auxiliary tensor not exposed to the user
+
+- name: sort(Tensor self, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
+  output_differentiability: [True, False]  # values is differentiable, indices is not
+  values: gather_with_keepdimed_indices(self_t, dim, indices, true)
+
+- name: sort.stable(Tensor self, *, bool? stable, int dim=-1, bool descending=False) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)  # identical formulas to the sort overload without stable
+  output_differentiability: [True, False]
+  values: gather_with_keepdimed_indices(self_t, dim, indices, true)
+
+- name: split.Tensor(Tensor(a -> *) self, SymInt split_size, int dim=0) -> Tensor(a)[]
+  self: split_backward(grads, split_size, dim, self.sym_sizes(), self.options())  # the helper receives the whole list of output gradients (grads)
+  result: auto_linear
+
+- name: unsafe_split.Tensor(Tensor self, SymInt split_size, int dim=0) -> Tensor[]
+  self: split_backward(grads, split_size, dim, self.sym_sizes(), self.options())  # same formula as split.Tensor
+  result: auto_linear
+
+- name: split_with_sizes(Tensor(a -> *) self, SymInt[] split_sizes, int dim=0) -> Tensor(a)[]
+  dispatch:
+    Default:
+      self: split_with_sizes_backward(grads, split_sizes, dim, self.sym_sizes(), self.options())
+      result: auto_linear
+    AutogradNestedTensor:
+      self: _nested_split_with_sizes_backward(grads, split_sizes, dim, at::native::get_nested_tensor_impl(self)->get_nested_sizes(), self.options())  # nested sizes from the nested tensor impl replace self.sym_sizes()
+
+- name: unsafe_split_with_sizes(Tensor self, SymInt[] split_sizes, int dim=0) -> Tensor[]
+  self: split_with_sizes_backward(grads, split_sizes, dim, self.sym_sizes(), self.options())  # same formula as the Default split_with_sizes entry
+  result: auto_linear
+
+- name: sqrt(Tensor self) -> Tensor
+  self: grad / (2 * result.conj())  # grad / (2 * conj(result)), written in terms of the output
+  result: auto_element_wise
+
+- name: squeeze(Tensor(a) self) -> Tensor(a)
+  self: unsqueeze_to(grad, self.sym_sizes())
+  result: auto_linear
+
+- name: squeeze.dim(Tensor(a) self, int dim) -> Tensor(a)
+  dispatch:
+    Default:
+      self: unsqueeze_to(grad, dim, self.sym_sizes())
+      result: auto_linear
+    AutogradNestedTensor:
+      self: grad.unsqueeze(dim)  # the nested variant unsqueezes grad at dim directly
+
+- name: squeeze.dims(Tensor(a) self, int[] dim) -> Tensor(a)
+  dispatch:
+    Default:
+      self: unsqueeze_to(grad, dim, self.sym_sizes())
+      result: auto_linear
+    AutogradNestedTensor:
+      self: unsqueeze_multiple(grad, dim, self.dim())  # the nested variant passes self.dim() instead of the sizes
+
+- name: squeeze_(Tensor(a!) self) -> Tensor(a!)
+  self: unsqueeze_to(grad, self.sym_sizes())  # same formula as squeeze
+  result: auto_linear
+
+- name: squeeze_.dim(Tensor(a!) self, int dim) -> Tensor(a!)
+  self: unsqueeze_to(grad, dim, self.sym_sizes())  # same formula as the Default squeeze.dim entry
+  result: auto_linear
+
+- name: squeeze_.dims(Tensor(a!) self, int[] dim) -> Tensor(a!)
+  self: unsqueeze_to(grad, dim, self.sym_sizes())  # same formula as the Default squeeze.dims entry
+  result: auto_linear
+
+- name: std.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
+  self: std_backward(result, grad, self, dim, correction, keepdim)
+  # pointwise (variance) + sum + sqrt
+  result: (at::real(var_backward(self_t.conj(), self_p, dim, correction, true).sum(dim.value_or(IntArrayRef({})), keepdim)) / (2. * result)).masked_fill_(result == 0, 0)  # entries where result == 0 (a zero divisor) are set to 0
+
+- name: std_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
+  self: std_mean_backward(grads[0], grads[1], self, result0, dim, correction, keepdim)
+  result0: (at::real(var_backward(self_t.conj(), self_p, dim, correction, true).sum(dim.value_or(IntArrayRef({})), keepdim)) / (2. * result0)).masked_fill_(result0 == 0, 0)  # same expression as std.correction, with result0 as the std output
+  # linear
+  result1: mean(self_t, dim.value_or(IntArrayRef({})), keepdim)
+
+- name: sub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  other: handle_r_to_c(other.scalar_type(), maybe_multiply(-grad, alpha.conj()))  # negated grad is combined with conj(alpha) via maybe_multiply
+  result: self_t - maybe_multiply(other_t, alpha)
+
+- name: sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), grad)
+  result: auto_element_wise
+
+- name: rsub.Tensor(Tensor self, Tensor other, *, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))  # mirror of sub.Tensor: here self carries the negated, alpha-scaled term
+  other: handle_r_to_c(other.scalar_type(), grad)
+  result: -maybe_multiply(self_t, alpha) + other_t
+
+- name: rsub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor
+  self: handle_r_to_c(self.scalar_type(), maybe_multiply(-grad, alpha.conj()))
+  result: auto_element_wise
+
+- name: sum(Tensor self, *, ScalarType? dtype=None) -> Tensor
+  self: grad.expand_symint(self.sym_sizes())  # grad is expanded to self's sizes
+  result: auto_linear
+
+- name: sum.dim_IntList(Tensor self, int[1]? dim, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  dispatch:
+    Default:
+      self: sum_backward(grad, self.sym_sizes(), dim, keepdim)
+      result: auto_linear
+    AutogradNestedTensor:
+      # TODO: replace this function once semantics for nested tensor expand have been settled on
+      self: _nested_sum_backward(grad, self, dim, keepdim)  # the nested variant receives self itself rather than its sizes
+
+- name: nansum(Tensor self, int[1]? dim=None, bool keepdim=False, *, ScalarType? dtype=None) -> Tensor
+  self: nansum_backward(grad.to(self.scalar_type()), self, dim, keepdim)  # grad is converted to self's scalar type first
+  result: at::where(self_p.isnan(), 0, self_t).sum(dim, keepdim, dtype)  # self_t is replaced by 0 wherever self_p is NaN, then summed
+
+# We never call _linalg_svd with compute_uv=False in an autograd context, so we don't even consider it here
+- name: _linalg_svd(Tensor A, bool full_matrices=False, bool compute_uv=True, *, str? driver=None) -> (Tensor U, Tensor S, Tensor Vh)
+  A: "svd_backward(full_matrices && grad_U.defined() ? grad_U.narrow_symint(-1, 0, S.sym_size(-1)) : grad_U,
+                   grad_S,
+                   full_matrices && grad_Vh.defined() ? grad_Vh.narrow_symint(-2, 0, S.sym_size(-1)) : grad_Vh,
+                   full_matrices ? U.narrow_symint(-1, 0, S.sym_size(-1)) : U,
+                   S,
+                   full_matrices ? Vh.narrow_symint(-2, 0, S.sym_size(-1)) : Vh)"  # with full_matrices, U/grad_U (dim -1) and Vh/grad_Vh (dim -2) are narrowed to S.sym_size(-1) entries
+  U, S, Vh: linalg_svd_jvp(A_t, U, S, Vh, full_matrices)  # one helper call covers all three outputs
+
+- name: _linalg_eigh(Tensor A, str UPLO="L", bool compute_v=True) -> (Tensor eigenvalues, Tensor eigenvectors)
+  A: linalg_eig_backward(grads[0], grads[1], eigenvalues, eigenvectors, /*is_hermitian=*/true)  # shares the linalg_eig helpers, with is_hermitian=true
+  eigenvalues, eigenvectors: linalg_eig_jvp(A_t, eigenvalues, eigenvectors, /*is_hermitian=*/true)
+
+- name: linalg_eig(Tensor self) -> (Tensor eigenvalues, Tensor eigenvectors)
+  self: handle_r_to_c(self.scalar_type(), linalg_eig_backward(grads[0], grads[1], eigenvalues, eigenvectors, /*is_hermitian=*/false))  # the helper result is passed through handle_r_to_c with self's scalar type
+  eigenvalues, eigenvectors: linalg_eig_jvp(self_t, eigenvalues, eigenvectors, /*is_hermitian=*/false)
+
+- name: t(Tensor(a) self) -> Tensor(a)
+  self: grad.t()  # the same t() is applied to grad
+  result: auto_linear
+
+- name: t_(Tensor(a!) self) -> Tensor(a!)
+  self: grad.t()  # the same t() is applied to grad
+  result: auto_linear
+
+- name: one_hot(Tensor self, int num_classes=-1) -> Tensor
+  self: non_differentiable
+
+- name: flip(Tensor self, int[] dims) -> Tensor
+  self: grad.flip(dims)  # the same flip is applied to grad
+  result: auto_linear
+
+- name: roll(Tensor self, SymInt[1] shifts, int[1] dims=[]) -> Tensor
+  self: grad.roll_symint(fmap(reverse_list_symint(shifts), [](c10::SymInt i){return -i;}), reverse_list(dims))  # grad is rolled with negated shifts; shifts and dims are both reversed
+  result: auto_linear
+
+- name: rot90(Tensor self, int k=1, int[] dims=[0,1]) -> Tensor
+  self: grad.rot90(-k, dims)  # grad is rotated by -k over the same dims
+  result: auto_linear
+
+- name: take(Tensor self, Tensor index) -> Tensor
+  self: take_backward(grad, self, index)
+  index: non_differentiable
+  result: auto_linear
+
+- name: tan(Tensor self) -> Tensor
+  self: grad * (1 + result.pow(2)).conj()  # grad * conj(1 + result^2), written in terms of the output
+  result: auto_element_wise
+
+- name: tanh(Tensor self) -> Tensor
+  self: tanh_backward(grad, result)  # the helper takes the output (result), not the input
+  result: auto_element_wise
+
+- name: topk(Tensor self, SymInt k, int dim=-1, bool largest=True, bool sorted=True) -> (Tensor values, Tensor indices)
+  self: value_selecting_reduction_backward_symint(grad, dim, indices, self.sym_sizes(), true)
+  output_differentiability: [True, False]  # values is differentiable, indices is not
+  values: gather(self_t, dim, indices)  # self_t is gathered at the selected indices
+
+- name: trace(Tensor self) -> Tensor
+  self: trace_backward_symint(grad, self.sym_sizes())
+  result: auto_linear
+
+- name: transpose.int(Tensor(a) self, int dim0, int dim1) -> Tensor(a)
+  self: grad.transpose(dim0, dim1)  # the same transpose is applied to grad
+  result: auto_linear
+
+- name: transpose_(Tensor(a!) self, int dim0, int dim1) -> Tensor(a!)
+  self: grad.transpose(dim0, dim1)  # the same transpose is applied to grad
+  result: auto_linear
+
+- name: triangular_solve(Tensor self, Tensor A, bool upper=True, bool transpose=False, bool unitriangular=False) -> (Tensor solution, Tensor cloned_coefficient)
+  self, A: triangular_solve_backward(grad_solution, grad_cloned_coefficient, self, A, solution, upper, transpose, unitriangular, grad_input_mask)  # gradients of both outputs are passed to one helper
+  solution: triangular_solve_jvp(solution, A_p, A_t, self_t, upper, transpose, unitriangular)
+  cloned_coefficient: A_t  # A_t is used directly for this output
+
+- name: linalg_solve_triangular(Tensor self, Tensor B, *, bool upper, bool left=True, bool unitriangular=False) -> Tensor
+  self, B: linalg_solve_triangular_backward(grad, self, result, upper, left, unitriangular, grad_input_mask)
+  result: linalg_solve_triangular_forward_AD(self_t, B_t, self_p, result, upper, left, unitriangular)
+
+- name: tril(Tensor self, int diagonal=0) -> Tensor
+  self: grad.tril(diagonal)  # the same tril is applied to grad
+  result: auto_linear
+
+- name: triu(Tensor self, int diagonal=0) -> Tensor
+  self: grad.triu(diagonal)  # the same triu is applied to grad
+  result: auto_linear
+
+- name: trunc(Tensor self) -> Tensor
+  self: zeros_like(grad)  # gradient is all zeros
+  result: auto_element_wise
+
+# DO NOT define a backward for to_dense
+# See [Note: Sometimes view derivatives]
+# - name: to_dense(Tensor self, ScalarType? dtype=None, *, bool? masked_grad=None) -> Tensor
+#
+- name: _to_dense(Tensor self, ScalarType? dtype=None, bool? masked_grad=None) -> Tensor
+  self: to_dense_backward(grad, self, masked_grad)  # masked_grad is forwarded; dtype is not used by the formula
+
+# DO NOT define a backward for to_sparse.sparse_dim
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
+#
+- name: _to_sparse.sparse_dim(Tensor self, int sparse_dim) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # the helper receives self's layout and block size
+
+# DO NOT define a backward for to_sparse
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
+#
+- name: _to_sparse(Tensor self, *, Layout? layout=None, int[2]? blocksize=None, int? dense_dim=None) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # uses self's layout/blocksize, not the layout/blocksize arguments
+
+# DO NOT define a backward for to_sparse_csr
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
+#
+- name: _to_sparse_csr(Tensor self, int? dense_dim=None) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # same formula as the other _to_sparse* entries
+
+# DO NOT define a backward for to_sparse_csc
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
+#
+- name: _to_sparse_csc(Tensor self, int? dense_dim=None) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # same formula as the other _to_sparse* entries
+
+# DO NOT define a backward for to_sparse_bsr
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+#
+- name: _to_sparse_bsr(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # uses self's blocksize, not the blocksize argument
+
+# DO NOT define a backward for to_sparse_bsc
+# See [Note: Sometimes view derivatives]
+# - name: to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+#
+- name: _to_sparse_bsc(Tensor self, int[2] blocksize, int? dense_dim=None) -> Tensor
+  self: to_sparse_backward(grad, self.layout(), self.sym_blocksize())  # uses self's blocksize, not the blocksize argument
+
+- name: to_mkldnn(Tensor self, ScalarType? dtype=None) -> Tensor
+  self: to_mkldnn_backward(grad, self)
+
+- name: unfold(Tensor(a) self, int dimension, int size, int step) -> Tensor(a)
+  self: unfold_backward_symint(grad, self.sym_sizes(), dimension, size, step)
+  result: auto_linear
+
+- name: unfold_backward(Tensor grad_in, SymInt[] input_sizes, int dim, int size, int step) -> Tensor
+  grad_in: grad.unfold(dim, size, step)  # the same unfold is applied to grad
+  result: auto_linear
+
+- name: uniform_(Tensor(a!) self, float from=0, float to=1, *, Generator? generator=None) -> Tensor(a!)
+  self: zeros_like(grad)  # gradient is all zeros
+  result: self_t.zero_()
+
+- name: _unique(Tensor self, bool sorted=True, bool return_inverse=False) -> (Tensor, Tensor)
+  output_differentiability: [True, False]  # only the first output is marked differentiable
+  self: not_implemented("_unique")
+
+- name: unique_dim(Tensor self, int dim, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+  output_differentiability: [True, False, False]  # only the first output is marked differentiable
+  self: not_implemented("unique_dim")
+
+- name: unique_consecutive(Tensor self, bool return_inverse=False, bool return_counts=False, int? dim=None) -> (Tensor, Tensor, Tensor)
+  output_differentiability: [True, False, False]  # only the first output is marked differentiable
+  self: not_implemented("unique_consecutive")
+
+- name: unique_dim_consecutive(Tensor self, int dim, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+  output_differentiability: [True, False, False]  # only the first output is marked differentiable
+  self: not_implemented("unique_dim_consecutive")
+
+- name: _unique2(Tensor self, bool sorted=True, bool return_inverse=False, bool return_counts=False) -> (Tensor, Tensor, Tensor)
+  output_differentiability: [True, False, False]  # only the first output is marked differentiable
+  self: not_implemented("_unique2")
+
+- name: _unsafe_view(Tensor self, SymInt[] size) -> Tensor
+  self: grad.reshape_symint(self.sym_sizes())  # grad is reshaped back to self's sizes
+  result: auto_linear
+
+- name: lift(Tensor self) -> Tensor
+  self: grad  # grad passes through unchanged
+  result: auto_linear
+
+- name: lift_fresh(Tensor(a) self) -> Tensor(a)
+  self: grad  # grad passes through unchanged
+  result: auto_linear
+
+- name: unsqueeze(Tensor(a) self, int dim) -> Tensor(a)
+  self: grad.squeeze(dim)  # grad is squeezed at the same dim
+  result: auto_linear
+
+- name: unsqueeze_(Tensor(a!) self, int dim) -> Tensor(a!)
+  self: grad.squeeze(dim)  # grad is squeezed at the same dim
+  result: auto_linear
+
+- name: var.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> Tensor
+  self: var_backward(grad, self, dim, correction, keepdim)
+  # pointwise + sum
+  result: at::real(var_backward(self_t.conj(), self_p, dim, correction, true).sum(dim.value_or(IntArrayRef({})), keepdim))  # var_backward is reused with conj(self_t) in the grad slot and keepdim forced to true; the real part of the sum is kept
+
+- name: var_mean.correction(Tensor self, int[1]? dim=None, *, Scalar? correction=None, bool keepdim=False) -> (Tensor, Tensor)
+  self: var_mean_backward(grads[0], grads[1], self, dim, correction, keepdim)
+  result0: at::real(var_backward(self_t.conj(), self_p, dim, correction, true).sum(dim.value_or(IntArrayRef({})), keepdim))  # same expression as var.correction's result
+  # linear
+  result1: mean(self_t, dim.value_or(IntArrayRef({})), keepdim)
+
+- name: view(Tensor(a) self, SymInt[] size) -> Tensor(a)
+  dispatch:
+    Default:
+      self: grad.reshape_symint(self.sym_sizes())  # grad is reshaped back to self's sizes
+      result: auto_linear
+    AutogradNestedTensor:
+      self: grad.reshape_as(self)  # the nested variant reshapes against self itself
+      result: auto_linear
+
+- name: view.dtype(Tensor(a) self, ScalarType dtype) -> Tensor(a)
+  output_differentiability: [False]  # the output is marked non-differentiable
+
+- name: view_as_real(Tensor(a) self) -> Tensor(a)
+  self: at::view_as_complex(grad.contiguous()) # gx0 + 1j * gx1
+  result: at::view_as_real(self_t)
+
+- name: view_as_complex(Tensor(a) self) -> Tensor(a)
+  self: at::view_as_real(grad.contiguous().resolve_conj()) # [gx, gy]
+  result: at::view_as_complex(self_t)
+
+- name: where.self(Tensor condition, Tensor self, Tensor other) -> Tensor
+  condition: non_differentiable
+  self: where(condition, grad, 0)
+  other: where(condition, 0, grad)
+  result: where(condition, self_t, other_t)
+
+# weight_norm_cuda_interface_backward does not have an explicitly defined derivative, so if we do happen
+# to be running backward with create_graph=True, fall back to a backward function that uses
+# differentiable ops.
+- name: _weight_norm_interface(Tensor v, Tensor g, int dim=0) -> (Tensor, Tensor)
+  v, g: "grad.defined() ? (GradMode::is_enabled() ? _weight_norm_differentiable_backward(grad.contiguous(), v, g, result1, dim) : _weight_norm_interface_backward(grad.contiguous(), v, g, result1, dim)) : std::tuple<Tensor, Tensor>()"
+
+# zero_: the `self` formula is zeros_like(grad); `result` is auto_linear.
+- name: zero_(Tensor(a!) self) -> Tensor(a!)
+  self: zeros_like(grad)
+  result: auto_linear
+
+# sparse_mask: `mask` is non_differentiable; the `self` formula passes grad,
+# mask and self's layout to sparse_mask_backward.
+- name: sparse_mask(Tensor self, Tensor mask) -> Tensor
+  self: sparse_mask_backward(grad, mask, self.layout())
+  mask: non_differentiable
+
+# _sparse_coo_tensor_with_dims_and_tensors: `indices` is non_differentiable.
+# The `values` formula masks grad with `result` via sparse_mask and then takes
+# `_values()` of the masked tensor.
+- name: _sparse_coo_tensor_with_dims_and_tensors(int sparse_dim, int dense_dim, SymInt[] size, Tensor indices, Tensor values, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False, bool? is_coalesced=None) -> Tensor
+  indices: non_differentiable
+  values: grad.sparse_mask(result)._values()
+
+# sparse_compressed_tensor.comp_plain_value_size: both index tensors are
+# non_differentiable. The `values` formula densifies grad, masks it with
+# `result` via sparse_mask and takes `values()` of the masked tensor.
+- name: sparse_compressed_tensor.comp_plain_value_size(Tensor compressed_indices, Tensor plain_indices, Tensor values, SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=False) -> Tensor
+  compressed_indices: non_differentiable
+  plain_indices: non_differentiable
+  # TODO: remove to_dense after gh-107381 is fixed
+  values: grad.to_dense().sparse_mask(result).values()
+
+# _sparse_sum.dim: the `self` formula delegates to at::_sparse_sum_backward
+# with grad, self and dim. No result formula is listed.
+- name: _sparse_sum.dim(Tensor self, int[1] dim) -> Tensor
+  self: at::_sparse_sum_backward(grad, self, dim)
+
+# _standard_gamma: the `self` formula multiplies grad by
+# _standard_gamma_grad(self, result). No formula is listed for `generator`.
+- name: _standard_gamma(Tensor self, Generator? generator=None) -> Tensor
+  self: grad * _standard_gamma_grad(self, result)
+
+# _standard_gamma_grad: the `self` formula is the not_implemented marker,
+# tagged with this op's name; `output` has no formula.
+- name: _standard_gamma_grad(Tensor self, Tensor output) -> Tensor
+  self: not_implemented("_standard_gamma_grad")
+
+# values: formulas are listed per dispatch key. Default calls
+# values_backward(grad, self). AutogradNestedTensor rebuilds a nested view from
+# the contiguous grad buffer using self's nested size, strides and storage
+# offsets.
+- name: values(Tensor(a) self) -> Tensor(a)
+  dispatch:
+    Default:
+      self: values_backward(grad, self)
+    AutogradNestedTensor:
+      self: at::_nested_view_from_buffer(grad.contiguous(), self._nested_tensor_size(), self._nested_tensor_strides(), self._nested_tensor_storage_offsets())
+
+# Why is _values() not differentiable?
+# See NOTE [ Sparse: autograd and API ]
+# No formulas are listed; the single output is flagged [False] in
+# output_differentiability.
+- name: _values(Tensor(a) self) -> Tensor(a)
+  output_differentiability: [False]
+
+# NN
+# _trilinear: one joint formula covers i1, i2 and i3. Each operand is wrapped
+# with wrap_opt_if, keyed on the grad_input_mask bits of the OTHER two operands
+# (for example i1 is wrapped on mask[1] || mask[2]). The result formula is a
+# three-term sum: _trilinear is evaluated three times, each time with exactly
+# one operand replaced by its `_t` counterpart and the other two by `_p`.
+- name: _trilinear(Tensor i1, Tensor i2, Tensor i3, int[] expand1, int[] expand2, int[] expand3, int[] sumdim, int unroll_dim=1) -> Tensor
+  i1, i2, i3: "_trilinear_backward(grad,
+               wrap_opt_if(i1, grad_input_mask[1] || grad_input_mask[2]),
+               wrap_opt_if(i2, grad_input_mask[0] || grad_input_mask[2]),
+               wrap_opt_if(i3, grad_input_mask[0] || grad_input_mask[1]),
+               expand1, expand2, expand3, sumdim, grad_input_mask)"
+  result: "_trilinear(i1_t, i2_p, i3_p, expand1, expand2, expand3, sumdim, unroll_dim) +
+           _trilinear(i1_p, i2_t, i3_p, expand1, expand2, expand3, sumdim, unroll_dim) +
+           _trilinear(i1_p, i2_p, i3_t, expand1, expand2, expand3, sumdim, unroll_dim)"
+
+# constant_pad_nd: the `self` formula calls constant_pad_nd_backward(grad, pad).
+# The result formula pads self_t with the same `pad` but with a fill of 0
+# instead of `value`.
+- name: constant_pad_nd(Tensor self, SymInt[] pad, Scalar value=0) -> Tensor
+  self: constant_pad_nd_backward(grad, pad)
+  result: constant_pad_nd_symint(self_t, pad, 0)
+
+# binary_cross_entropy: `self` and `target` each have a dedicated backward
+# function. The result formula reuses both: self_t and target_t take the place
+# of grad, at::Reduction::None is forced, the two terms are summed, and
+# apply_loss_reduction applies the caller's `reduction` at the end.
+- name: binary_cross_entropy(Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
+  self: binary_cross_entropy_backward(grad, self, target, weight, reduction)
+  target: binary_cross_entropy_target_backward(grad, self, target, weight, reduction)
+  result: "apply_loss_reduction(
+               binary_cross_entropy_backward(self_t, self_p, target_p, weight, at::Reduction::None)
+             + binary_cross_entropy_target_backward(target_t, self_p, target_p, weight, at::Reduction::None),
+           reduction)"
+
+# binary_cross_entropy_backward: formulas for self, target and grad_output each
+# use a different *_double_backward helper. Note the leading-argument order
+# differs between helpers: the `self` helper takes (grad_output, grad, ...),
+# the `target` helper takes (grad, grad_output, ...). The result formula sums
+# the same three helpers with self_t / target_t / grad_output_t in the grad
+# slot and `_p` values elsewhere.
+- name: binary_cross_entropy_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight=None, int reduction=Mean) -> Tensor
+  self: binary_cross_entropy_double_backward(grad_output, grad, self, target, weight, reduction)
+  target: binary_cross_entropy_double_backward_target(grad, grad_output, self, target, weight, reduction)
+  grad_output: binary_cross_entropy_double_backward_grad_output(grad, self, target, weight, reduction)
+  result: " binary_cross_entropy_double_backward(grad_output_p, self_t, self_p, target_p, weight, reduction)
+          + binary_cross_entropy_double_backward_target(target_t, grad_output_p, self_p, target_p, weight, reduction)
+          + binary_cross_entropy_double_backward_grad_output(grad_output_t, self_p, target_p, weight, reduction)"
+
+# binary_cross_entropy_with_logits: `self` and `target` each have a dedicated
+# backward function that also receives pos_weight. The result formula calls
+# both with self_t / target_t in the grad slot and at::Reduction::None, sums
+# them, then applies the caller's `reduction` through apply_loss_reduction.
+- name: binary_cross_entropy_with_logits(Tensor self, Tensor target, Tensor? weight=None, Tensor? pos_weight=None, int reduction=Mean) -> Tensor
+  self: binary_cross_entropy_with_logits_backward(grad, self, target, weight, pos_weight, reduction)
+  target: binary_cross_entropy_with_logits_target_backward(grad, self, target, weight, pos_weight, reduction)
+  result: "apply_loss_reduction(
+               binary_cross_entropy_with_logits_backward(self_t, self_p, target_p, weight, pos_weight, at::Reduction::None)
+             + binary_cross_entropy_with_logits_target_backward(target_t, self_p, target_p, weight, pos_weight, at::Reduction::None),
+           reduction)"
+
+# embedding: `indices` is non_differentiable. The `weight` formula calls
+# embedding_backward_symint, passing weight.sym_size(0) along with the
+# forward's padding_idx, scale_grad_by_freq and sparse flags. `result` is
+# auto_linear.
+- name: embedding(Tensor weight, Tensor indices, SymInt padding_idx=-1, bool scale_grad_by_freq=False, bool sparse=False) -> Tensor
+  indices: non_differentiable
+  weight: embedding_backward_symint(grad, indices, weight.sym_size(0), padding_idx, scale_grad_by_freq, sparse)
+  result: auto_linear
+
+# embedding_dense_backward: `indices` is non_differentiable. The grad_output
+# formula calls embedding_dense_double_backward_symint with grad, indices and
+# padding_idx. `result` is auto_linear.
+- name: embedding_dense_backward(Tensor grad_output, Tensor indices, SymInt num_weights, SymInt padding_idx, bool scale_grad_by_freq) -> Tensor
+  grad_output: embedding_dense_double_backward_symint(grad, indices, padding_idx)
+  indices: non_differentiable
+  result: auto_linear
+
+# _embedding_bag: `indices` and `offsets` are non_differentiable. The `weight`
+# formula feeds the op's own outputs result1, result2 and result3 back into
+# _embedding_bag_backward_symint together with weight.sym_size(0). The
+# per_sample_weights formula uses only result1. No result formulas are listed.
+- name: _embedding_bag(Tensor weight, Tensor indices, Tensor offsets, bool scale_grad_by_freq=False, int mode=0, bool sparse=False, Tensor? per_sample_weights=None, bool include_last_offset=False, int padding_idx=-1) -> (Tensor, Tensor, Tensor, Tensor)
+  indices: non_differentiable
+  offsets: non_differentiable
+  weight: _embedding_bag_backward_symint(grad, indices, offsets, result1, result2, result3, weight.sym_size(0), scale_grad_by_freq, mode, sparse, per_sample_weights, padding_idx)
+  per_sample_weights: _embedding_bag_per_sample_weights_backward(grad, weight, indices, offsets, result1, mode, padding_idx)
+
+# _embedding_bag_dense_backward: this entry only marks the four tensor inputs
+# indices, offset2bag, bag_size and maximum_indices as non_differentiable. It
+# lists no formula for `grad` or `per_sample_weights`.
+- name: _embedding_bag_dense_backward(Tensor grad, Tensor indices, Tensor offset2bag, Tensor bag_size, Tensor maximum_indices, SymInt num_weights, bool scale_grad_by_freq, int mode, Tensor? per_sample_weights, int padding_idx=-1) -> Tensor
+  indices: non_differentiable
+  offset2bag: non_differentiable
+  bag_size: non_differentiable
+  maximum_indices: non_differentiable
+
+# embedding_renorm_: `indices` is non_differentiable and the `self` formula is
+# the not_implemented marker, tagged "embedding_renorm".
+- name: embedding_renorm_(Tensor(a!) self, Tensor indices, float max_norm, float norm_type) -> Tensor(a!)
+  indices: non_differentiable
+  self: not_implemented("embedding_renorm")
+
+# mse_loss: the `target` formula reuses mse_loss_backward with the self and
+# target arguments swapped. The result formula evaluates both terms with
+# conjugated tangents and at::Reduction::None, conjugates each term back, sums
+# them, and applies the caller's `reduction` through apply_loss_reduction.
+- name: mse_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
+  self: mse_loss_backward(grad, self, target, reduction)
+  target: mse_loss_backward(grad, target, self, reduction)
+  result: apply_loss_reduction(mse_loss_backward(self_t.conj(), self_p, target_p, at::Reduction::None).conj() + mse_loss_backward(target_t.conj(), target_p, self_p, at::Reduction::None).conj(), reduction)
+
+# multi_margin_loss: `target` is non_differentiable; the `self` formula
+# forwards every forward argument to multi_margin_loss_backward. No result
+# formula is listed.
+- name: multi_margin_loss(Tensor self, Tensor target, Scalar p=1, Scalar margin=1, Tensor? weight=None, int reduction=Mean) -> Tensor
+  self: multi_margin_loss_backward(grad, self, target, p, margin, weight, reduction)
+  target: non_differentiable
+
+# multilabel_margin_loss_forward: `target` is non_differentiable. The `self`
+# formula passes the op's second output, is_target, to
+# multilabel_margin_loss_backward.
+- name: multilabel_margin_loss_forward(Tensor self, Tensor target, int reduction) -> (Tensor output, Tensor is_target)
+  self: multilabel_margin_loss_backward(grad, self, target, reduction, is_target)
+  target: non_differentiable
+
+# nll_loss_forward: `target` is non_differentiable. The `self` formula passes
+# the op's second output, total_weight, to nll_loss_backward_symint. The
+# `output` formula reruns nll_loss_forward_symint on self_t and keeps tuple
+# element 0.
+- name: nll_loss_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)
+  self: nll_loss_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
+  target: non_differentiable
+  output: std::get<0>(nll_loss_forward_symint(self_t, target, weight, reduction, ignore_index))
+
+# nll_loss2d_forward: `target` is non_differentiable. The `self` formula passes
+# the op's second output, total_weight, to nll_loss2d_backward_symint. The
+# `output` formula reruns nll_loss2d_forward_symint on self_t and keeps tuple
+# element 0.
+- name: nll_loss2d_forward(Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index) -> (Tensor output, Tensor total_weight)
+  self: nll_loss2d_backward_symint(grad, self, target, weight, reduction, ignore_index, total_weight)
+  target: non_differentiable
+  output: std::get<0>(nll_loss2d_forward_symint(self_t, target, weight, reduction, ignore_index))
+
+# smooth_l1_loss: the `target` formula reuses smooth_l1_loss_backward with self
+# and target swapped. The result formula evaluates both terms with conjugated
+# tangents and at::Reduction::None, conjugates each term back, sums them, and
+# applies the caller's `reduction` through apply_loss_reduction.
+- name: smooth_l1_loss(Tensor self, Tensor target, int reduction=Mean, float beta=1.0) -> Tensor
+  self: smooth_l1_loss_backward(grad, self, target, reduction, beta)
+  target: smooth_l1_loss_backward(grad, target, self, reduction, beta)
+  result: apply_loss_reduction(smooth_l1_loss_backward(self_t.conj(), self_p, target_p, at::Reduction::None, beta).conj() + smooth_l1_loss_backward(target_t.conj(), target_p, self_p, at::Reduction::None, beta).conj(), reduction)
+
+# huber_loss: the `target` formula reuses huber_loss_backward with self and
+# target swapped. The result formula evaluates both terms with conjugated
+# tangents and at::Reduction::None, conjugates each term back, sums them, and
+# applies the caller's `reduction` through apply_loss_reduction.
+- name: huber_loss(Tensor self, Tensor target, int reduction=Mean, float delta=1.0) -> Tensor
+  self: huber_loss_backward(grad, self, target, reduction, delta)
+  target: huber_loss_backward(grad, target, self, reduction, delta)
+  result: apply_loss_reduction(huber_loss_backward(self_t.conj(), self_p, target_p, at::Reduction::None, delta).conj() + huber_loss_backward(target_t.conj(), target_p, self_p, at::Reduction::None, delta).conj(), reduction)
+
+# soft_margin_loss: only `self` has a formula; none is listed for `target`.
+# The result formula calls soft_margin_loss_backward on the conjugated self_t
+# with at::Reduction::None, conjugates back, then applies `reduction`. It
+# passes `target` as-is (there is no `_p` suffix here).
+- name: soft_margin_loss(Tensor self, Tensor target, int reduction=Mean) -> Tensor
+  self: soft_margin_loss_backward(grad, self, target, reduction)
+  result: apply_loss_reduction(soft_margin_loss_backward(self_t.conj(), self_p, target, at::Reduction::None).conj(), reduction)
+
+# relu: the `self` formula calls threshold_backward on `result` (the op's
+# output, not its input) with a threshold of 0. `result` is auto_element_wise.
+- name: relu(Tensor self) -> Tensor
+  self: threshold_backward(grad, result, 0)
+  result: auto_element_wise
+
+# silu: when GradMode::is_enabled() the `self` formula uses
+# infinitely_differentiable_silu_backward, otherwise silu_backward. `result`
+# is auto_element_wise.
+- name: silu(Tensor self) -> Tensor
+  self: "GradMode::is_enabled() ? infinitely_differentiable_silu_backward(grad, self) : silu_backward(grad, self)"
+  result: auto_element_wise
+
+# mish: when GradMode::is_enabled() the `self` formula uses
+# infinitely_differentiable_mish_backward, otherwise mish_backward. `result`
+# is auto_element_wise.
+- name: mish(Tensor self) -> Tensor
+  self: "GradMode::is_enabled() ? infinitely_differentiable_mish_backward(grad, self) : mish_backward(grad, self)"
+  result: auto_element_wise
+
+# elu (out-of-place): elu_backward is called with is_result=false and the
+# input `self` as its last argument. `result` is auto_element_wise.
+- name: elu(Tensor self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor
+  self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ false, self)
+  result: auto_element_wise
+
+# elu_ (in-place): elu_backward is called with is_result=true and `result` as
+# its last argument. The result formula computes the same call on
+# original_self_t and writes it into self_t with copy_.
+- name: elu_(Tensor(a!) self, Scalar alpha=1, Scalar scale=1, Scalar input_scale=1) -> Tensor(a!)
+  self: elu_backward(grad, alpha, scale, input_scale, /* is_result */ true, result)
+  result: self_t.copy_(elu_backward(original_self_t, alpha, scale, input_scale, /* is_result */ true, result))
+
+# celu (out-of-place): reuses elu_backward with scale 1 and an input_scale of
+# 1.0/alpha.toFloat(), is_result=false, passing the input `self`. `result` is
+# auto_element_wise.
+- name: celu(Tensor self, Scalar alpha=1.0) -> Tensor
+  self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ false, self)
+  result: auto_element_wise
+
+# celu_ (in-place): reuses elu_backward with scale 1, input_scale
+# 1.0/alpha.toFloat() and is_result=true, passing `result`. The result formula
+# computes the same call on original_self_t and writes it into self_t with
+# copy_.
+- name: celu_(Tensor(a!) self, Scalar alpha=1.0) -> Tensor(a!)
+  self: elu_backward(grad, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ true, result)
+  result: self_t.copy_(elu_backward(original_self_t, alpha, 1, 1.0/alpha.toFloat(), /* is_result */ true, result))
+
+# gelu: the `self` formula forwards the `approximate` mode string to
+# gelu_backward. `result` is auto_element_wise.
+- name: gelu(Tensor self, *, str approximate='none') -> Tensor
+  self: gelu_backward(grad, self, approximate)
+  result: auto_element_wise
+
+# gelu_backward: the grad_output formula reapplies gelu_backward to grad; the
+# `self` formula uses gelu_double_backward. The result formula is the sum of
+# the two, with grad_output_t and self_t respectively in the grad slot.
+- name: gelu_backward(Tensor grad_output, Tensor self, *, str approximate='none') -> Tensor
+  grad_output: gelu_backward(grad, self, approximate)
+  self: gelu_double_backward(grad, grad_output, self, approximate)
+  result: gelu_backward(grad_output_t, self_p, approximate) + gelu_double_backward(self_t, grad_output_p, self_p, approximate)
+
+# glu: the `self` formula calls glu_backward(grad, self, dim). The result
+# formula calls glu_jvp with the op's `result`, self_p, self_t and dim.
+- name: glu(Tensor self, int dim=-1) -> Tensor
+  # TODO: glu_backward can benefit from forward result,
+  # and forward ad/forward over reverse ad for that matter
+  self: glu_backward(grad, self, dim)
+  result: glu_jvp(result, self_p, self_t, dim)
+
+# hardshrink: the `self` formula calls hardshrink_backward(grad, self, lambd).
+# `result` is auto_element_wise.
+- name: hardshrink(Tensor self, Scalar lambd=0.5) -> Tensor
+  self: hardshrink_backward(grad, self, lambd)
+  result: auto_element_wise
+
+# hardshrink_backward: the grad_out formula reapplies hardshrink_backward to
+# grad; the `self` formula is zeros_like(grad). The result formula passes
+# grad_out_t through where self_p > lambd or self_p < -lambd, and elsewhere
+# uses a zero scalar (built with result's options) expanded to result's shape.
+- name: hardshrink_backward(Tensor grad_out, Tensor self, Scalar lambd) -> Tensor
+  grad_out: hardshrink_backward(grad, self, lambd)
+  self: zeros_like(grad)
+  result: at::where((self_p > lambd).logical_or(self_p < -lambd), grad_out_t, at::zeros({}, result.options()).expand_as(result))
+
+# hardtanh: the `self` formula forwards min_val and max_val to
+# hardtanh_backward. `result` is auto_element_wise.
+- name: hardtanh(Tensor self, Scalar min_val=-1, Scalar max_val=1) -> Tensor
+  self: hardtanh_backward(grad, self, min_val, max_val)
+  result: auto_element_wise
+
+# leaky_relu (out-of-place): leaky_relu_backward is called with the input
+# `self` and a trailing flag of false. `result` is auto_element_wise.
+- name: leaky_relu(Tensor self, Scalar negative_slope=0.01) -> Tensor
+  self: leaky_relu_backward(grad, self, negative_slope, false)
+  result: auto_element_wise
+
+# leaky_relu_ (in-place): leaky_relu_backward is called with `result` and a
+# trailing flag of true. The result formula applies the same call to the
+# conjugated original_self_t, conjugates back, and writes into self_t with
+# copy_.
+- name: leaky_relu_(Tensor(a!) self, Scalar negative_slope=0.01) -> Tensor(a!)
+  self: leaky_relu_backward(grad, result, negative_slope, true)
+  result: self_t.copy_(leaky_relu_backward(original_self_t.conj(), result, negative_slope, true).conj())
+
+# log_sigmoid_forward: output_differentiability is [True, False] for the two
+# outputs (output, buffer). Both the `self` and the `output` formulas pass
+# `buffer` to log_sigmoid_backward; the `output` formula conjugates self_t
+# going in and the result coming out.
+- name: log_sigmoid_forward(Tensor self) -> (Tensor output, Tensor buffer)
+  self: log_sigmoid_backward(grad, self, buffer)
+  output: log_sigmoid_backward(self_t.conj(), self_p, buffer).conj()
+  output_differentiability: [True, False]
+
+# _log_softmax: the `self` formula calls _log_softmax_backward_data with the
+# op's `result` and self's scalar type. The result formula subtracts
+# logsumexp_jvp over {dim} (last argument true) from self_t.
+- name: _log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+  self: _log_softmax_backward_data(grad, result, dim, self.scalar_type())
+  result: self_t - logsumexp_jvp(self_p, self_t, {dim}, true)
+
+# _sparse_log_softmax: the `self` formula passes grad, result, dim and self to
+# _sparse_log_softmax_backward_data. No result formula is listed.
+- name: _sparse_log_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+  self: _sparse_log_softmax_backward_data(grad, result, dim, self)
+
+# _masked_softmax: `mask` is non_differentiable. The `self` formula passes
+# grad, result, mask and dim to _masked_softmax_backward; mask_type is not
+# forwarded.
+- name: _masked_softmax(Tensor self, Tensor mask, int? dim=None, int? mask_type=None) -> Tensor
+  self: _masked_softmax_backward(grad, result, mask, dim)
+  mask: non_differentiable
+
+# _prelu_kernel: the joint `self, weight` formula yields an empty tuple when
+# grad is undefined, otherwise _prelu_kernel_backward(grad, self, weight).
+# The result formula is self_t where self_p >= 0, and
+# weight_p * self_t + weight_t * self_p elsewhere.
+- name: _prelu_kernel(Tensor self, Tensor weight) -> Tensor
+  self, weight: "grad.defined() ? _prelu_kernel_backward(grad, self, weight) : std::tuple<Tensor, Tensor>()"
+  result: at::where(self_p >= 0, self_t, weight_p * self_t + weight_t * self_p)
+
+# _prelu_kernel_backward: every formula branches on which of grads[0] and
+# grads[1] is defined, and then on self >= 0.
+#   grad_output: with both grads defined, grads[0] where self >= 0 and
+#     grads[0] * weight + grads[1] * self elsewhere. With only grads[0], the
+#     grads[1] term is dropped. With grads[0] undefined, a zero scalar where
+#     self >= 0 and grads[1] * self elsewhere.
+#   self / weight: zero where self >= 0, grad_output times the relevant grad
+#     elsewhere; zeros_like(self) when that grad is undefined.
+# NOTE(review): the `weight` fallback is zeros_like(self), not
+# zeros_like(weight) -- confirm this is intended for the shapes involved.
+- name: _prelu_kernel_backward(Tensor grad_output, Tensor self, Tensor weight) -> (Tensor, Tensor)
+  grad_output: "grads[0].defined() ?
+                (grads[1].defined() ? at::where(self >= 0, grads[0], grads[0] * weight + grads[1] * self)
+                                    : at::where(self >= 0, grads[0], grads[0] * weight))
+                                    : at::where(self >= 0, at::zeros({}, grad_output.options()), grads[1] * self)"
+  self: "grads[1].defined() ? at::where(self >= 0, at::zeros({}, self.options()), grad_output * grads[1]) : zeros_like(self)"
+  weight: "grads[0].defined() ? at::where(self >= 0, at::zeros({}, weight.options()), grad_output * grads[0]) : zeros_like(self)"
+  result0: at::where(self_p >= 0, grad_output_t, grad_output_t * weight_p + grad_output_p * weight_t)
+  result1: at::where(self_p >= 0, at::zeros({}, self_p.options()), grad_output_p * self_t + grad_output_t * self_p)
+
+# rrelu_with_noise (out-of-place): rrelu_with_noise_backward is called with
+# the input `self` and a trailing flag of false. `result` is
+# auto_element_wise.
+- name: rrelu_with_noise(Tensor self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor
+  self: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
+  result: auto_element_wise
+
+# rrelu_with_noise_ (in-place): rrelu_with_noise_backward is called with
+# `result` and a trailing flag of true. No result formula is listed.
+- name: rrelu_with_noise_(Tensor(a!) self, Tensor noise, Scalar lower=0.125, Scalar upper=0.3333333333333333, bool training=False, Generator? generator=None) -> Tensor(a!)
+  self: rrelu_with_noise_backward(grad, result, noise, lower, upper, training, true)
+
+# _softmax: the `self` formula calls _softmax_backward_data with the op's
+# `result` and self's scalar type. The result formula multiplies `result` by
+# (self_t - logsumexp_jvp(self_p, self_t, {dim}, true)).
+- name: _softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+  self: _softmax_backward_data(grad, result, dim, self.scalar_type())
+  result: result * (self_t - logsumexp_jvp(self_p, self_t, {dim}, true))
+
+# _sparse_softmax: the `self` formula passes grad, result, dim and self to
+# _sparse_softmax_backward_data. No result formula is listed.
+- name: _sparse_softmax(Tensor self, int dim, bool half_to_float) -> Tensor
+  self: _sparse_softmax_backward_data(grad, result, dim, self)
+
+# _sparse_sparse_matmul: both operands use sparse_sparse_matmul_backward; the
+# trailing integer (0 for self, 1 for other) is the only difference between
+# the two calls.
+- name: _sparse_sparse_matmul(Tensor self, Tensor other) -> Tensor
+  self: sparse_sparse_matmul_backward(grad, self, other, 0)
+  other: sparse_sparse_matmul_backward(grad, self, other, 1)
+
+# softplus: the `self` formula forwards beta and threshold to
+# softplus_backward. `result` is auto_element_wise.
+- name: softplus(Tensor self, Scalar beta=1, Scalar threshold=20) -> Tensor
+  self: softplus_backward(grad, self, beta, threshold)
+  result: auto_element_wise
+
+# softshrink: the `self` formula calls softshrink_backward(grad, self, lambd).
+# `result` is auto_element_wise.
+- name: softshrink(Tensor self, Scalar lambd=0.5) -> Tensor
+  self: softshrink_backward(grad, self, lambd)
+  result: auto_element_wise
+
+# threshold (out-of-place): the `self` formula calls threshold_backward with
+# the input `self` and `threshold`; `value` is not used by the formula.
+# `result` is auto_element_wise.
+- name: threshold(Tensor self, Scalar threshold, Scalar value) -> Tensor
+  self: threshold_backward(grad, self, threshold)
+  result: auto_element_wise
+
+# threshold_ (in-place): the `self` formula calls threshold_backward with
+# `self` and `threshold`. The result formula applies threshold_backward to the
+# conjugated self_t against original_self_p, conjugates back, and writes into
+# self_t with copy_.
+- name: threshold_(Tensor(a!) self, Scalar threshold, Scalar value) -> Tensor(a!)
+  self: threshold_backward(grad, self, threshold)
+  result: self_t.copy_(threshold_backward(self_t.conj(), original_self_p, threshold).conj())
+
+# reflection_pad1d: the `self` formula calls reflection_pad1d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: reflection_pad1d(Tensor self, SymInt[2] padding) -> Tensor
+  self: reflection_pad1d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# reflection_pad2d: the `self` formula calls reflection_pad2d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: reflection_pad2d(Tensor self, SymInt[4] padding) -> Tensor
+  self: reflection_pad2d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# reflection_pad3d: the `self` formula calls reflection_pad3d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: reflection_pad3d(Tensor self, SymInt[6] padding) -> Tensor
+  self: reflection_pad3d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# replication_pad1d: the `self` formula calls replication_pad1d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: replication_pad1d(Tensor self, SymInt[2] padding) -> Tensor
+  self: replication_pad1d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# replication_pad2d: the `self` formula calls replication_pad2d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: replication_pad2d(Tensor self, SymInt[4] padding) -> Tensor
+  self: replication_pad2d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# replication_pad3d: the `self` formula calls replication_pad3d_backward_symint
+# with grad, self and padding. `result` is auto_linear.
+- name: replication_pad3d(Tensor self, SymInt[6] padding) -> Tensor
+  self: replication_pad3d_backward_symint(grad, self, padding)
+  result: auto_linear
+
+# upsample_linear1d: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and scales to the matching *_backward_symint
+# function. `result` is auto_linear.
+- name: upsample_linear1d(Tensor self, SymInt[1] output_size, bool align_corners, float? scales=None) -> Tensor
+  self: upsample_linear1d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales)
+  result: auto_linear
+
+# upsample_bilinear2d: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and both scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: upsample_bilinear2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: upsample_bilinear2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales_h, scales_w)
+  result: auto_linear
+
+# _upsample_bilinear2d_aa: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and both scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: _upsample_bilinear2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: _upsample_bilinear2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales_h, scales_w)
+  result: auto_linear
+
+# upsample_bicubic2d: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and both scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: upsample_bicubic2d(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: upsample_bicubic2d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales_h, scales_w)
+  result: auto_linear
+
+# _upsample_bicubic2d_aa: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and both scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: _upsample_bicubic2d_aa(Tensor self, SymInt[2] output_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: _upsample_bicubic2d_aa_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales_h, scales_w)
+  result: auto_linear
+
+# upsample_trilinear3d: the `self` formula passes grad, output_size, self's
+# symbolic sizes, align_corners and the three scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: upsample_trilinear3d(Tensor self, SymInt[3] output_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: upsample_trilinear3d_backward_symint(grad, output_size, self.sym_sizes(), align_corners, scales_d, scales_h, scales_w)
+  result: auto_linear
+
+# upsample_nearest1d: the `self` formula passes grad, output_size, self's
+# symbolic sizes and scales to the matching *_backward_symint function.
+# `result` is auto_linear.
+- name: upsample_nearest1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor
+  self: upsample_nearest1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
+  result: auto_linear
+
+# _upsample_nearest_exact1d: the `self` formula passes grad, output_size,
+# self's symbolic sizes and scales to the matching *_backward_symint function.
+# `result` is auto_linear.
+- name: _upsample_nearest_exact1d(Tensor self, SymInt[1] output_size, float? scales=None) -> Tensor
+  self: _upsample_nearest_exact1d_backward_symint(grad, output_size, self.sym_sizes(), scales)
+  result: auto_linear
+
+# upsample_nearest2d: the `self` formula passes grad, output_size, self's
+# symbolic sizes and both scale factors to the matching *_backward_symint
+# function. `result` is auto_linear.
+- name: upsample_nearest2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: upsample_nearest2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, scales_w)
+  result: auto_linear
+
+# _upsample_nearest_exact2d: the `self` formula passes grad, output_size,
+# self's symbolic sizes and both scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: _upsample_nearest_exact2d(Tensor self, SymInt[2] output_size, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: _upsample_nearest_exact2d_backward_symint(grad, output_size, self.sym_sizes(), scales_h, scales_w)
+  result: auto_linear
+
+# upsample_nearest3d: the `self` formula passes grad, output_size, self's
+# symbolic sizes and the three scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: upsample_nearest3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: upsample_nearest3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, scales_h, scales_w)
+  result: auto_linear
+
+# _upsample_nearest_exact3d: the `self` formula passes grad, output_size,
+# self's symbolic sizes and the three scale factors to the matching
+# *_backward_symint function. `result` is auto_linear.
+- name: _upsample_nearest_exact3d(Tensor self, SymInt[3] output_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  self: _upsample_nearest_exact3d_backward_symint(grad, output_size, self.sym_sizes(), scales_d, scales_h, scales_w)
+  result: auto_linear
+
+# pixel_shuffle: the `self` formula applies pixel_unshuffle to grad with the
+# same factor. `result` is auto_linear.
+- name: pixel_shuffle(Tensor self, int upscale_factor) -> Tensor
+  self: pixel_unshuffle(grad, upscale_factor)
+  result: auto_linear
+
+# pixel_unshuffle: the `self` formula applies pixel_shuffle to grad with the
+# same factor. `result` is auto_linear.
+- name: pixel_unshuffle(Tensor self, int downscale_factor) -> Tensor
+  self: pixel_shuffle(grad, downscale_factor)
+  result: auto_linear
+
+# _adaptive_avg_pool2d: the `self` formula calls
+# _adaptive_avg_pool2d_backward(grad, self); output_size is not forwarded.
+# `result` is auto_linear.
+- name: _adaptive_avg_pool2d(Tensor self, SymInt[2] output_size) -> Tensor
+  self: _adaptive_avg_pool2d_backward(grad, self)
+  result: auto_linear
+
+# _adaptive_avg_pool3d: the `self` formula calls
+# _adaptive_avg_pool3d_backward(grad, self); output_size is not forwarded.
+# `result` is auto_linear.
+- name: _adaptive_avg_pool3d(Tensor self, SymInt[3] output_size) -> Tensor
+  self: _adaptive_avg_pool3d_backward(grad, self)
+  result: auto_linear
+
+# adaptive_max_pool2d: output_differentiability is [True, False] for the two
+# outputs. The `self` formula passes result1 (the second output) to the
+# backward function. The result0 formula flattens the last two dims of self_t
+# and result1, gathers self_t at result1 along the last dim, and views the
+# outcome with result1's shape.
+- name: adaptive_max_pool2d(Tensor self, int[2] output_size) -> (Tensor, Tensor)
+  self: adaptive_max_pool2d_backward(grad, self, result1)
+  result0: gather(self_t.flatten(-2), -1, result1.flatten(-2)).view_as(result1)
+  output_differentiability: [True, False]
+
+# adaptive_max_pool3d: output_differentiability is [True, False] for the two
+# outputs. The `self` formula passes result1 (the second output) to the
+# backward function. The result0 formula flattens the last three dims of
+# self_t and result1, gathers self_t at result1 along the last dim, and views
+# the outcome with result1's shape.
+- name: adaptive_max_pool3d(Tensor self, int[3] output_size) -> (Tensor, Tensor)
+  self: adaptive_max_pool3d_backward(grad, self, result1)
+  result0: gather(self_t.flatten(-3), -1, result1.flatten(-3)).view_as(result1)
+  output_differentiability: [True, False]
+
+# avg_pool2d: the `self` formula forwards every forward argument to
+# avg_pool2d_backward. `result` is auto_linear.
+- name: avg_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
+  self: avg_pool2d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+  result: auto_linear
+
+# avg_pool3d: the `self` formula forwards every forward argument to
+# avg_pool3d_backward. `result` is auto_linear.
+- name: avg_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, bool ceil_mode=False, bool count_include_pad=True, int? divisor_override=None) -> Tensor
+  self: avg_pool3d_backward(grad, self, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+  result: auto_linear
+
+# fractional_max_pool2d: output_differentiability is [True, False] for the two
+# outputs; no formula is listed for random_samples. The `self` formula passes
+# result1 (the second output) to the backward function. The result0 formula
+# gathers self_t at result1 over the flattened last two dims and views the
+# outcome with result1's shape.
+- name: fractional_max_pool2d(Tensor self, int[2] kernel_size, int[2] output_size, Tensor random_samples) -> (Tensor, Tensor)
+  self: fractional_max_pool2d_backward(grad, self, kernel_size, output_size, result1)
+  result0: gather(self_t.flatten(-2), -1, result1.flatten(-2)).view_as(result1)
+  output_differentiability: [True, False]
+
+# fractional_max_pool3d: output_differentiability is [True, False] for the two
+# outputs; no formula is listed for random_samples. The `self` formula passes
+# result1 (the second output) to the backward function. The result0 formula
+# gathers self_t at result1 over the flattened last three dims and views the
+# outcome with result1's shape.
+- name: fractional_max_pool3d(Tensor self, int[3] kernel_size, int[3] output_size, Tensor random_samples) -> (Tensor, Tensor)
+  self: fractional_max_pool3d_backward(grad, self, kernel_size, output_size, result1)
+  result0: gather(self_t.flatten(-3), -1, result1.flatten(-3)).view_as(result1)
+  output_differentiability: [True, False]
+
+# linear: one joint formula covers input, weight and bias. It yields an empty
+# 3-tuple when grad is undefined, otherwise calls linear_backward with
+# (input, grad, weight, grad_input_mask). No result formula is listed.
+- name: linear(Tensor input, Tensor weight, Tensor? bias=None) -> Tensor
+  input, weight, bias: "grad.defined() ? linear_backward(input, grad, weight, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# linear_backward: one joint formula covers self, grad_output and weight. It
+# passes the whole `grads` collection, rather than a single grad, to
+# linear_double_backward.
+- name: linear_backward(Tensor self, Tensor grad_output, Tensor weight, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  self, grad_output, weight: linear_double_backward(grads, self, grad_output, weight)
+
+#mps
+# max_pool2d: the `self` formula forwards every forward argument to
+# max_pool2d_backward. No result formula is listed.
+- name: max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+  self: max_pool2d_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode)
+
+# _mps_convolution: one joint formula covers self, weight and bias. It yields
+# an empty 3-tuple when grad is undefined, otherwise calls
+# mps_convolution_backward_symint with grad_input_mask. No result formula is
+# listed.
+- name: _mps_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
+  self, weight, bias: "grad.defined() ? mps_convolution_backward_symint(self, grad, weight, padding, stride, dilation, groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# mps_convolution_backward: one joint formula covers grad_output, self and
+# weight, delegating to _convolution_double_backward_symint. The call passes
+# stride before padding, unlike this op's own signature. `transposed` is fixed
+# to false and output_padding is a zero vector sized like `padding`.
+- name: mps_convolution_backward(Tensor self, Tensor grad_output, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  grad_output, self, weight: _convolution_double_backward_symint(grads[0], grads[1], grads[2], grad_output, weight, self, stride, padding, dilation, false, std::vector<c10::SymInt>(padding.size(), 0), groups, grad_input_mask)
+
+# max_pool2d_with_indices: output_differentiability is [True, False] for the
+# two outputs. The `self` formula passes result1 (the second output) to the
+# backward function. The result0 formula gathers self_t at result1 over the
+# flattened last two dims and views the outcome with result1's shape.
+- name: max_pool2d_with_indices(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+  self: max_pool2d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, result1)
+  result0: gather(self_t.flatten(-2), -1, result1.flatten(-2)).view_as(result1)
+  output_differentiability: [True, False]
+
+# max_pool3d_with_indices: output_differentiability is [True, False] for the
+# two outputs. The `self` formula passes result1 (the second output) to the
+# backward function. The result0 formula gathers self_t at result1 over the
+# flattened last three dims and views the outcome with result1's shape.
+- name: max_pool3d_with_indices(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> (Tensor, Tensor)
+  self: max_pool3d_with_indices_backward(grad, self, kernel_size, stride, padding, dilation, ceil_mode, result1)
+  result0: gather(self_t.flatten(-3), -1, result1.flatten(-3)).view_as(result1)
+  output_differentiability: [True, False]
+
+# max_unpool2d: `indices` is non_differentiable. The `self` formula calls
+# max_pool_double_backward(grad, indices, 2). `result` is auto_linear.
+- name: max_unpool2d(Tensor self, Tensor indices, SymInt[2] output_size) -> Tensor
+  self: max_pool_double_backward(grad, indices, 2)
+  indices: non_differentiable
+  result: auto_linear
+
+# max_unpool3d: `indices` is non_differentiable. The `self` formula calls
+# max_pool_double_backward(grad, indices, 3); stride and padding are not
+# forwarded. `result` is auto_linear.
+- name: max_unpool3d(Tensor self, Tensor indices, SymInt[3] output_size, int[3] stride, int[3] padding) -> Tensor
+  self: max_pool_double_backward(grad, indices, 3)
+  indices: non_differentiable
+  result: auto_linear
+
+# convolution: one joint formula covers input, weight and bias. It yields an
+# empty 3-tuple when grad is undefined, otherwise calls
+# convolution_backward_symint with bias->sym_sizes() and grad_input_mask. The
+# result formula calls convolution_jvp with the `_p` / `_t` pair of each of
+# input, weight and bias.
+- name: convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
+  input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_sizes(), stride, padding, dilation, transposed, output_padding, groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result: convolution_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, stride, padding, dilation, transposed, output_padding, groups)
+
+# TorchScript serializes calls to _convolution so this entry is present until that is changed to use convolution.
+# Note that the benchmark, deterministic, cudnn_enabled, and allow_tf32 flags are queried from the global context
+# by convolution_backward instead of being passed along from the forward pass.
+# The joint input/weight/bias formula yields an empty 3-tuple when grad is
+# undefined, otherwise calls convolution_backward_symint with
+# bias->sym_sizes() and grad_input_mask. The result formula, by contrast,
+# forwards all four flags to _convolution_jvp.
+- name: _convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool benchmark, bool deterministic, bool cudnn_enabled, bool allow_tf32) -> Tensor
+  input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_sizes(), stride, padding, dilation, transposed, output_padding, groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+  result: _convolution_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, stride, padding, dilation, transposed, output_padding, groups, benchmark, deterministic, cudnn_enabled, allow_tf32)
+
+# convolution_backward: the joint grad_output/input/weight formula delegates
+# to _convolution_double_backward_symint, passing (grad_output, weight, input)
+# in that order.
+#   result0: element 0 of two convolution_backward_symint calls with mask
+#     {true, false, false}; one swaps in weight_t, the other grad_output_t.
+#   result1: element 1 of two calls with mask {false, true, false}; one swaps
+#     in input_t, the other grad_output_t.
+#   result2: convolution_backward_jvp_grad_bias(grad_output_t, result2).
+- name: convolution_backward(Tensor grad_output, Tensor input, Tensor weight, SymInt[]? bias_sizes, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor, Tensor, Tensor)
+  grad_output, input, weight: _convolution_double_backward_symint(grads[0], grads[1], grads[2], grad_output, weight, input, stride, padding, dilation, transposed, output_padding, groups, grad_input_mask)
+  result0: std::get<0>(convolution_backward_symint(grad_output_p, input_p, weight_t, bias_sizes, stride, padding, dilation, transposed, output_padding, groups, {true, false, false})) + std::get<0>(convolution_backward_symint(grad_output_t, input_p, weight_p, bias_sizes, stride, padding, dilation, transposed, output_padding, groups, {true, false, false}))
+  result1: std::get<1>(convolution_backward_symint(grad_output_p, input_t, weight_p, bias_sizes, stride, padding, dilation, transposed, output_padding, groups, {false, true, false})) + std::get<1>(convolution_backward_symint(grad_output_t, input_p, weight_p, bias_sizes, stride, padding, dilation, transposed, output_padding, groups, {false, true, false}))
+  result2: convolution_backward_jvp_grad_bias(grad_output_t, result2)
+
+# convolution_overrideable: one joint formula covers input, weight and bias.
+# It yields an empty 3-tuple when grad is undefined, otherwise calls
+# convolution_backward_overrideable_symint (no bias sizes argument) with
+# grad_input_mask.
+- name: convolution_overrideable(Tensor input, Tensor weight, Tensor? bias, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups) -> Tensor
+  input, weight, bias: "grad.defined() ? convolution_backward_overrideable_symint(grad, input, weight, stride, padding, dilation, transposed, output_padding, groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# convolution_backward_overrideable: the joint grad_output/input/weight
+# formula delegates to _convolution_double_backward_symint, passing
+# (grad_output, weight, input) in that order along with the forward's
+# convolution parameters and grad_input_mask.
+- name: convolution_backward_overrideable(Tensor grad_output, Tensor input, Tensor weight, SymInt[] stride, SymInt[] padding, SymInt[] dilation, bool transposed, SymInt[] output_padding, SymInt groups, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+  grad_output, input, weight: _convolution_double_backward_symint(grads[0], grads[1], grads[2], grad_output, weight, input, stride, padding, dilation, transposed, output_padding, groups, grad_input_mask)
+
+# slow_conv_transpose2d: the joint self/weight/bias formula yields an empty
+# 3-tuple when grad is undefined, otherwise calls convolution_backward_symint
+# with transposed=true and groups=1; kernel_size is not forwarded.
+- name: slow_conv_transpose2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] output_padding=0, SymInt[2] dilation=1) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, true, output_padding, 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# slow_conv_transpose3d: the joint self/weight/bias formula yields an empty
+# 3-tuple when grad is undefined, otherwise calls convolution_backward_symint
+# with transposed=true and groups=1; kernel_size is not forwarded.
+- name: slow_conv_transpose3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] output_padding=0, SymInt[3] dilation=1) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, true, output_padding, 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# _slow_conv2d_forward: the joint self/weight/bias formula yields an empty
+# 3-tuple when grad is undefined, otherwise calls _slow_conv2d_backward_symint
+# with kernel_size, stride, padding and grad_input_mask.
+- name: _slow_conv2d_forward(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding) -> Tensor
+  self, weight, bias: "grad.defined() ? _slow_conv2d_backward_symint(grad, self, weight, kernel_size, stride, padding, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# _slow_conv2d_backward.output_mask: the joint grad_output/self/weight formula
+# delegates to _convolution_double_backward_symint with hard-coded dilation
+# {{1, 1}}, transposed=false, output_padding {{0, 0}} and groups=1.
+- name: _slow_conv2d_backward.output_mask(Tensor grad_output, Tensor self, Tensor weight, SymInt[2] kernel_size, SymInt[2] stride, SymInt[2] padding, bool[3] output_mask) -> (Tensor grad_input, Tensor grad_weight, Tensor grad_bias)
+  grad_output, self, weight: _convolution_double_backward_symint(grads[0], grads[1], grads[2], grad_output, weight, self, stride, padding, {{1, 1}}, false, {{0, 0}}, 1, grad_input_mask)
+
+# _conv_depthwise2d: the joint self/weight/bias formula yields an empty
+# 3-tuple when grad is undefined, otherwise calls convolution_backward_symint
+# on grad.contiguous() with transposed=false, output_padding {{0, 0}} and
+# groups=1.
+# NOTE(review): groups is hard-coded to 1 for a depthwise op -- presumably
+# intentional; confirm against convolution_backward.
+- name: _conv_depthwise2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias, SymInt[2] stride, SymInt[2] padding, SymInt[2] dilation) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight, bias->sym_sizes(), stride, padding, dilation, /*transposed=*/ false, /*output_padding=*/ {{0, 0}}, /*groups=*/ 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# conv_depthwise3d: the joint self/weight/bias formula yields an empty 3-tuple
+# when grad is undefined, otherwise calls convolution_backward_symint on
+# grad.contiguous() with transposed=false, output_padding {{0, 0, 0}} and
+# groups=1.
+# NOTE(review): groups is hard-coded to 1 for a depthwise op -- presumably
+# intentional; confirm against convolution_backward.
+- name: conv_depthwise3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding, SymInt[3] dilation) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad.contiguous(), self, weight, bias->sym_sizes(), stride, padding, dilation, /*transposed=*/ false, /*output_padding=*/ {{0, 0, 0}}, /*groups=*/ 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# slow_conv3d_forward: the joint self/weight/bias formula yields an empty
+# 3-tuple when grad is undefined, otherwise calls convolution_backward_symint
+# with hard-coded dilation {{1, 1, 1}}, transposed=false, output_padding
+# {{0, 0, 0}} and groups=1.
+- name: slow_conv3d_forward(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias, SymInt[3] stride, SymInt[3] padding) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, /*dilation=*/ {{1, 1, 1}}, false, /*output_padding=*/ {{0, 0, 0}}, 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: slow_conv_dilated2d(Tensor self, Tensor weight, SymInt[2] kernel_size, Tensor? bias=None, SymInt[2] stride=1, SymInt[2] padding=0, SymInt[2] dilation=1) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, false, std::vector<c10::SymInt>(padding.size(), 0), 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: slow_conv_dilated3d(Tensor self, Tensor weight, SymInt[3] kernel_size, Tensor? bias=None, SymInt[3] stride=1, SymInt[3] padding=0, SymInt[3] dilation=1) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, false, std::vector<c10::SymInt>(padding.size(), 0), 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: col2im(Tensor self, SymInt[2] output_size, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+  self: im2col(grad, kernel_size, dilation, padding, stride)
+  result: auto_linear
+
+- name: im2col(Tensor self, int[2] kernel_size, int[2] dilation, int[2] padding, int[2] stride) -> Tensor
+  self: col2im_symint(grad, {self.sym_size(-2), self.sym_size(-1)}, kernel_size, dilation, padding, stride)
+  result: auto_linear
+
+- name: _adaptive_avg_pool2d_backward(Tensor grad_output, Tensor self) -> Tensor
+  grad_output: _adaptive_avg_pool2d_symint(grad, {grad_output.sym_size(-2), grad_output.sym_size(-1)})
+  self: zeros_like(self)
+  result: _adaptive_avg_pool2d_backward(grad_output_t, self_p)
+
+- name: _adaptive_avg_pool3d_backward(Tensor grad_output, Tensor self) -> Tensor
+  grad_output: _adaptive_avg_pool3d_symint(grad, { grad_output.sym_size(-3), grad_output.sym_size(-2), grad_output.sym_size(-1) })
+  self: zeros_like(self)
+  result: _adaptive_avg_pool3d_backward(grad_output_t, self_p)
+
+- name: adaptive_max_pool2d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 2)
+  self: zeros_like(self)
+  result: auto_linear
+
+- name: adaptive_max_pool3d_backward(Tensor grad_output, Tensor self, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 3)
+  self: zeros_like(self)
+  result: auto_linear
+
+- name: avg_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
+  grad_output: avg_pool2d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+  self: zeros_like(self)
+  result: avg_pool2d_backward(grad_output_t, self_p, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+
+- name: avg_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, bool ceil_mode, bool count_include_pad, int? divisor_override) -> Tensor
+  grad_output: avg_pool3d(grad, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+  self: zeros_like(self)
+  result: avg_pool3d_backward(grad_output_t, self_p, kernel_size, stride, padding, ceil_mode, count_include_pad, divisor_override)
+
+- name: elu_backward(Tensor grad_output, Scalar alpha, Scalar scale, Scalar input_scale, bool is_result, Tensor self_or_result) -> Tensor
+  grad_output: elu_backward(grad, alpha, scale, input_scale, is_result, self_or_result)
+  self_or_result: elu_double_backward(grad, grad_output, alpha, scale, input_scale, is_result, self_or_result)
+  result: elu_backward(grad_output_t, alpha, scale, input_scale, is_result, self_or_result_p) + elu_double_backward(self_or_result_t, grad_output_p, alpha, scale, input_scale, is_result, self_or_result_p)
+
+- name: fractional_max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] output_size, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 2)
+  self: zeros_like(self)
+  result: auto_linear
+
+- name: fractional_max_pool3d_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] output_size, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 3)
+  self: zeros_like(self)
+  result: auto_linear
+
+- name: glu_backward(Tensor grad_output, Tensor self, int dim) -> Tensor
+  grad_output: glu_double_backward_grad_output(grad, self, dim)
+  self: glu_double_backward(grad, grad_output, self, dim)
+  result: glu_backward_jvp(result, grad_output_p, self_p, grad_output_t, self_t, dim)
+
+- name: hardtanh_backward(Tensor grad_output, Tensor self, Scalar min_val, Scalar max_val) -> Tensor
+  grad_output: hardtanh_backward(grad, self, min_val, max_val)
+  self: zeros_like(grad)
+  result: at::where((self_p > min_val).logical_and(self_p < max_val), grad_output_t, at::zeros({}, result.options()).expand_as(result))
+
+- name: log_sigmoid_backward(Tensor grad_output, Tensor self, Tensor buffer) -> Tensor
+  grad_output: log_sigmoid_backward(grad, self, buffer)
+  self: log_sigmoid_double_backward(grad * grad_output, self)
+  result: log_sigmoid_backward(grad_output_t, self_p, buffer) + log_sigmoid_double_backward(self_t * grad_output_p, self_p)
+
+- name: _log_softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
+  grad_output: grad.to(output.dtype()) - (grad.to(output.dtype()) * output.exp()).sum(dim, true)
+  output: (-grad_output.sum(dim, true) * output.exp() * grad.to(output.dtype())).to(output.dtype())
+
+- name: leaky_relu_backward(Tensor grad_output, Tensor self, Scalar negative_slope, bool self_is_result) -> Tensor
+  # self_is_result is always false here: the double-backward call is out-of-place, so self is the input itself.
+  grad_output: leaky_relu_backward(grad, self, negative_slope, false)
+  self: zeros_like(grad)
+  # leaky_relu_backward(grad_output, self, negative_slope, false)
+  # computes grad_output * at::where(self_p > 0, 1, negative_slope),
+  # so its jvp formula is:
+  # grad_output_t * at::where(self_p > 0, self_p.new_ones([]), negative_slope);
+  #
+  # leaky_relu_backward(grad_output, result, negative_slope, true)
+  # computes grad_output * at::where(result > 0, 1, negative_slope)
+  # under the assumption that `negative_slope` is positive (otherwise
+  # it is not possible to compute the gradient),
+  #
+  # so its jvp formula is:
+  # grad_output_t * at::where(result_p > 0, result_p.new_ones([]), negative_slope);
+  # again assuming that negative_slope is positive.
+  #
+  # Combining the two cases gives the following optimized kernel, which
+  # also checks the assumption that negative_slope is positive when
+  # self_is_result is True:
+  result: leaky_relu_backward(grad_output_t, self_p, negative_slope, self_is_result)
+
+# This derivative is mps-only, and `error_for_max_pool2d_double_backward` just raises an error.
+- name: max_pool2d_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+  grad_output: error_for_max_pool2d_double_backward()
+  self: zeros_like(self)
+  result: auto_linear
+
+- name: max_pool2d_with_indices_backward(Tensor grad_output, Tensor self, int[2] kernel_size, int[2] stride, int[2] padding, int[2] dilation, bool ceil_mode, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 2)
+  self: zeros_like(self)
+  indices: non_differentiable
+  result: auto_linear
+
+- name: max_pool3d_with_indices_backward(Tensor grad_output, Tensor self, int[3] kernel_size, int[3] stride, int[3] padding, int[3] dilation, bool ceil_mode, Tensor indices) -> Tensor
+  grad_output: max_pool_double_backward(grad, indices, 3)
+  self: zeros_like(self)
+  indices: non_differentiable
+  result: auto_linear
+
+- name: mse_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
+  grad_output: mse_loss_backward(grad, self, target, reduction)
+  self: mse_loss_double_backward(grad * grad_output, self, reduction)
+  target: -mse_loss_double_backward(grad * grad_output, target, reduction)
+  result: "  mse_loss_double_backward(self_t * grad_output_p, self_p, reduction)
+           - mse_loss_double_backward(target_t * grad_output_p, target_p, reduction)
+           + mse_loss_backward(grad_output_t, self_p, target_p, reduction)
+          "
+
+- name: nll_loss_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor
+  grad_output: nll_loss_symint(grad, target, weight, reduction, ignore_index)
+  self: zeros_like(grad)
+  target: non_differentiable
+
+- name: nll_loss2d_backward(Tensor grad_output, Tensor self, Tensor target, Tensor? weight, int reduction, SymInt ignore_index, Tensor total_weight) -> Tensor
+  grad_output: nll_loss2d_symint(grad, target, weight, reduction, ignore_index)
+  self: zeros_like(grad)
+  target: non_differentiable
+
+- name: rrelu_with_noise_backward(Tensor grad_output, Tensor self, Tensor noise, Scalar lower, Scalar upper, bool training, bool self_is_result) -> Tensor
+  # self_is_result is always false here: the double-backward call is out-of-place, so self is the input itself.
+  grad_output: rrelu_with_noise_backward(grad, self, noise, lower, upper, training, false)
+  self: zeros_like(grad)
+  result: rrelu_with_noise_backward(grad_output_t, self_p, noise, lower, upper, training, false)
+
+- name: reflection_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor
+  grad_output: reflection_pad1d_symint(grad, padding)
+  self: zeros_like(self)
+  result: reflection_pad1d_backward_symint(grad_output_t, self_p, padding)
+
+- name: reflection_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor
+  grad_output: reflection_pad2d_symint(grad, padding)
+  self: zeros_like(self)
+  result: reflection_pad2d_backward_symint(grad_output_t, self_p, padding)
+
+- name: reflection_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor
+  grad_output: reflection_pad3d_symint(grad, padding)
+  self: zeros_like(self)
+  result: reflection_pad3d_backward_symint(grad_output_t, self_p, padding)
+
+- name: replication_pad1d_backward(Tensor grad_output, Tensor self, SymInt[2] padding) -> Tensor
+  grad_output: replication_pad1d_symint(grad, padding)
+  self: zeros_like(self)
+  result: replication_pad1d_backward_symint(grad_output_t, self_p, padding)
+
+- name: replication_pad2d_backward(Tensor grad_output, Tensor self, SymInt[4] padding) -> Tensor
+  grad_output: replication_pad2d_symint(grad, padding)
+  self: zeros_like(self)
+  result: replication_pad2d_backward_symint(grad_output_t, self_p, padding)
+
+- name: replication_pad3d_backward(Tensor grad_output, Tensor self, SymInt[6] padding) -> Tensor
+  grad_output: replication_pad3d_symint(grad, padding)
+  self: zeros_like(self)
+  result: replication_pad3d_backward_symint(grad_output_t, self_p, padding)
+
+- name: sparse_sampled_addmm(Tensor self, Tensor mat1, Tensor mat2, *, Scalar beta=1, Scalar alpha=1) -> Tensor
+  self, mat1, mat2: "sparse_sampled_addmm_backward(grad,
+                                                   self,
+                                                   wrap_opt_if(mat1, grad_input_mask[2]),
+                                                   wrap_opt_if(mat2, grad_input_mask[1]),
+                                                   alpha, beta, grad_input_mask)"
+
+- name: _sparse_mm_reduce_impl(Tensor self, Tensor other, str reduce) -> (Tensor, Tensor)
+  output_differentiability: [True, False]
+  self, other: "grad.defined() ? _sparse_mm_reduce_impl_backward(self, grad, other, reduce, result1, grad_input_mask) :  std::tuple<Tensor, Tensor>()"
+
+- name: smooth_l1_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float beta) -> Tensor
+  grad_output: smooth_l1_loss_backward(grad, self, target, reduction, beta)
+  self: smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
+  target: -smooth_l1_loss_double_backward(grad * grad_output, self, target, reduction, beta)
+  result: "  smooth_l1_loss_double_backward(self_t * grad_output_p, self_p, target_p, reduction, beta)
+           - smooth_l1_loss_double_backward(target_t * grad_output_p, self_p, target_p, reduction, beta)
+           + smooth_l1_loss_backward(grad_output_t, self_p, target_p, reduction, beta)
+          "
+
+- name: huber_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction, float delta) -> Tensor
+  grad_output: huber_loss_double_backward_grad_output(grad, grad_output, self, target, reduction, delta)
+  self: huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
+  target: -huber_loss_double_backward(grad * grad_output, self, target, reduction, delta)
+
+- name: softplus_backward(Tensor grad_output, Tensor self, Scalar beta, Scalar threshold) -> Tensor
+  grad_output: softplus_backward(grad, self, beta, threshold)
+  self: softplus_double_backward(grad * grad_output, self, beta, threshold)
+  result: "softplus_backward(grad_output_t, self_p, beta, threshold)
+         + softplus_double_backward(self_t * grad_output_p, self_p, beta, threshold)"
+
+- name: _softmax_backward_data(Tensor grad_output, Tensor output, int dim, ScalarType input_dtype) -> Tensor
+  grad_output: _softmax_backward_data(grad.to(output.dtype()), output, dim, input_dtype)
+  output: softmax_double_backward(grad.to(output.dtype()), grad_output, dim, output).to(output.dtype())
+
+- name: soft_margin_loss_backward(Tensor grad_output, Tensor self, Tensor target, int reduction) -> Tensor
+  grad_output: soft_margin_loss_double_backward_grad_output(grad, grad_output, self, target, reduction)
+  self: soft_margin_loss_double_backward(grad * grad_output, self, target, reduction)
+
+- name: softshrink_backward(Tensor grad_output, Tensor self, Scalar lambd) -> Tensor
+  grad_output: softshrink_backward(grad, self, lambd)
+  self: zeros_like(grad)
+  result: at::where((self_p > lambd).logical_or(self_p < -lambd), grad_output_t, at::zeros({}, result.options()).expand_as(result))
+
+- name: threshold_backward(Tensor grad_output, Tensor self, Scalar threshold) -> Tensor
+  grad_output: threshold_backward(grad, self, threshold)
+  self: zeros_like(grad)
+  result: zeros_like(self_t) + threshold_backward(grad_output_t, self_p, threshold)
+
+- name: upsample_linear1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, bool align_corners, float? scales=None) -> Tensor
+  grad_output: upsample_linear1d_symint(grad, output_size, align_corners, scales)
+  result: auto_linear
+
+- name: upsample_bilinear2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: upsample_bilinear2d_symint(grad, output_size, align_corners, scales_h, scales_w)
+  result: auto_linear
+
+- name: _upsample_bilinear2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: _upsample_bilinear2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
+  result: auto_linear
+
+- name: upsample_bicubic2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: upsample_bicubic2d_symint(grad, output_size, align_corners, scales_h, scales_w)
+  result: auto_linear
+
+- name: _upsample_bicubic2d_aa_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, bool align_corners, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: _upsample_bicubic2d_aa_symint(grad, output_size, align_corners, scales_h, scales_w)
+  result: auto_linear
+
+- name: upsample_trilinear3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, bool align_corners, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: upsample_trilinear3d_symint(grad, output_size, align_corners, scales_d, scales_h, scales_w)
+  result: auto_linear
+
+- name: upsample_nearest1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor
+  grad_output: upsample_nearest1d_symint(grad, output_size, scales)
+  result: auto_linear
+
+- name: _upsample_nearest_exact1d_backward(Tensor grad_output, SymInt[1] output_size, SymInt[3] input_size, float? scales=None) -> Tensor
+  grad_output: _upsample_nearest_exact1d_symint(grad, output_size, scales)
+  result: auto_linear
+
+- name: upsample_nearest2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: upsample_nearest2d_symint(grad, output_size, scales_h, scales_w)
+  result: auto_linear
+
+- name: _upsample_nearest_exact2d_backward(Tensor grad_output, SymInt[2] output_size, SymInt[4] input_size, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: _upsample_nearest_exact2d_symint(grad, output_size, scales_h, scales_w)
+  result: auto_linear
+
+- name: upsample_nearest3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: upsample_nearest3d_symint(grad, output_size, scales_d, scales_h, scales_w)
+  result: auto_linear
+
+- name: _upsample_nearest_exact3d_backward(Tensor grad_output, SymInt[3] output_size, SymInt[5] input_size, float? scales_d=None, float? scales_h=None, float? scales_w=None) -> Tensor
+  grad_output: _upsample_nearest_exact3d_symint(grad, output_size, scales_d, scales_h, scales_w)
+  result: auto_linear
+
+- name: sigmoid_backward(Tensor grad_output, Tensor output) -> Tensor
+  grad_output: sigmoid_backward(grad, output.conj())
+  output: grad.conj() * grad_output * (-2 * output.conj() + 1)
+  result: sigmoid_backward(grad_output_t, output_p) + output_t.conj() * grad_output_p * (-2 * output_p.conj() + 1)
+
+- name: tanh_backward(Tensor grad_output, Tensor output) -> Tensor
+  grad_output: tanh_backward(grad, output.conj())
+  output: grad.conj() * (-2 * output.conj() * grad_output)
+  result: tanh_backward(grad_output_t, output_p) + output_t.conj() * (-2 * output_p.conj() * grad_output_p)
+
+# cudnn
+- name: _cudnn_ctc_loss(Tensor log_probs, Tensor targets, int[] input_lengths, int[] target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)
+  log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
+
+- name: _cudnn_ctc_loss.Tensor(Tensor log_probs, Tensor targets, Tensor input_lengths, Tensor target_lengths, int blank, bool deterministic, bool zero_infinity) -> (Tensor, Tensor)
+  log_probs: _cudnn_ctc_loss_backward(grad, result0, result1, zero_infinity)
+
+- name: cudnn_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+  self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, output_padding, stride, dilation, true, groups, {grad_input_mask[0], grad_input_mask[1]})"
+
+- name: _mps_convolution_transpose(Tensor self, Tensor weight, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
+  self, weight: "grad.defined() ? mps_convolution_transpose_backward_symint(self, grad, weight, padding, output_padding, stride, dilation, groups, grad_input_mask) : std::tuple<Tensor, Tensor>()"
+
+- name: cudnn_convolution(Tensor self, Tensor weight, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic, bool allow_tf32) -> Tensor
+  self, weight: "_cudnn_convolution_backward(self, grad, weight, padding, std::vector<c10::SymInt>(padding.size(), 0), stride, dilation, false, groups, {grad_input_mask[0], grad_input_mask[1]})"
+
+- name: cudnn_grid_sampler(Tensor self, Tensor grid) -> Tensor output
+  self, grid: "grad.defined() ? cudnn_grid_sampler_backward(self, grid, grad) : std::tuple<Tensor, Tensor>()"
+
+- name: cudnn_affine_grid_generator(Tensor theta, int N, int C, int H, int W) -> Tensor grid
+  theta: cudnn_affine_grid_generator_backward(grad, N, C, H, W)
+
+# NB: Why is the backward formula here so complicated?  CuDNN cannot be used to compute
+# backward in evaluation mode, because the math for backward in evaluation mode
+# is different (since the forward math is different), and CuDNN does not support
+# it.  And in any case, you shouldn't be using this bn in evaluation mode,
+# because it should be merged into the previous convolution (left for future
+# work.)
+# NB2: The quotes around the gradient are needed to appease YAML parsing rules.
+- name: cudnn_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? (training ? cudnn_batch_norm_backward(input, grad.contiguous(input.suggest_memory_format()), weight, running_mean, running_var, result1, result2, epsilon, retain_variables ? result3.clone() : result3) : native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, epsilon, grad_input_mask)) : std::tuple<Tensor, Tensor, Tensor>()"
+  result0: batch_norm_jvp(input_p, input_t, weight_p, weight_t, bias_p, bias_t, running_mean, running_var, result1, result2, training, epsilon)
+
+# HACK: save_mean and save_var are going to be passed in as
+# requires_grad variables (even though we'll never backprop through
+# them) so we need to prevent the unpacking from triggering an error.
+- name: cudnn_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon, Tensor reserveSpace) -> (Tensor, Tensor, Tensor)
+  save_mean: not_implemented("cudnn_batch_norm_backward save_mean")
+  save_var: not_implemented("cudnn_batch_norm_backward save_var")
+  reserveSpace: not_implemented("cudnn_batch_norm_backward reserveSpace")
+  input, weight, grad_output: batchnorm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_output, running_mean, running_var, true, epsilon, save_mean, save_var, grad_input_mask)
+
+# nnpack
+
+- name: _nnpack_spatial_convolution(Tensor input, Tensor weight, Tensor? bias, SymInt[2] padding, SymInt[2] stride=1) -> Tensor
+  # NNPACK does not support strided convolutions in the backward path, so the closest available function that does (convolution_backward) is used here instead.
+  input, weight, bias: "grad.defined() ? convolution_backward_symint(grad, input, weight, bias->sym_sizes(), stride, padding, std::vector<c10::SymInt>(padding.size(), 1), false, std::vector<c10::SymInt>(padding.size(), 0), 1, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+# LSTM MPS
+- name: _lstm_mps(Tensor input, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)
+  output_differentiability: [True, True, True, False, False, False]
+  input, hx, params: "lstm_mps_backward(grads[0], grads[1], grads[2], result3, result4, input, result5, hx, params, has_biases, num_layers, dropout, train, bidirectional, batch_first)"
+
+- name: lstm_mps_backward(Tensor? grad_y, Tensor? grad_hy, Tensor? grad_cy, Tensor z_state, Tensor cell_state_fwd, Tensor input, Tensor layersOutputs, Tensor[] hx, Tensor[] params, bool has_biases, int num_layers, float dropout, bool train, bool bidirectional, bool batch_first) -> (Tensor, Tensor[], Tensor[])
+
+
+
+# Only the first three of the _cudnn_rnn outputs can have gradients.
+# _cudnn_rnn outputs: (output, hy, cy, reserve, weight_buf)
+- name: _cudnn_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor? weight_buf, Tensor hx, Tensor? cx, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dropout_state: non_differentiable
+  output_differentiability: [True, True, True, False, False]
+  input, hx, cx, weight: "_cudnn_rnn_backward_symint(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, proj_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)"
+
+- name: _cudnn_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, SymInt hidden_size, SymInt proj_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, SymInt[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
+  dropout_state: non_differentiable
+  input: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  weight: not_implemented_list("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  hx: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  cx: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  output: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  grad_output: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  grad_hy: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+  grad_cy: not_implemented("_cudnn_rnn_backward", kCudnnDoubleBackwardMsg)
+
+# miopen
+
+- name: miopen_convolution_transpose(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] output_padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, true, output_padding, groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: miopen_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, false, std::vector<c10::SymInt>(padding.size(), 0), groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: miopen_depthwise_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups, bool benchmark, bool deterministic) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, false, std::vector<c10::SymInt>(padding.size(), 0), groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: miopen_batch_norm(Tensor input, Tensor weight, Tensor? bias, Tensor? running_mean, Tensor? running_var, bool training, float exponential_average_factor, float epsilon) -> (Tensor, Tensor, Tensor)
+  input, weight, bias: "grad.defined() ? (training ? miopen_batch_norm_backward(input, grad.contiguous(), weight, running_mean, running_var, result1, result2, epsilon) : native_batch_norm_backward(grad, input, weight, running_mean, running_var, result1, result2, training, epsilon, grad_input_mask)) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: miopen_batch_norm_backward(Tensor input, Tensor grad_output, Tensor weight, Tensor? running_mean, Tensor? running_var, Tensor? save_mean, Tensor? save_var, float epsilon) -> (Tensor, Tensor, Tensor)
+  save_mean: not_implemented("miopen_batch_norm_backward save_mean")
+  save_var: not_implemented("miopen_batch_norm_backward save_var")
+  input, weight, grad_output: batchnorm_double_backward(input, weight, grads[0], grads[1], grads[2], grad_output, running_mean, running_var, true, epsilon, save_mean, save_var, grad_input_mask)
+
+- name: miopen_rnn(Tensor input, Tensor[] weight, int weight_stride0, Tensor hx, Tensor? cx, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state) -> (Tensor, Tensor, Tensor, Tensor, Tensor)
+  dropout_state: non_differentiable
+  output_differentiability: [True, True, True, False, False]
+  input, hx, cx, weight: "miopen_rnn_backward(input, weight, weight_stride0, result4, hx, cx, result0, grads[0], grads[1], grads[2], mode, hidden_size, num_layers, batch_first, dropout, train, bidirectional, batch_sizes, dropout_state, retain_variables ? result3.clone() : result3, grad_input_mask)"
+
+- name: miopen_rnn_backward(Tensor input, Tensor[] weight, int weight_stride0, Tensor weight_buf, Tensor hx, Tensor? cx, Tensor output, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, int mode, int hidden_size, int num_layers, bool batch_first, float dropout, bool train, bool bidirectional, int[] batch_sizes, Tensor? dropout_state, Tensor reserve, bool[4] output_mask) -> (Tensor, Tensor, Tensor, Tensor[])
+  dropout_state: non_differentiable
+
+- name: mkldnn_rnn_layer(Tensor input, Tensor weight0, Tensor weight1, Tensor weight2, Tensor weight3, Tensor hx_, Tensor cx_, bool reverse, int[] batch_sizes, int mode, int hidden_size, int num_layers, bool has_biases, bool bidirectional, bool batch_first, bool train) -> (Tensor, Tensor, Tensor, Tensor)
+  output_differentiability: [True, True, True, False]
+  input, weight0, weight1, weight2, weight3, hx_, cx_: "GradMode::is_enabled() ? mkldnn_rnn_layer_differentiable_backward(input, weight0, weight1, weight2, weight3, hx_, cx_, result0, result1, result2, grads[0], grads[1], grads[2], reverse, mode, hidden_size, num_layers, has_biases, train, bidirectional, batch_sizes, batch_first, result3) : mkldnn_rnn_layer_backward(input, weight0, weight1, weight2, weight3, hx_, cx_, result0, result1, result2, grads[0], grads[1], grads[2], reverse, mode, hidden_size, num_layers, has_biases, train, bidirectional, batch_sizes, batch_first, result3)"
+
+- name: mkldnn_rnn_layer_backward(Tensor input, Tensor weight1, Tensor weight2, Tensor weight3, Tensor weight4, Tensor hx_, Tensor cx_tmp, Tensor output, Tensor hy_, Tensor cy_, Tensor? grad_output, Tensor? grad_hy, Tensor? grad_cy, bool reverse, int mode, int hidden_size, int num_layers, bool has_biases, bool train, bool bidirectional, int[] batch_sizes, bool batch_first, Tensor workspace) -> (Tensor, Tensor, Tensor, Tensor, Tensor, Tensor, Tensor)
+
+# mkldnn
+- name: mkldnn_convolution(Tensor self, Tensor weight, Tensor? bias, SymInt[] padding, SymInt[] stride, SymInt[] dilation, SymInt groups) -> Tensor
+  self, weight, bias: "grad.defined() ? convolution_backward_symint(grad, self, weight, bias->sym_sizes(), stride, padding, dilation, /*transposed=*/ false, /*output_padding=*/ std::vector<c10::SymInt>(padding.size(), 0), groups, grad_input_mask) : std::tuple<Tensor, Tensor, Tensor>()"
+
+- name: mkldnn_linear(Tensor self, Tensor weight, Tensor? bias=None) -> Tensor
+  self, weight, bias: mkldnn_linear_backward(self, grad, weight, grad_input_mask)
+
+- name: mkldnn_max_pool2d(Tensor self, int[2] kernel_size, int[2] stride=[], int[2] padding=0, int[2] dilation=1, bool ceil_mode=False) -> Tensor
+  self: mkldnn_max_pool2d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_mode)
+
+- name: mkldnn_max_pool3d(Tensor self, int[3] kernel_size, int[3] stride=[], int[3] padding=0, int[3] dilation=1, bool ceil_mode=False) -> Tensor
+  self: mkldnn_max_pool3d_backward(grad, result, self, kernel_size, stride, padding, dilation, ceil_mode)
+
+- name: mkldnn_adaptive_avg_pool2d(Tensor self, int[2] output_size) -> Tensor
+  self: mkldnn_adaptive_avg_pool2d_backward(grad, self)
+
+- name: _mkldnn_reshape(Tensor self, int[] shape) -> Tensor
+  self: grad.reshape_symint(self.sym_sizes())
+
+# NestedTensor
+- name: _nested_tensor_from_tensor_list(Tensor[] list, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  list: "grad.defined()? at::unbind(grad) : std::vector<Tensor>(list.size())"
+
+- name: _nested_tensor_from_mask(Tensor t, Tensor mask, bool mask_check=True) -> Tensor
+  t: grad.to_padded_tensor_symint(0, t.sym_sizes())
+  mask: non_differentiable
+
+- name: _nested_from_padded(Tensor padded, Tensor cpu_nested_shape_example, bool fuse_transform_0213=False) -> Tensor
+  padded: _nested_from_padded_backward(grad, padded, fuse_transform_0213)
+  cpu_nested_shape_example: non_differentiable
+
+- name: to_padded_tensor(Tensor self, float padding, SymInt[]? output_size=None) -> Tensor
+  self: at::_nested_from_padded(grad, self._nested_tensor_size())
+  padding: non_differentiable
+
+- name:  _nested_view_from_buffer(Tensor(a) self, Tensor nested_size, Tensor nested_strides, Tensor offsets) -> Tensor(a)
+  self: grad.values()
+  nested_size: non_differentiable
+  nested_strides: non_differentiable
+
+- name: _nested_view_from_jagged(Tensor(a) self, Tensor offsets, Tensor dummy, Tensor? lengths=None, int ragged_idx=1) -> Tensor(a)
+  self: grad.values()
+  offsets: non_differentiable
+  lengths: non_differentiable
+  dummy: non_differentiable
+
+- name: _nested_get_values(Tensor(a) self) -> Tensor(a)
+  self: _nested_view_from_jagged(grad, at::_nested_get_offsets(self), at::_nested_get_jagged_dummy(self), at::_nested_get_lengths(self), at::_nested_get_ragged_idx(self))
+
+# Transformers
+- name: _scaled_dot_product_efficient_attention(Tensor query, Tensor key, Tensor value, Tensor? attn_bias, bool compute_log_sumexp, float dropout_p=0.0, bool is_causal=False, *, float? scale=None) -> (Tensor output, Tensor log_sumexp, Tensor philox_seed, Tensor philox_offset)
+  output_differentiability: [True, False, False, False]
+  query, key, value, attn_bias: _scaled_dot_product_efficient_attention_backward(grad, query, key, value, attn_bias, output, log_sumexp, philox_seed, philox_offset, dropout_p, grad_input_mask, is_causal, scale)
+
+- name: _scaled_dot_product_flash_attention(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, bool return_debug_mask=False, *, float? scale=None) -> (Tensor output, Tensor logsumexp, Tensor cum_seq_q, Tensor cum_seq_k, SymInt max_q, SymInt max_k, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+  output_differentiability: [True, False, False, False, False, False, False, False, False]
+  query, key, value: _scaled_dot_product_flash_attention_backward_symint(grad, query, key, value, output, logsumexp, cum_seq_q, cum_seq_k, max_q, max_k, dropout_p, is_causal, philox_seed, philox_offset, scale)
+
+- name: _scaled_dot_product_flash_attention_for_cpu(Tensor query, Tensor key, Tensor value, float dropout_p=0.0, bool is_causal=False, *, Tensor? attn_mask=None, float? scale=None) -> (Tensor output, Tensor logsumexp)
+  output_differentiability: [True, False]
+  query, key, value: _scaled_dot_product_flash_attention_for_cpu_backward(grad, query, key, value, output, logsumexp, dropout_p, is_causal, attn_mask, scale)
+
+- name: _flash_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? cum_seq_q, Tensor? cum_seq_k, SymInt max_q, SymInt max_k, float dropout_p, bool is_causal, bool return_debug_mask, *, float? scale=None) -> (Tensor output, Tensor softmax_logsumexp, Tensor philox_seed, Tensor philox_offset, Tensor debug_attn_mask)
+  output_differentiability: [True, False, False, False, False]
+  query, key, value: _flash_attention_backward_symint(grad, query, key, value, output, softmax_logsumexp, cum_seq_q, cum_seq_k, max_q, max_k, dropout_p, is_causal, philox_seed, philox_offset, scale)
+
+- name: _efficient_attention_forward(Tensor query, Tensor key, Tensor value, Tensor? bias, Tensor? cu_seqlens_q, Tensor? cu_seqlens_k, int? max_seqlen_q, int? max_seqlen_k, float dropout_p, int custom_mask_type, bool compute_log_sumexp=False, *, float? scale=None, Tensor? causal_diagonal=None, Tensor? seqlen_k=None) -> (Tensor output, Tensor logsumexp, Tensor philox_seed, Tensor philox_offset, SymInt max_seqlen_batch_q, SymInt max_seqlen_batch_k)
+  output_differentiability: [True, False, False, False, False, False]
+  query, key, value, bias: _efficient_attention_backward_symint(grad, query, key, value, bias, output, cu_seqlens_q, cu_seqlens_k, max_seqlen_batch_q, max_seqlen_batch_k, logsumexp, dropout_p, philox_seed, philox_offset, custom_mask_type, bias.requires_grad(), scale)
+
+# fft
+- name: _fft_r2c(Tensor self, int[] dim, int normalization, bool onesided) -> Tensor
+  self: fft_r2c_backward(grad, dim, normalization, onesided, self.sym_size(dim.back()))
+  result: auto_linear
+
+- name: _fft_c2r(Tensor self, int[] dim, int normalization, SymInt last_dim_size) -> Tensor
+  self: fft_c2r_backward(grad, dim, normalization)
+  result: auto_linear
+
+- name: _fft_c2c(Tensor self, SymInt[] dim, int normalization, bool forward) -> Tensor
+  self: _fft_c2c_symint(grad, dim, normalization, !forward)
+  result: auto_linear
+
+- name: unbind.int(Tensor(a -> *) self, int dim=0) -> Tensor(a)[]
+  dispatch:
+    Default:
+      self: unbind_backward(grads, dim)
+      result: auto_linear
+    AutogradNestedTensor:
+      self: unbind_backward_nested(grads, at::native::get_nested_tensor_impl(self)->get_nested_sizes(), dim, self.options())
+      result: auto_linear
+
+- name: stack(Tensor[] tensors, int dim=0) -> Tensor
+  tensors: stack_tensors_backward(grad, dim, to_args_scalartypes(tensors))
+  result: stack_jvp(tensors, dim)
+
+# fused RNN kernels
+
+# Only the first two outputs of _thnn_fused_lstm_cell can have gradients.
+# _thnn_fused_lstm_cell outputs: (hy, cy, workspace)
+- name: _thnn_fused_lstm_cell(Tensor input_gates, Tensor hidden_gates, Tensor cx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor, Tensor)
+  output_differentiability: [True, True, False]
+  input_gates, hidden_gates, cx, input_bias, hidden_bias: "GradMode::is_enabled() ? _thnn_differentiable_lstm_cell_backward(grads[0], grads[1], input_gates, hidden_gates, input_bias, hidden_bias, cx, result1) : _thnn_fused_lstm_cell_backward(grads[0], grads[1], cx, result1, result2, input_bias.defined())"
+
+- name: _thnn_fused_gru_cell(Tensor input_gates, Tensor hidden_gates, Tensor hx, Tensor? input_bias=None, Tensor? hidden_bias=None) -> (Tensor, Tensor)
+  input_gates, hidden_gates, hx, input_bias, hidden_bias: "grad.defined() ? (GradMode::is_enabled() ? _thnn_differentiable_gru_cell_backward(grad, input_gates, hidden_gates, hx, input_bias, hidden_bias) : _thnn_fused_gru_cell_backward(grad, result1, input_bias.defined())) : std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor>()"
+
+# PackedSequence helpers
+- name: _pack_padded_sequence(Tensor input, Tensor lengths, bool batch_first) -> (Tensor, Tensor)
+  input: _pack_padded_sequence_backward_symint(grad, input.sym_sizes(), result1, batch_first)
+
+# TH wrappers
+- name: eq.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: eq.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: ge.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: ge.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: gt.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: gt.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: le.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: le.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: lt.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: lt.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: ne.Scalar(Tensor self, Scalar other) -> Tensor
+  output_differentiability: [False]
+
+- name: ne.Tensor(Tensor self, Tensor other) -> Tensor
+  output_differentiability: [False]
+
+- name: multinomial(Tensor self, int num_samples, bool replacement=False, *, Generator? generator=None) -> Tensor
+  output_differentiability: [False]
+
+- name: nonzero(Tensor self) -> Tensor
+  output_differentiability: [False]
+
+- name: segment_reduce(Tensor data, str reduce, *, Tensor? lengths=None, Tensor? indices=None, Tensor? offsets=None, int axis=0, bool unsafe=False, Scalar? initial=None) -> Tensor
+  data: _segment_reduce_backward(grad, result, data, reduce, lengths, offsets, axis, initial)
+
+- name: _pin_memory(Tensor self, Device? device=None) -> Tensor
+  self: grad
+
+- name: _new_zeros_with_same_feature_meta(Tensor self, Tensor other, *, int self_num_batch_dims=0) -> Tensor
+  self: non_differentiable
+  other: non_differentiable
+  output_differentiability: [False]
+
+- name: _test_warn_in_autograd(Tensor self) -> Tensor
+  self: warn_backwards(grad)
+
+- name: _test_autograd_multiple_dispatch.fullcoverage(Tensor self) -> Tensor
+  dispatch:
+    Default:
+      self: grad.expand_symint(self.sym_sizes()) + 1
+      result: auto_linear
+    AutogradNestedTensor:
+      self: grad.mul(grad)
+    AutogradCUDA:
+      self: grad.expand_symint(self.sym_sizes()) * 2
+
+- name: _test_autograd_multiple_dispatch.ntonly(Tensor self, bool b) -> Tensor
+  dispatch:
+    AutogradNestedTensor:
+      self: grad.mul(grad).add(grad)
+
+- name: _test_autograd_multiple_dispatch_view(Tensor(a) self) -> Tensor(a)
+  dispatch:
+    Default:
+      self: grad.reshape_as(self)
+    AutogradCUDA:
+      self: grad.reshape_as(self) + 1
+
+- name: _efficientzerotensor(SymInt[] size, *, ScalarType? dtype=None, Layout? layout=None, Device? device=None, bool? pin_memory=None) -> Tensor
+  output_differentiability: [False]
+
+- name: scatter_reduce.two(Tensor self, int dim, Tensor index, Tensor src, str reduce, *, bool include_self=True) -> Tensor
+  self, src: scatter_reduce_backward(grad, self, dim, index, src, reduce, include_self, result)
+  index: non_differentiable
+  result: scatter_reduce_jvp(self_p, self_t, dim, index, src_p, src_t, reduce, include_self, result)
+
+- name: special_airy_ai(Tensor x) -> Tensor
+  x: non_differentiable
+
+- name: special_bessel_j0(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_bessel_j1(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_bessel_y0(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_bessel_y1(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_hermite_polynomial_h(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_hermite_polynomial_h.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_hermite_polynomial_h.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_hermite_polynomial_he(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_hermite_polynomial_he.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_hermite_polynomial_he.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_laguerre_polynomial_l(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_laguerre_polynomial_l.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_laguerre_polynomial_l.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_legendre_polynomial_p(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_legendre_polynomial_p.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_legendre_polynomial_p.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_modified_bessel_i0(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_modified_bessel_i1(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_modified_bessel_k0(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_modified_bessel_k1(Tensor self) -> Tensor
+  self: non_differentiable
+
+- name: special_scaled_modified_bessel_k0(Tensor x) -> Tensor
+  x: non_differentiable
+
+- name: special_scaled_modified_bessel_k1(Tensor x) -> Tensor
+  x: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_t(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_t.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_t.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_u(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_u.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_u.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_v(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_v.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_v.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_w(Tensor x, Tensor n) -> Tensor
+  x: non_differentiable
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_w.x_scalar(Scalar x, Tensor n) -> Tensor
+  n: non_differentiable
+
+- name: special_shifted_chebyshev_polynomial_w.n_scalar(Tensor x, Scalar n) -> Tensor
+  x: non_differentiable
+
+- name: special_spherical_bessel_j0(Tensor x) -> Tensor
+  x: non_differentiable
+
+- name: _reshape_copy(Tensor self, SymInt[] size) -> Tensor
+  self: grad.reshape_symint(self.sym_sizes())
+  result: auto_linear
+
+# note(crcrpar): the `torchgen/api/autograd` logic would inadvertently replace the substrings `self` and `other` inside function names.
+- name: _foreach_div.List(Tensor[] self, Tensor[] other) -> Tensor[]
+  self: div_tensor_self_backward(grads[i], other[i], self[i].scalar_type())
+  other: div_tensor_other_backward(grads[i], self[i], other[i])
+  result: (self_t - other_t * result[i]) / other_p
+
+- name: _foreach_pow.List(Tensor[] self, Tensor[] exponent) -> Tensor[]
+  self: pow_backward_self(grads[i], self[i], exponent[i])
+  exponent: pow_backward_exponent(grads[i], self[i], exponent[i], result[i])
+  result: (pow_backward_self(self_t.conj(), self_p, exponent_p) + pow_backward_exponent(exponent_t.conj(), self_p, exponent_p, result[i])).conj()
+
+- name: _foreach_pow.ScalarList(Tensor[] self, Scalar[] exponent) -> Tensor[]
+  self: pow_backward(grads[i], self[i], exponent[i])
+  result: pow_backward(self_t.conj(), self_p, exponent[i]).conj()
+
+- name: _foreach_pow.ScalarAndTensor(Scalar self, Tensor[] exponent) -> Tensor[]
+  exponent: pow_backward_exponent(grads[i], self, exponent[i], result[i])
+
+# note(crcrpar): following definitions seem necessary because the reference native functions
+# of `maximum` and `minimum` don't have the overload def with Scalar as their second argument.
+- name: _foreach_minimum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
+  self: at::where(self[i] == scalar, grads[i] / 2, grads[i]).masked_fill_(self[i] > scalar, 0)
+  result: scalar + at::where(self_p == scalar, at::scalar_tensor(0.5, result[i].options()), (self_p < scalar).to(result[i].scalar_type())) * (self_t - scalar)
+
+- name: _foreach_minimum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
+  self: at::where(self[i] == scalars[i], grads[i] / 2, grads[i]).masked_fill_(self[i] > scalars[i], 0)
+  result: scalars[i] + at::where(self_p == scalars[i], at::scalar_tensor(0.5, result[i].options()), (self_p < scalars[i]).to(result[i].scalar_type())) * (self_t - scalars[i])
+
+- name: _foreach_maximum.Scalar(Tensor[] self, Scalar scalar) -> Tensor[]
+  self: at::where(self[i] == scalar, grads[i] / 2, grads[i]).masked_fill_(self[i] < scalar, 0)
+  result: scalar + at::where(self_p == scalar, at::scalar_tensor(0.5, result[i].options()), (self_p > scalar).to(result[i].scalar_type())) * (self_t - scalar)
+
+- name: _foreach_maximum.ScalarList(Tensor[] self, Scalar[] scalars) -> Tensor[]
+  self: at::where(self[i] == scalars[i], grads[i] / 2, grads[i]).masked_fill_(self[i] < scalars[i], 0)
+  result: scalars[i] + at::where(self_p == scalars[i], at::scalar_tensor(0.5, result[i].options()), (self_p > scalars[i]).to(result[i].scalar_type())) * (self_t - scalars[i])
+
+# note(crcrpar): forward-mode AD is tricky for a simple string replace to handle:
+#   formula.replace("p", "ord") produces `norm_jvord(self_ord, self_t, ord, result)`
+- name: _foreach_norm.Scalar(Tensor[] self, Scalar ord=2) -> Tensor[]
+  self: norm_backward(grads[i], self[i], ord, result[i])
+  result: norm_jvp(self_p, self_t, ord, result[i])
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd.py
new file mode 100644
index 0000000000000000000000000000000000000000..0d4aa91d3fadec2747874d8c3e8f7cfe7a331ff5
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd.py
@@ -0,0 +1,146 @@
+"""
+To run this file by hand from the root of the PyTorch
+repository, run:
+
+python -m tools.autograd.gen_autograd \
+       aten/src/ATen/native/native_functions.yaml \
+       aten/src/ATen/native/tags.yaml \
+       $OUTPUT_DIR \
+       tools/autograd
+
+Where $OUTPUT_DIR is where you would like the files to be
+generated.  In the full build system, OUTPUT_DIR is
+torch/csrc/autograd/generated/
+"""
+
+# gen_autograd.py generates C++ autograd functions and Python bindings.
+#
+# It delegates to the following scripts:
+#
+#  gen_autograd_functions.py: generates subclasses of torch::autograd::Node
+#  gen_variable_type.py: generates VariableType.h which contains all tensor methods
+#  gen_python_functions.py: generates Python bindings to THPVariable
+#
+
+import argparse
+import os
+from typing import List
+
+from torchgen.api import cpp
+from torchgen.api.autograd import (
+    match_differentiability_info,
+    NativeFunctionWithDifferentiabilityInfo,
+)
+from torchgen.gen import parse_native_yaml
+from torchgen.selective_build.selector import SelectiveBuilder
+
+from . import gen_python_functions
+from .gen_autograd_functions import (
+    gen_autograd_functions_lib,
+    gen_autograd_functions_python,
+)
+from .gen_inplace_or_view_type import gen_inplace_or_view_type
+from .gen_trace_type import gen_trace_type
+from .gen_variable_factories import gen_variable_factories
+from .gen_variable_type import gen_variable_type
+from .gen_view_funcs import gen_view_funcs
+from .load_derivatives import load_derivatives
+
+
+def gen_autograd(
+    native_functions_path: str,  # forwarded to load_derivatives / parse_native_yaml and the generators
+    tags_path: str,  # forwarded alongside native_functions_path
+    out: str,  # first argument of every generator called below
+    autograd_dir: str,  # directory that contains derivatives.yaml and templates/
+    operator_selector: SelectiveBuilder,  # its is_native_function_selected_for_training filters the functions
+    disable_autograd: bool = False,  # True skips the VariableType / inplace-or-view / trace generators
+) -> None:
+    # Parse and load derivatives.yaml (the second return value is forwarded to gen_variable_type)
+    differentiability_infos, used_dispatch_keys = load_derivatives(
+        os.path.join(autograd_dir, "derivatives.yaml"), native_functions_path, tags_path
+    )
+
+    template_path = os.path.join(autograd_dir, "templates")
+
+    native_funcs = parse_native_yaml(native_functions_path, tags_path).native_functions
+    fns = sorted(  # functions accepted by the selector, ordered by cpp.name(f.func)
+        filter(
+            operator_selector.is_native_function_selected_for_training, native_funcs
+        ),
+        key=lambda f: cpp.name(f.func),
+    )
+    fns_with_diff_infos: List[
+        NativeFunctionWithDifferentiabilityInfo
+    ] = match_differentiability_info(fns, differentiability_infos)
+
+    # Generate VariableType.h/cpp, then the inplace-or-view and trace types (all skipped if disable_autograd)
+    if not disable_autograd:
+        gen_variable_type(
+            out,
+            native_functions_path,
+            tags_path,
+            fns_with_diff_infos,
+            template_path,
+            used_dispatch_keys,
+        )
+
+        gen_inplace_or_view_type(
+            out, native_functions_path, tags_path, fns_with_diff_infos, template_path
+        )
+
+        # operator filter not applied as tracing sources are excluded in selective build
+        gen_trace_type(out, native_funcs, template_path)
+    # Generate Functions.h/cpp (runs regardless of disable_autograd)
+    gen_autograd_functions_lib(out, differentiability_infos, template_path)
+
+    # Generate variable_factories.h
+    gen_variable_factories(out, native_functions_path, tags_path, template_path)
+
+    # Generate ViewFuncs.h/cpp
+    gen_view_funcs(out, fns_with_diff_infos, template_path)
+
+
+def gen_autograd_python(
+    native_functions_path: str,  # forwarded to load_derivatives and gen_python_functions.gen
+    tags_path: str,  # forwarded alongside native_functions_path
+    out: str,  # first argument of both generators called below
+    autograd_dir: str,  # directory that contains derivatives.yaml, deprecated.yaml and templates/
+) -> None:
+    differentiability_infos, _ = load_derivatives(  # second return value is not needed here
+        os.path.join(autograd_dir, "derivatives.yaml"), native_functions_path, tags_path
+    )
+
+    template_path = os.path.join(autograd_dir, "templates")
+
+    # Generate Functions.h/cpp (via the Python-specific generator, gen_autograd_functions_python)
+    gen_autograd_functions_python(out, differentiability_infos, template_path)
+
+    # Generate Python bindings
+    deprecated_path = os.path.join(autograd_dir, "deprecated.yaml")
+    gen_python_functions.gen(
+        out, native_functions_path, tags_path, deprecated_path, template_path
+    )
+
+
+def main() -> None:  # CLI entry point: parse four positional paths, then call gen_autograd
+    parser = argparse.ArgumentParser(description="Generate autograd C++ files script")
+    parser.add_argument(
+        "native_functions", metavar="NATIVE", help="path to native_functions.yaml"
+    )
+    parser.add_argument("tags", metavar="TAGS", help="path to tags.yaml")  # distinct metavar so usage does not list NATIVE twice
+    parser.add_argument("out", metavar="OUT", help="path to output directory")
+    parser.add_argument(
+        "autograd", metavar="AUTOGRAD", help="path to autograd directory"
+    )
+    args = parser.parse_args()
+    gen_autograd(
+        args.native_functions,
+        args.tags,
+        args.out,
+        args.autograd,
+        SelectiveBuilder.get_nop_selector(),  # disable_autograd keeps its default (False)
+    )
+
+
+if __name__ == "__main__":
+    main()
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd_functions.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..a1a432e2b3d811f6447ff0d4d8a585aa2c942bdb
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_autograd_functions.py
@@ -0,0 +1,912 @@
+# Generates C++ autograd functions for the derivatives of ATen operations
+#
+# This writes two files:
+#  Functions.h/cpp: subclasses of autograd::Node
+#  python_functions.h/cpp: Python bindings for the above classes
+#
+from typing import Dict, List, Sequence, Tuple
+
+from torchgen.api.autograd import (
+    Derivative,
+    DifferentiabilityInfo,
+    SavedAttribute,
+    uses_retain_variables,
+    uses_single_grad,
+)
+from torchgen.api.types import (
+    ArrayRefCType,
+    BaseCppType,
+    BaseCType,
+    Binding,
+    boolT,
+    doubleT,
+    intArrayRefT,
+    iTensorListRefT,
+    ListCType,
+    longT,
+    MutRefCType,
+    OptionalCType,
+    optionalIntArrayRefT,
+    optionalSymIntArrayRefT,
+    scalarT,
+    stringT,
+    symIntArrayRefT,
+    SymIntT,
+    TENSOR_LIST_LIKE_CTYPES,
+    tensorListT,
+    tensorT,
+    VectorCType,
+)
+from torchgen.code_template import CodeTemplate
+from torchgen.model import Argument, FunctionSchema
+from torchgen.utils import FileManager
+
+from .gen_inplace_or_view_type import VIEW_FUNCTIONS
+
+FUNCTION_DECLARATION = CodeTemplate(  # C++ declaration of struct `${op}` deriving from `${superclass}`; the #ifdef only moves TORCH_API onto the default ctor under _WIN32
+    """\
+#ifdef _WIN32
+struct ${op} : public ${superclass} {
+  TORCH_API ${op}() = default;
+#else
+struct TORCH_API ${op} : public ${superclass} {
+#endif
+  using ${superclass}::${superclass};
+  variable_list apply(variable_list&& grads) override;
+  std::string name() const override { return "${op}"; }
+  void release_variables() override {
+    ${thread_lock}
+    ${release_variables}
+  }
+  ${will_release_variables}
+  void compiled_args(CompiledNodeArgs& args) override;
+  variable_list apply_with_saved(const variable_list& inputs, SwapSavedVariables& saved) override;
+  ${saved_variables}
+  ${saved_list_sizes}
+};
+"""
+)
+
+WILL_RELEASE_VARIABLES = CodeTemplate(  # C++ member snippet: `retain_variables` starts true and will_release_variables() sets it to false
+    """\
+bool retain_variables = true;
+void will_release_variables() override {
+  retain_variables = false;
+}
+"""
+)
+
+FUNCTION_DEFINITION = CodeTemplate(  # C++ bodies of `${op}::apply`, `compiled_args` and `apply_with_saved`; the last one calls apply() on a copy of `grads`
+    """\
+variable_list ${op}::apply(variable_list&& grads) {
+  ${thread_lock}
+  ${asserts}
+  IndexRangeGenerator gen;
+  ${compute_index_ranges}
+  variable_list grad_inputs(gen.size());
+  ${body}
+  return grad_inputs;
+}
+void ${op}::compiled_args(CompiledNodeArgs& args) {
+    ${compiled_args}
+}
+variable_list ${op}::apply_with_saved(const variable_list& grads, SwapSavedVariables& saved) {
+    ${apply_with_saved_before}
+    variable_list result = apply(variable_list(grads));
+    ${apply_with_saved_after}
+    return result;
+}
+"""
+)
+
+GRAD_INPUT_MASK = CodeTemplate(  # declares `grad_input_mask` as std::array<bool, ${n}> initialised from ${masks}
+    """\
+  auto grad_input_mask = std::array<bool, ${n}>{
+    ${masks}
+  };\
+"""
+)
+
+DERIVATIVE_SINGLE = CodeTemplate(  # evaluates ${derivative} and copies it into the `${name}_ix` range, only when that output should be computed
+    """\
+if (task_should_compute_output({ ${name}_ix })) {
+  auto grad_result = ${derivative};
+  copy_range(grad_inputs, ${name}_ix, grad_result);
+}
+"""
+)
+
+# note(crcrpar): the `self` argument and the other optional positional arguments
+# of foreach functions are essentially lists of n `Tensor`s, so this template
+# iterates over `grads` and applies the existing per-tensor derivative formula
+# at each index `i`; an undefined grads[i] yields an undefined Tensor().
+DERIVATIVE_SINGLE_FOREACH = CodeTemplate(
+    """\
+if (task_should_compute_output({ ${name}_ix })) {
+  std::vector<Tensor> grad_result;
+  grad_result.reserve(grads.size());
+  for (const auto & i : c10::irange(grads.size())) {
+    if (grads[i].defined()) {
+      grad_result.emplace_back(${derivative});
+    } else {
+      grad_result.emplace_back(Tensor());
+    }
+  }
+  copy_range(grad_inputs, ${name}_ix, grad_result);
+}
+"""
+)
+
+DERIVATIVE_MULTI_COPY_RANGE = CodeTemplate(  # copies std::get<${i}>(grad_result) into the `${name}_ix` range when that output is requested
+    """\
+  if (task_should_compute_output({ ${name}_ix })) {
+    copy_range(grad_inputs, ${name}_ix, std::get<${i}>(grad_result));
+  }
+"""
+)
+
+DERIVATIVE_MULTI = CodeTemplate(  # one ${derivative} evaluation guarded by several index ranges; ${grad_input_mask} is spliced in before it and ${copy_ranges} after
+    """\
+if (task_should_compute_output({ ${idx_ranges} })) {
+  ${grad_input_mask}
+  auto grad_result = ${derivative};
+  ${copy_ranges}
+}
+"""
+)
+
+# Generates python bindings
+#
+# This generates the definitions for:
+#   (1) The PyTypeObject for each backward grad_fn subclassing Node
+#   (2) The entry for PyTypeObject's tp_getset slot (an array of PyGetSetDef structs)
+#       We generate one PyGetSetDef struct for each of grad_fn's saved inputs and outputs
+#       Each PyGetSetDef has a function ptr to a getter, also defined here (3).
+#   (3) Getters for each of grad_fn's saved inputs and outputs.
+#
+PY_FUNCTION_DEFINITION = CodeTemplate(  # declares a static PyTypeObject for `${op}` and passes it to addClass with `${op}_properties`
+    """\
+static PyTypeObject ${op}Class;
+addClass<${op}>(module, ${op}Class, "${op}", ${op}_properties);
+"""
+)
+
+PY_FUNCTION_PROPS_AND_GETTERS = CodeTemplate(  # getter definitions followed by the `${op}_properties` PyGetSetDef array, which ends with a {nullptr} sentinel
+    """\
+${all_getter_definitions}
+
+static struct PyGetSetDef ${op}_properties[] = {
+  THP_FUNCTION_DEFAULT_PROPERTIES,
+  ${all_getsetdef_structs}
+  {nullptr} /* sentinel */
+};
+
+"""
+)
+
+PY_GETSETDEF_STRUCT = CodeTemplate(  # PyGetSetDef initializer mapping attribute `_saved_${name}` to THP${op}_${name}_getter; the remaining fields are nullptr
+    """\
+{(char*)"_saved_${name}", (getter)THP${op}_${name}_getter, nullptr, nullptr, nullptr}"""
+)
+
+PY_RAW_GETSETDEF_STRUCT = CodeTemplate(  # PyGetSetDef initializer mapping attribute `_raw_saved_${name}` to THP${op}_${name}_raw_getter; the remaining fields are nullptr
+    """\
+{(char*)"_raw_saved_${name}", (getter)THP${op}_${name}_raw_getter, nullptr, nullptr, nullptr}"""
+)
+
+# Getter templates: each defines a C++ function taking (THPCppFunction*, void*) and returning PyObject*, with a ${body} slot
+GETTER_DEFINITION = CodeTemplate(  # copies node member `${name}` into `prop` before ${body}
+    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  auto prop = static_cast<${op}*>(self->cdata.get())->${name};
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_SAVEDVAR = CodeTemplate(  # getter skeleton that binds `prop` by const reference to node member `${name}_`
+    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  const auto& prop = static_cast<${op}*>(self->cdata.get())->${name}_;
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_RAW_SAVEDVAR = CodeTemplate(  # `_raw_getter` skeleton that binds `prop` by const reference to node member `${name}_`
+    """\
+PyObject* THP${op}_${name}_raw_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  const auto& prop = static_cast<${op}*>(self->cdata.get())->${name}_;
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_VEC_SAVEDVAR = CodeTemplate(  # getter skeleton for `${name}_`; sets RuntimeError(ERR_BACKWARD_TWICE) and returns nullptr when `${name}_released_` is true
+    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  const auto *node = static_cast<${op}*>(self->cdata.get());
+  const auto& prop = node->${name}_;
+  if (node->${name}_released_) {
+    PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
+    return nullptr;
+  }
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_RAW_VEC_SAVEDVAR = CodeTemplate(  # `_raw_getter` skeleton for `${name}_`; sets RuntimeError(ERR_BACKWARD_TWICE) and returns nullptr when `${name}_released_` is true
+    """\
+PyObject* THP${op}_${name}_raw_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  const auto *node = static_cast<${op}*>(self->cdata.get());
+  const auto& prop = node->${name}_;
+  if (node->${name}_released_) {
+    PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
+    return nullptr;
+  }
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_OPT = CodeTemplate(  # getter skeleton for an optional member: returns None when it has no value, otherwise unwraps it into `prop`
+    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  auto opt_prop = static_cast<${op}*>(self->cdata.get())->${name};
+  if (!opt_prop.has_value()) {
+    Py_RETURN_NONE;
+  }
+  auto prop = opt_prop.value();
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+GETTER_DEFINITION_OPT_ARRAYREF = CodeTemplate(  # getter skeleton where the optional is the member's `.list` field: None when empty, otherwise unwrapped into `prop`
+    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  auto opt_prop = static_cast<${op}*>(self->cdata.get())->${name};
+  if (!opt_prop.list.has_value()) {
+    Py_RETURN_NONE;
+  }
+  auto prop = opt_prop.list.value();
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+"""
+)
+
+# Getter bodies: C++ snippets substituted for ${body} in the getter definition templates.
+GETTER_BODY_SAVEDVAR = """\
+return THPVariable_Wrap(prop.unpack(self->cdata));
+"""  # returns THPVariable_Wrap of the saved variable unpacked against self->cdata
+
+GETTER_BODY_RAW_SAVEDVAR = """\
+pybind11::object obj = pybind11::cast(prop, pybind11::return_value_policy::reference);
+return obj.release().ptr();
+"""  # returns `prop` itself, cast through pybind11 with the `reference` return policy
+
+GETTER_BODY_VEC_SAVEDVAR = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i: c10::irange(prop.size())) {
+  PyTuple_SetItem(tup, (Py_ssize_t) i, THPVariable_Wrap(prop[i].unpack(self->cdata)));
+}
+return tup;
+"""  # tuple holding one unpacked-and-wrapped entry per element of `prop`
+
+GETTER_BODY_RAW_VEC_SAVEDVAR = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i : c10::irange(prop.size())) {
+  pybind11::object obj = pybind11::cast(prop[i], pybind11::return_value_policy::reference);
+  PyTuple_SetItem(tup, (Py_ssize_t) i, obj.release().ptr());
+}
+return tup;
+"""  # tuple of pybind11 `reference`-policy casts, one per element of `prop`
+
+GETTER_BODY_ARRAYREF_LONG = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i : c10::irange(prop.size())) {
+  PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromLongLong((long long) prop[i]));
+}
+return tup;
+"""  # int64_t elements are signed: the signed conversion keeps negative entries negative
+
+GETTER_BODY_ARRAYREF_SYMINT = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i : c10::irange(prop.size())) {
+    auto si = prop[i];
+    if (auto m = si.maybe_as_int()) {
+      PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromLongLong((long long) *m));
+    } else {
+      auto py_symint = py::cast(si).release().ptr();
+      PyTuple_SetItem(tup, (Py_ssize_t) i, py_symint);
+    }
+}
+return tup;
+"""  # concrete SymInts become signed Python ints; the others are cast through pybind11
+
+GETTER_BODY_ARRAYREF_DOUBLE = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i : c10::irange(prop.size())) {
+  PyTuple_SetItem(tup, (Py_ssize_t) i, PyFloat_FromDouble((double) prop[i]));
+}
+return tup;
+"""  # tuple of Python floats, one per element of `prop`
+
+GETTER_BODY_INT64_T = """\
+return PyLong_FromLongLong((long long) prop);
+"""  # signed conversion: an unsigned one would report -1 as 2**64 - 1
+
+GETTER_BODY_SYMINT = """\
+if (auto m = prop.maybe_as_int()) {
+  return PyLong_FromLongLong((long long) *m);
+} else {
+  return py::cast(prop).release().ptr();
+}
+"""  # scalar form of the SymInt tuple body: signed int when concrete, pybind11 cast otherwise
+
+GETTER_BODY_DOUBLE = """\
+return PyFloat_FromDouble((double) prop);
+"""  # returns `prop` as a Python float
+
+GETTER_BODY_BOOL = """\
+if (prop) {
+  Py_RETURN_TRUE;
+} else {
+  Py_RETURN_FALSE;
+}
+"""  # returns Python True/False according to the truth value of `prop`
+
+GETTER_BODY_STRING = """\
+return PyUnicode_FromStringAndSize(prop.data(), prop.size());
+"""  # builds a Python str from prop.data() with an explicit length of prop.size()
+
+GETTER_BODY_SCALAR = """\
+if (prop.isComplex()) {
+  auto cprop = prop.to<c10::complex<double>>();
+  return PyComplex_FromDoubles(cprop.real(), cprop.imag());
+} else if (prop.isFloatingPoint()) {
+  return PyFloat_FromDouble(prop.to<double>());
+} else if (prop.isIntegral(/*includeBool=*/false)) {
+  return PyLong_FromLongLong(prop.to<int64_t>());
+} else if (prop.isBoolean()) {
+  if (prop.to<bool>()) {
+    Py_RETURN_TRUE;
+  } else {
+    Py_RETURN_FALSE;
+  }
+} else {
+  PyErr_SetString(PyExc_RuntimeError, "Unknown scalar type");
+  return nullptr;
+}
+"""  # at::Scalar -> complex / float / int / bool; the int path uses the 64-bit-safe conversion
+
+
+# Tuple variant of GETTER_BODY_SCALAR, applied to each element of a list of scalars.
+GETTER_BODY_VEC_SCALAR = """\
+PyObject* tup = PyTuple_New((Py_ssize_t) prop.size());
+for (auto i: c10::irange(prop.size())) {
+  // PyTuple_SetItem steals a reference, so every branch must hand it a new one.
+  if (prop[i].isComplex()) {
+    auto cprop = prop[i].to<c10::complex<double>>();
+    PyTuple_SetItem(tup, (Py_ssize_t) i, PyComplex_FromDoubles(cprop.real(), cprop.imag()));
+  } else if (prop[i].isFloatingPoint()) {
+    auto double_prop = prop[i].to<double>();
+    PyTuple_SetItem(tup, (Py_ssize_t) i, PyFloat_FromDouble(double_prop));
+  } else if (prop[i].isIntegral(/*includeBool=*/false)) {
+    auto long_prop = prop[i].to<int64_t>();
+    PyTuple_SetItem(tup, (Py_ssize_t) i, PyLong_FromLongLong(long_prop));
+  } else if (prop[i].isBoolean()) {
+    // PyBool_FromLong returns an owned reference to Py_True / Py_False.
+    PyTuple_SetItem(tup, (Py_ssize_t) i, PyBool_FromLong(prop[i].to<bool>()));
+  } else {
+    Py_DECREF(tup);  // do not leak the partially filled tuple
+    PyErr_SetString(PyExc_RuntimeError, "Unknown scalar type");
+    return nullptr;
+  }
+}
+return tup;
+"""
+
+
+MISC_GETTER_DEFS = {
+    OptionalCType(BaseCType(longT)): (GETTER_DEFINITION_OPT, GETTER_BODY_INT64_T),
+    OptionalCType(BaseCType(SymIntT)): (GETTER_DEFINITION_OPT, GETTER_BODY_SYMINT),
+    BaseCType(doubleT): (GETTER_DEFINITION, GETTER_BODY_DOUBLE),
+    OptionalCType(BaseCType(doubleT)): (GETTER_DEFINITION_OPT, GETTER_BODY_DOUBLE),
+    BaseCType(boolT): (GETTER_DEFINITION, GETTER_BODY_BOOL),
+    BaseCType(scalarT): (GETTER_DEFINITION, GETTER_BODY_SCALAR),
+    OptionalCType(BaseCType(scalarT)): (GETTER_DEFINITION_OPT, GETTER_BODY_SCALAR),
+}  # saved type -> (getter definition template, getter body); read by save_var's fallback branch
+
+# Functions whose backward cannot be traced; their backward functions must
+# therefore be traced opaquely.
+# VIEW_FUNCTIONS are not traceable because they use as_strided, which
+# has an untraceable backward, see
+# https://github.com/pytorch/pytorch/issues/4250
+# TODO: This is probably not exhaustive, but it's a start
+UNTRACEABLE_FUNCTIONS = VIEW_FUNCTIONS
+
+
+def get_infos_with_derivatives_list(
+    differentiability_infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]]
+) -> List[DifferentiabilityInfo]:
+    """Flatten the nested info dicts, keeping infos with truthy `args_with_derivatives`."""
+    return [
+        candidate
+        for per_key_infos in differentiability_infos.values()
+        for candidate in per_key_infos.values()
+        if candidate.args_with_derivatives
+    ]
+
+
+def gen_autograd_functions_lib(
+    out: str,
+    differentiability_infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]],
+    template_path: str,
+) -> None:
+    """Functions.h and Functions.cpp body
+
+    These contain the auto-generated subclasses of torch::autograd::Node
+    for every differentiable torch function.
+    """
+
+    # Flatten to a 1D list of infos; per-FunctionSchema/DispatchKey grouping is not needed here.
+    # Declarations and definitions are rendered from the same list, in the same order.
+    infos = get_infos_with_derivatives_list(differentiability_infos)
+    declarations = [process_function(f, FUNCTION_DECLARATION) for f in infos]
+    definitions = [process_function(f, FUNCTION_DEFINITION) for f in infos]
+
+    file_basename = "Functions"
+    fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
+    for suffix in [".h", ".cpp"]:
+        fname = file_basename + suffix
+        fm.write_with_template(
+            fname,
+            fname,
+            lambda: {
+                "generated_comment": "@"
+                + f"generated from {fm.template_dir_for_comments()}/"
+                + fname,
+                "autograd_function_declarations": declarations,
+                "autograd_function_definitions": definitions,
+            },
+        )
+
+
+def gen_autograd_functions_python(
+    out: str,
+    differentiability_infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]],
+    template_path: str,
+) -> None:
+    fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
+    num_shards = 5  # drives the header's per-shard declarations/calls and write_sharded below
+    fm.write(
+        "python_functions.h",
+        lambda: {
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/python_functions.h",
+            "shard_forward_declare": [
+                f"void initialize_autogenerated_functions_{i}(PyObject* module);"
+                for i in range(num_shards)
+            ],
+            "shard_call": [
+                f"initialize_autogenerated_functions_{i}(module);"
+                for i in range(num_shards)
+            ],
+        },
+    )
+
+    # Flatten to a 1D list of infos; per-FunctionSchema/DispatchKey grouping is not needed here.
+    # Infos that differ only in dispatch key share `info.name`, which is the shard key below.
+    infos = get_infos_with_derivatives_list(differentiability_infos)
+    fm.write_sharded(
+        "python_functions.cpp",
+        infos,
+        key_fn=lambda info: info.name,
+        base_env={
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/python_functions.cpp",
+        },
+        env_callable=lambda info: {
+            "py_function_initializers": [
+                process_function(info, PY_FUNCTION_DEFINITION)
+            ],
+            "py_function_props_and_getters": [
+                process_function(info, PY_FUNCTION_PROPS_AND_GETTERS)
+            ],
+        },
+        num_shards=num_shards,
+        sharded_keys={"py_function_initializers", "py_function_props_and_getters"},
+    )
+
+
+def process_function(info: DifferentiabilityInfo, template: CodeTemplate) -> str:  # renders `template` for one info
+    saved_variables: List[str] = []
+    release_variables: List[str] = []
+    saved_list_sizes: List[str] = []
+    unpack: List[str] = []
+    asserts: List[str] = []
+    compute_index_ranges: List[str] = []
+    getter_definitions: List[str] = []
+    py_getsetdef_structs: List[str] = []
+    compiled_args: List[str] = []
+    apply_with_saved_before: List[str] = []
+    apply_with_saved_after: List[str] = []
+
+    for arg in info.args_with_derivatives:
+        if arg.type in TENSOR_LIST_LIKE_CTYPES:
+            size = f"{arg.name}_size_"
+            saved_list_sizes.append(f"size_t {arg.name}_size_;")
+        else:
+            size = "1"
+        compute_index_ranges.append(f"auto {arg.name}_ix = gen.range({size});")
+
+    def save_var(var: SavedAttribute, is_output: bool) -> None:
+        name = var.nctype.name
+        type = var.nctype.type
+        should_append_getsetdef = True
+        should_append_raw_getsetdef = False
+        visit_name = name  # expression passed to args.collect / saved.before / saved.after below
+
+        if (
+            type == BaseCType(tensorT)
+            or type == OptionalCType(BaseCType(tensorT))
+            or type == MutRefCType(OptionalCType(BaseCType(tensorT)))
+            or (type == BaseCType(scalarT) and is_output)
+        ):
+            saved_variables.append(f"SavedVariable {name}_;")
+            release_variables.append(f"{name}_.reset_data();")
+            ptr = "shared_from_this()" if is_output else ""
+            unpack.append(f"auto {name} = {name}_.unpack({ptr});")
+            getter_definitions.append(
+                GETTER_DEFINITION_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_SAVEDVAR
+                )
+            )
+            getter_definitions.append(
+                GETTER_DEFINITION_RAW_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_RAW_SAVEDVAR
+                )
+            )
+            should_append_raw_getsetdef = True
+            visit_name = f"{name}_"
+        elif (
+            type == BaseCType(tensorListT)
+            or type == BaseCType(iTensorListRefT)
+            or type == VectorCType(BaseCType(tensorT))
+        ):
+            # note(crcrpar): [nuanced return type of out-of-place foreach functions]
+            # When an out-of-place foreach function whose return signature is `Tensor[]`
+            # spells out its backward definitions in `derivatives.yaml`, and some of them depend on
+            # `result`, `result`'s type is interpreted and treated as `std::vector<Tensor>`.
+            # An out-of-place foreach whose backwards rely on their output doesn't suffer from this
+            # difference if the definitions are codegen'ed.
+            # This special case is needed for `_foreach_pow.List` and `_foreach_pow.ScalarAndTensor`
+            # as of https://github.com/pytorch/pytorch/pull/105504.
+            if type == VectorCType(BaseCType(tensorT)):
+                assert (
+                    info.func.func.name.name.base.startswith("_foreach") and is_output
+                )
+            saved_variables.append(f"std::vector<SavedVariable> {name}_;")
+            saved_variables.append(f"bool {name}_released_ = false;")
+            # Just clear() is sufficient, we don't need to loop and clear each variable.
+            # Because the SavedVariable owns a tensor and a grad_fn, removing the SavedVariable makes them go away as well.
+            release_variables.append(f"{name}_.clear();")
+            release_variables.append(f"{name}_released_ = true;")
+            ptr = "shared_from_this()" if is_output else "nullptr"
+            unpack.append(f"auto {name} = unpack_list({name}_, {ptr});")
+            asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
+            getter_definitions.append(
+                GETTER_DEFINITION_VEC_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR
+                )
+            )
+            getter_definitions.append(
+                GETTER_DEFINITION_RAW_VEC_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_RAW_VEC_SAVEDVAR
+                )
+            )
+            should_append_raw_getsetdef = True
+            visit_name = f"{name}_"
+        elif type == ListCType(OptionalCType(BaseCType(tensorT))):
+            saved_variables.append(f"std::vector<SavedVariable> {name}_;")
+            saved_variables.append(f"bool {name}_released_ = false;")
+            # Just clear() is sufficient, we don't need to loop and clear each variable.
+            # Because the SavedVariable owns a tensor and a grad_fn, removing the SavedVariable makes them go away as well.
+            release_variables.append(f"{name}_.clear();")
+            release_variables.append(f"{name}_released_ = true;")
+            unpack.append(f"auto {name} = unpack_opt_list({name}_);")
+            asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
+            getter_definitions.append(
+                GETTER_DEFINITION_VEC_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_VEC_SAVEDVAR
+                )
+            )
+            getter_definitions.append(
+                GETTER_DEFINITION_RAW_VEC_SAVEDVAR.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_RAW_VEC_SAVEDVAR
+                )
+            )
+            should_append_raw_getsetdef = True
+            visit_name = f"{name}_"
+        elif type == BaseCType(intArrayRefT):
+            saved_variables.append(f"std::vector<int64_t> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
+                )
+            )
+        elif type == BaseCType(symIntArrayRefT):
+            saved_variables.append(f"std::vector<c10::SymInt> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
+                )
+            )
+        elif type == BaseCType(optionalIntArrayRefT):
+            saved_variables.append(f"c10::OptionalArray<int64_t> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
+                )
+            )
+        elif type == BaseCType(optionalSymIntArrayRefT):
+            saved_variables.append(f"c10::OptionalArray<c10::SymInt> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
+                )
+            )
+        elif type == OptionalCType(BaseCType(intArrayRefT)):
+            saved_variables.append(f"c10::OptionalArray<int64_t> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_LONG
+                )
+            )
+        elif type == OptionalCType(BaseCType(symIntArrayRefT)):
+            saved_variables.append(f"c10::OptionalArray<c10::SymInt> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_SYMINT
+                )
+            )
+        elif type == OptionalCType(ArrayRefCType(BaseCType(doubleT))):
+            saved_variables.append(f"c10::OptionalArray<double> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT_ARRAYREF.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_ARRAYREF_DOUBLE
+                )
+            )
+        elif type == BaseCType(longT):
+            saved_variables.append(f"{type.cpp_type()} {name} = 0;")
+            getter_definitions.append(
+                GETTER_DEFINITION.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_INT64_T
+                )
+            )
+        elif type == BaseCType(SymIntT):
+            saved_variables.append(f"c10::SymInt {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_SYMINT
+                )
+            )
+        elif type == BaseCType(stringT):
+            saved_variables.append(f"std::string {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_STRING
+                )
+            )
+        elif type == OptionalCType(BaseCType(stringT)):
+            saved_variables.append(f"c10::optional<std::string> {name};")
+            getter_definitions.append(
+                GETTER_DEFINITION_OPT.substitute(
+                    op=info.op, name=name, body=GETTER_BODY_STRING
+                )
+            )
+        elif type == ArrayRefCType(
+            elem=BaseCType(type=BaseCppType(ns="at", name="Scalar"))
+        ):
+            saved_variables.append(f"std::vector<at::Scalar> {name};")
+            saved_variables.append(f"bool {name}_released_ = false;")
+            # The field holds plain at::Scalar values (not SavedVariables); clear() simply drops them.
+            # NOTE: `{name}_released_` is declared above but never set to true in this branch (the line that would do so is commented out below).
+            release_variables.append(f"{name}.clear();")
+            # release_variables.append(f"{name}_released_ = true;")
+            # unpack.append(f"auto {name} = unpack_list({name}_);")
+            # asserts.append(f"TORCH_CHECK(!{name}_released_, ERR_BACKWARD_TWICE);")
+            getter_definitions.append(
+                CodeTemplate(
+                    """\
+PyObject* THP${op}_${name}_getter(THPCppFunction *self, void *_unused) {
+  HANDLE_TH_ERRORS
+  const auto *node = static_cast<${op}*>(self->cdata.get());
+  const auto& prop = node->${name};
+  if (node->${name}_released_) {
+    PyErr_SetString(PyExc_RuntimeError, ERR_BACKWARD_TWICE);
+    return nullptr;
+  }
+  ${body}
+  END_HANDLE_TH_ERRORS
+}
+                            """
+                ).substitute(
+                    op=info.op,
+                    name=name,
+                    body=GETTER_BODY_VEC_SCALAR,
+                )
+            )
+        else:
+            # Check for indicators that you're putting a non-owning reference
+            # into the saved variable field.  If this is spuriously firing,
+            # edit this field.  Otherwise, you probably need to add a case
+            # above.
+            assert (
+                "ref" not in type.cpp_type().lower()
+                and "view" not in type.cpp_type().lower()
+                and "*" not in type.cpp_type()
+                and "&" not in type.cpp_type()
+            ), f"{type.cpp_type()} looks like it contains a non-owning reference"
+            saved_variables.append(f"{type.cpp_type()} {name};")
+
+            if type in MISC_GETTER_DEFS:
+                getter_def, body = MISC_GETTER_DEFS[type]
+                getter_definitions.append(
+                    getter_def.substitute(op=info.op, name=name, body=body)
+                )
+            else:
+                # Types we don't expose python bindings to yet:
+                #   TypeAndSize, at::ScalarType, TensorOptions, TensorGeometry,
+                #   std::vector<std::vector<int64_t>>, std::vector<at::ScalarType>
+                should_append_getsetdef = False
+
+        if should_append_getsetdef:
+            py_getsetdef_structs.append(
+                PY_GETSETDEF_STRUCT.substitute(op=info.op, name=name)
+            )
+        if should_append_raw_getsetdef:
+            py_getsetdef_structs.append(
+                PY_RAW_GETSETDEF_STRUCT.substitute(op=info.op, name=name)
+            )
+
+        compiled_args.append(f"args.collect({visit_name});")
+        apply_with_saved_before.append(f"saved.before({visit_name});")
+        apply_with_saved_after.append(f"saved.after({visit_name});")
+
+    for var in sorted(info.all_saved_inputs, key=lambda sa: str(sa.nctype.name)):
+        save_var(var, is_output=False)
+    for var in sorted(info.all_saved_outputs, key=lambda sa: str(sa.nctype.name)):
+        save_var(var, is_output=True)
+
+    # lock the mutex when we release variables and in Node::apply to protect thread safety
+    # see Note [Thread Safety on Autograd Node]
+    if len(release_variables) > 0:
+        thread_lock = "std::lock_guard<std::mutex> lock(mutex_);"
+    else:
+        thread_lock = ""
+
+    if uses_retain_variables(info):
+        will_release_variables = WILL_RELEASE_VARIABLES.substitute()
+    else:
+        will_release_variables = ""
+
+    body: List[str] = []
+
+    if uses_single_grad(info):
+        body.append("const auto& grad = grads[0];")
+    else:
+        # Generate aliases for gradients named for returned values.
+        body.extend(
+            f"const auto& {name} = grads[{info.available_named_gradients.index(name)}];"
+            for name in sorted(info.used_named_gradients)
+        )
+
+    def emit_derivative(
+        derivative: Derivative,
+        args_with_derivatives: Sequence[Binding],
+    ) -> Tuple[bool, str]:
+        formula = derivative.formula
+        var_names = derivative.var_names
+        if len(var_names) == 1:
+            checks_any_grad_defined = False
+            if "not_implemented" not in formula:
+                matching_args = [
+                    arg for arg in args_with_derivatives if arg.name == var_names[0]
+                ]
+                if len(matching_args) == 1:
+                    # We can add undefined grad support if the input variable is a Tensor
+                    arg = matching_args[0]
+                    if isinstance(arg.argument, Argument) and str(
+                        arg.argument.type
+                    ) in ("Tensor", "Tensor?"):
+                        formula = "any_grad_defined ? (" + formula + ") : Tensor()"
+                        checks_any_grad_defined = True
+            if info.name.startswith("_foreach_"):
+                derivative_template = DERIVATIVE_SINGLE_FOREACH
+            else:
+                derivative_template = DERIVATIVE_SINGLE
+            return (
+                checks_any_grad_defined,
+                derivative_template.substitute(name=var_names[0], derivative=formula),
+            )
+        else:
+            if "grad_input_mask" in formula:
+                masks = [
+                    f"task_should_compute_output({{ {n}_ix }})," for n in var_names
+                ]
+                grad_input_mask = GRAD_INPUT_MASK.substitute(
+                    masks=masks, n=len(var_names)
+                )
+            else:
+                grad_input_mask = ""
+            idx_ranges = ", ".join(f"{n}_ix" for n in var_names)
+            copy_ranges: List[str] = []
+            for i, n in enumerate(var_names):
+                copy_ranges.append(DERIVATIVE_MULTI_COPY_RANGE.substitute(name=n, i=i))
+            return False, DERIVATIVE_MULTI.substitute(
+                idx_ranges=idx_ranges,
+                copy_ranges=copy_ranges,
+                derivative=formula,
+                grad_input_mask=grad_input_mask,
+            )
+
+    body.extend(unpack)
+    need_any_grad_defined_var = False
+    for derivative in info.derivatives:
+        checks_any_grad_defined, derivative_text = emit_derivative(
+            derivative, info.args_with_derivatives
+        )
+        body.append(derivative_text)
+        need_any_grad_defined_var |= checks_any_grad_defined
+    # Since single-output derivative formulas need to check if grads are
+    # defined, only perform the check once, before all the formulas
+    if need_any_grad_defined_var:
+        body.insert(
+            -len(info.derivatives),
+            "bool any_grad_defined = any_variable_defined(grads);",
+        )
+
+    if info.name in UNTRACEABLE_FUNCTIONS:
+        superclass = "Node"
+    else:
+        superclass = "TraceableFunction"
+
+    all_getsetdef_structs = (
+        ",\n".join(py_getsetdef_structs) + "," if len(py_getsetdef_structs) != 0 else ""
+    )
+    all_getter_definitions = "\n".join(getter_definitions)
+
+    return template.substitute(
+        op=info.op,
+        compute_index_ranges=compute_index_ranges,
+        saved_variables=saved_variables,
+        release_variables=release_variables,
+        saved_list_sizes=saved_list_sizes,
+        asserts=asserts,
+        thread_lock=thread_lock,
+        will_release_variables=will_release_variables,
+        body=body,
+        superclass=superclass,
+        all_getter_definitions=all_getter_definitions,
+        all_getsetdef_structs=all_getsetdef_structs,
+        compiled_args=compiled_args,
+        apply_with_saved_before=apply_with_saved_before,
+        apply_with_saved_after=apply_with_saved_after,
+    )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_inplace_or_view_type.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_inplace_or_view_type.py
new file mode 100644
index 0000000000000000000000000000000000000000..d1392f5407c0c7f24843631436b17281a15a9c55
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_inplace_or_view_type.py
@@ -0,0 +1,675 @@
+# Generates ADInplaceOrViewType.h/cpp
+#
+# NOTE: If any changes are being made to the ADInplaceOrView codegen please also check
+# if updates are needed in torch/csrc/autograd/autograd_not_implemented_fallback.cpp
+# The fallback is expected to mimic this codegen, so we should keep the two in sync.
+
+from typing import Dict, List, Optional, Tuple
+
+from torchgen.api import cpp
+from torchgen.api.autograd import (
+    dispatch_strategy,
+    gen_differentiable_outputs,
+    NativeFunctionWithDifferentiabilityInfo,
+)
+from torchgen.api.types import (
+    BaseCType,
+    Binding,
+    boolT,
+    ConstRefCType,
+    CType,
+    DispatcherSignature,
+    intArrayRefT,
+    longT,
+    OptionalCType,
+    symIntArrayRefT,
+    SymIntT,
+    # See Note [Nested Arg Types]
+    tensorT,
+)
+from torchgen.code_template import CodeTemplate
+from torchgen.context import with_native_function
+from torchgen.model import (
+    NativeFunction,
+    SchemaKind,
+    SelfArgument,
+    TensorOptionsArguments,
+    Type,
+)
+from torchgen.utils import FileManager
+
+from .context import with_native_function_with_differentiability_info
+from .gen_trace_type import (
+    get_return_value,
+    MANUAL_AUTOGRAD,
+    tie_return_values,
+    type_wrapper_name,
+)
+
+# See NOTE [ Autograd View Variables ] in variable.h for details.
+# If you update list VIEW_FUNCTIONS or RETURNS_VIEWS_OF_INPUT,
+# you **MUST** also update the public list of view ops accordingly in
+# docs/source/tensor_view.rst. Note that not all ATen functions are exposed publicly,
+# e.g. alias & sparse_coo_tensor_with_dims_and_tensors.
+#
+# VIEW_FUNCTIONS below is a map: function name => name of the argument that all outputs are views of
+
+VIEW_FUNCTIONS_WITH_METADATA_CHANGE = [
+    "view_as_complex",
+    "view_as_real",
+    "_conj",
+    "_neg_view",
+    "_nested_get_values",
+    "_nested_view_from_buffer",
+    "_nested_view_from_jagged",
+]
+
+VIEW_FUNCTIONS = {
+    "numpy_T": "self",
+    "alias": "self",
+    "as_strided": "self",
+    "diagonal": "self",
+    "expand": "self",
+    "permute": "self",
+    "select": "self",
+    "slice": "self",
+    "slice_inverse": "self",
+    "split": "self",
+    "split_with_sizes": "self",
+    "squeeze": "self",
+    "t": "self",
+    "transpose": "self",
+    "unfold": "self",
+    "unsqueeze": "self",
+    "flatten": "self",
+    "view": "self",
+    "unbind": "self",
+    "_indices": "self",
+    "_values": "self",
+    "indices": "self",
+    "values": "self",
+    "crow_indices": "self",
+    "col_indices": "self",
+    "ccol_indices": "self",
+    "row_indices": "self",
+    # sparse_coo ctor output should really be views of both indices and values,
+    # but we only support making it a view of a single variable, and indices is
+    # discrete anyway.
+    # FIXME: clone indices on construction.
+    "sparse_coo_tensor_with_dims_and_tensors": "values",
+    "_reshape_alias": "self",
+    "_test_autograd_multiple_dispatch_view": "self",
+}
+
+for key in VIEW_FUNCTIONS_WITH_METADATA_CHANGE:  # each metadata-changing view is a view of `self`
+    VIEW_FUNCTIONS[key] = "self"
+
+# Operators that may return a view of their input, as consulted by the JIT.
+# Besides the root view functions in VIEW_FUNCTIONS, this also lists operators
+# that are purely compositions of those view functions.
+# note: membership only means the operator *may* return a view; some of them
+# sometimes return a copy instead (e.g. `contiguous`).
+# The root view functions come from the keys of VIEW_FUNCTIONS; the composed
+# operators are spelled out in the literal below.
+RETURNS_VIEWS_OF_INPUT = set(VIEW_FUNCTIONS) | {
+    "chunk",
+    "detach",
+    "contiguous",
+    "reshape",
+    "reshape_as",
+    "expand_as",
+    "view_as",
+    "real",
+    "imag",
+    "narrow",
+    "movedim",
+    "tensor_split",
+    "swapdims",
+    "swapaxes",
+    "mT",
+    "mH",
+    "adjoint",
+    "matrix_H",
+}
+
+# The set of functions treated as views when gen_variable_type validates
+# StorageImpl and TensorImpl.
+# `_unsafe_view` is deliberately absent from VIEW_FUNCTIONS: for the
+# ADInplaceOrView kernel it is not a view and as_view must not be called on it.
+# See NOTE [Unsafe View] for more info.
+ALL_VIEW_FUNCTIONS = dict(
+    VIEW_FUNCTIONS,
+    _unsafe_view="self",
+)
+
+ARRAYREF_TO_VEC = CodeTemplate(
+    """\
+auto ${vec} = ${arg}.vec();
+"""
+)  # statement: declares ${vec} as the result of ${arg}.vec()
+
+OPTIONAL_TO_VAL = CodeTemplate(
+    """\
+auto ${val} = ${arg}.value_or(${default});
+"""
+)  # statement: declares ${val} as ${arg}'s value, or ${default} when it has none
+
+CALL_DISPATCH = CodeTemplate(
+    """\
+at::_ops::${unambiguous_name}::call(${unpacked_args})"""
+)  # expression only (no semicolon or newline): the op's `call` entry point
+
+REVERSE_VIEW_DISPATCH = CodeTemplate(
+    """\
+${reverse_name}(${unpacked_args})"""
+)  # expression only: a call to ${reverse_name}
+
+MULTI_OUTPUT_VIEW_ITERATION = CodeTemplate(
+    """\
+for (auto ${view_idx} : c10::irange(${var}.size())) {
+  ${body}
+}
+"""
+)  # wraps ${body} in a loop of ${view_idx} over c10::irange(${var}.size())
+
+SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE = CodeTemplate(
+    """\
+std::unique_ptr<torch::autograd::ViewFunc> func(nullptr);
+std::function<at::Tensor(const at::Tensor&)> rev_func=nullptr;
+if (${is_view_with_metadata_change} ||
+    !self.unsafeGetTensorImpl()->support_as_strided() ||
+    self.unsafeGetTensorImpl()->is_python_dispatch() ||
+    c10::AutogradState::get_tls_state().get_view_replay_enabled()) {
+  ${replay_view_func}
+  ${reverse_replay_view_func}
+}
+"""
+)  # declares `func` / `rev_func` as null; they are filled in only if one of the four conditions holds
+
+REPLAY_VIEW_FUNC = CodeTemplate(
+    """\
+func = std::make_unique<${view_func_name}>(${view_func_args});
+"""
+)  # assigns `func` a freshly made ${view_func_name}
+
+REVERSE_REPLAY_VIEW_LAMBDA_FUNC = CodeTemplate(
+    """\
+rev_func = [=](const at::Tensor& ${input_view}) {
+  return ${reverse_replay_view_call};
+};
+"""
+)  # assigns `rev_func` a capture-by-copy lambda that returns ${reverse_replay_view_call}
+
+METHOD_DEFINITION = CodeTemplate(
+    """\
+${return_type} ${type_wrapper_name}(${formals}) {
+  ${type_definition_body}
+}
+"""
+)  # skeleton of a C++ function definition named ${type_wrapper_name}
+
+WRAPPER_REGISTRATION = CodeTemplate(
+    """\
+m.impl("${unqual_operator_name_with_overload}",
+       TORCH_FN(${class_type}::${type_wrapper_name})
+);
+"""
+)  # m.impl(...) registration of ${class_type}::${type_wrapper_name} through TORCH_FN
+
+AUTOGRAD_NOT_IMPLEMENTED_REGISTRATION = CodeTemplate(
+    """\
+m.impl("${unqual_operator_name_with_overload}", torch::autograd::autogradNotImplementedFallback());
+"""
+)  # m.impl(...) registration of autogradNotImplementedFallback() for the operator
+
+INPLACE_REDISPATCH = CodeTemplate(
+    """\
+{
+  at::AutoDispatchBelowADInplaceOrView guard;
+  at::_ops::${unambiguous_name}::redispatch(${unpacked_args});
+}
+"""
+)  # redispatch inside a scope holding an AutoDispatchBelowADInplaceOrView guard; result is not used
+
+ASSIGN_RETURN_VALUE = CodeTemplate(
+    """\
+${return_values} = ${rhs_value};
+"""
+)  # statement: assigns ${rhs_value} to ${return_values}
+
+VIEW_REDISPATCH = CodeTemplate(
+    """\
+${assign_return_values} ([&]() {
+  at::AutoDispatchBelowADInplaceOrView guard;
+  return at::_ops::${unambiguous_name}::redispatch(${unpacked_args});
+})();
+"""
+)  # immediately-invoked lambda: redispatches under the guard and yields the redispatch result
+
+TMP_VAR = "_tmp"
+
+
+# FIXME: Ideally these functions should be methods on the Type class, but there is a
+#        comment in codegen/model.py saying these concepts are not well defined.
+#        Thus we put a version that is commonly used by autograd codegen here.
+def is_tensor_type(t: Type) -> bool:
+    # TODO: Should handle optional here?
+    return t.is_tensor_like() and t.is_list_like() is None
+
+
+def is_tensor_list_type(t: Type) -> bool:
+    # TODO: Should handle optional here?
+    return t.is_tensor_like() and t.is_list_like() is not None
+
+
+UNPACK_TENSOR = CodeTemplate(
+    """\
+auto${ref} ${arg_name}_ = unpack${suffix}(${arg_name}, "${arg_name}", ${arg_pos});"""
+)
+
+
+def unpacked_name(arg_name: str) -> str:
+    return arg_name + "_"
+
+
+# Builds `<root_name>_copy[_<overload>]_inverse`,
+# e.g. select.int -> select_copy_int_inverse()
+def inverse_view_name(f: NativeFunction) -> str:
+    overload = f"{f.func.name.overload_name}"
+    # Ops without an overload name get no `_<overload>` infix.
+    infix = "_" + overload if overload != "" else ""
+    return f"{f.root_name}_copy{infix}_inverse"
+
+
+def extract_bindings(f: NativeFunction) -> List[Binding]:
+    # Flatten the C++ bindings of all schema-order arguments into a single list.
+    collected: List[Binding] = []
+    for schema_arg in f.func.schema_order_arguments():
+        collected += cpp.argument(
+            schema_arg,
+            method=False,
+            symint=True,
+            cpp_no_default_args=set(),
+            faithful=False,
+            has_tensor_options=False,
+        )
+    return collected
+
+
+@with_native_function
+def unpack_args(f: NativeFunction) -> Tuple[List[str], List[Binding]]:
+    # Returns the unpack statements for the non-nullable tensor-like arguments of
+    # `f`, plus all bindings with exactly those arguments renamed to `<name>_`.
+    unpack_stmts: List[str] = []
+    result_bindings: List[Binding] = []
+    for pos, b in enumerate(extract_bindings(f)):
+        assert not isinstance(b.argument, SelfArgument)
+        if isinstance(b.argument, TensorOptionsArguments):
+            raise RuntimeError("VariableKernel shouldn't take TensorOptions")
+
+        arg_type = b.argument.type
+        nullable = arg_type.is_nullable()
+        if not arg_type.is_tensor_like() or nullable:
+            result_bindings.append(b)
+            continue
+
+        # Non-nullable from here on: no `_opt` suffix; only tensor lists drop the `&`.
+        by_ref = not is_tensor_list_type(arg_type)
+        unpack_stmts.append(
+            UNPACK_TENSOR.substitute(
+                arg_name=b.name,
+                arg_pos=pos,
+                suffix="",
+                ref="&" if by_ref else "",
+            )
+        )
+        result_bindings.append(
+            Binding(
+                name=unpacked_name(b.name),
+                nctype=b.nctype,
+                argument=b.argument,
+                default=b.default,
+            )
+        )
+    return unpack_stmts, result_bindings
+
+
+def get_base_name(f: NativeFunction) -> str:
+    return f.func.name.name.base  # TODO: should be str(f.func.name.name)?
+
+
+def get_view_info(f: NativeFunction) -> Optional[str]:
+    # An entry in VIEW_FUNCTIONS wins; otherwise ops listed in
+    # RETURNS_VIEWS_OF_INPUT map to "self", and everything else yields None.
+    name = get_base_name(f)
+    explicit = VIEW_FUNCTIONS.get(name)
+    return "self" if explicit is None and name in RETURNS_VIEWS_OF_INPUT else explicit
+
+
+def emit_view_func(
+    f: NativeFunction, bindings: List[Binding], view_idx: Optional[str] = None
+) -> str:
+    """Generate an additional lambda function to recover views in backward when as_strided is not supported.
+    See Note [View + Inplace update for base tensor] and [View + Inplace update for view tensor] for more details.
+    """
+    # TODO: Clean this logic up if we get rid of reverse view funcs or reify them.
+    input_base = "input_base"
+    replay_view_func = ""
+    updated_args: List[str] = []
+    known_view_arg_simple_types: List[CType] = [
+        BaseCType(longT),
+        OptionalCType(BaseCType(longT)),
+        BaseCType(SymIntT),
+        OptionalCType(BaseCType(SymIntT)),
+        BaseCType(boolT),
+        BaseCType(intArrayRefT),
+        BaseCType(symIntArrayRefT),
+        ConstRefCType(BaseCType(tensorT)),
+        ConstRefCType(OptionalCType(BaseCType(tensorT))),
+    ]
+    for binding in bindings:
+        arg, arg_type = binding.name, binding.nctype.type
+        if arg == "self":
+            updated_args.append(input_base)
+            continue
+        if arg_type not in known_view_arg_simple_types:
+            known_types_str = ", ".join([str(t) for t in known_view_arg_simple_types])
+            raise TypeError(
+                f"You are adding an {arg_type} {arg} argument to op {cpp.name(f.func)} in addition to known types: "
+                f"{known_types_str}. Please update the list or materialize it so that it can be closed "
+                "over by value, also add a test in pytorch/xla/test/test_operations.py where this code "
+                "is exercised."
+            )
+        if arg_type == BaseCType(intArrayRefT) or arg_type == BaseCType(
+            symIntArrayRefT
+        ):
+            # It's not safe to close over IntArrayRef by value, since this is a
+            # reference type, so materialize a vector to close over by value
+            arg_vec = arg + "_vec"
+            replay_view_func += ARRAYREF_TO_VEC.substitute(arg=arg, vec=arg_vec)
+            updated_args.append(arg_vec)
+        elif arg_type == OptionalCType(BaseCType(longT)):
+            # Materialize int64_t? to int64_t
+            arg_value = arg + "_val"
+            replay_view_func += OPTIONAL_TO_VAL.substitute(
+                arg=arg, val=arg_value, default="0"
+            )
+            updated_args.append(arg_value)
+        elif arg_type == ConstRefCType(BaseCType(tensorT)) or arg_type == ConstRefCType(
+            OptionalCType(BaseCType(tensorT))
+        ):
+            # NB: Closing over a tensor. If a user modifies this tensor, this will be silently
+            # incorrect. The proper thing to do is to store the version counter and copy on write.
+            updated_args.append(arg)
+        else:
+            updated_args.append(arg)
+
+    from .gen_view_funcs import view_func_name
+
+    view_func_args = [b.name for b in bindings if b.name != "self"]
+    if view_idx is not None:
+        view_func_args.append(f"{view_idx}")
+    replay_view_func += REPLAY_VIEW_FUNC.substitute(
+        view_func_name=view_func_name(f, include_namespace=True),
+        view_func_args=view_func_args,
+    )
+
+    input_view = "input_view"
+    reverse_unpacked_args = [
+        "self",
+        f"{input_view}",
+        # inverse_return_mode=
+        "at::functionalization::InverseReturnMode::AlwaysView",
+        *(() if view_idx is None else (f"{view_idx}",)),
+        # skip input_base arg
+        *updated_args[1:],
+    ]
+
+    from torchgen.api.functionalization import reverse_name
+
+    reverse_replay_view_call = REVERSE_VIEW_DISPATCH.substitute(
+        reverse_name=reverse_name(f, include_namespace=True),
+        unpacked_args=reverse_unpacked_args,
+    )
+    reverse_replay_view_func = REVERSE_REPLAY_VIEW_LAMBDA_FUNC.substitute(
+        input_view=input_view, reverse_replay_view_call=reverse_replay_view_call
+    )
+
+    is_view_with_metadata_change = (
+        "true" if cpp.name(f.func) in VIEW_FUNCTIONS_WITH_METADATA_CHANGE else "false"
+    )
+
+    return SETUP_REPLAY_VIEW_IF_NOT_SUPPORT_AS_STRIDED_OR_VIEW_WITH_METADATA_CHANGE.substitute(
+        is_view_with_metadata_change=is_view_with_metadata_change,
+        replay_view_func=replay_view_func,
+        reverse_replay_view_func=reverse_replay_view_func,
+    )
+
+
+def emit_view_body(
+    fn: NativeFunctionWithDifferentiabilityInfo, var: str
+) -> Tuple[str, str]:
+    # See NOTE [ Autograd View Variables ] in variable.h for details.
+    f = fn.func
+    base_name = get_base_name(f)
+    view_info = get_view_info(f)
+    call = ""
+    differentiable_outputs = gen_differentiable_outputs(fn)
+    differentiable_output_vars = {r.name for r in differentiable_outputs}
+    if not isinstance(view_info, str):
+        raise TypeError(
+            f"The view info should be a string for {base_name}, but it is: {view_info}"
+        )
+    if len(differentiable_output_vars) == 0:
+        # no output is differentiable (.indices() for SparseTensors for example)
+        rhs_value = (
+            f"as_view({view_info}, {var}, "
+            f"/* is_bw_differentiable */ false, /* is_fw_differentiable */ false)"
+        )
+    elif len(differentiable_output_vars) == 1:
+        # Single differentiable output (Tensor or Tensor[])
+        return_info = differentiable_outputs[0]
+        # We only support simple Tensor or a TensorList for functions that return views
+        if not is_tensor_type(return_info.type) and not is_tensor_list_type(
+            return_info.type
+        ):
+            raise RuntimeError(
+                f"{base_name} that return differentiable views can only return Tensor or Tensor[]"
+            )
+
+        # See Note [ View + Inplace detection]
+        def get_creation_meta_in_mode(original: str) -> str:
+            creation_meta_with_grad_mode = f"(at::GradMode::is_enabled() ? {original} : CreationMeta::NO_GRAD_MODE)"
+            return f"InferenceMode::is_enabled() ? CreationMeta::INFERENCE_MODE : {creation_meta_with_grad_mode}"
+
+        # Only allow rebasing of the history if we return a single Tensor
+        # If we are in a no grad block, raise a warning
+        # See NOTE [ View + Inplace detection ] for more details about this logic
+        if is_tensor_list_type(return_info.type):
+            creation_meta = get_creation_meta_in_mode("CreationMeta::MULTI_OUTPUT_NODE")
+            view_idx = "view_idx"
+            view_func = emit_view_func(
+                f, extract_bindings(f), view_idx=view_idx
+            ).strip()
+            as_view_call = (
+                f"as_view(/* base */ {view_info}, /* output */ {var}[{view_idx}], "
+                "/* is_bw_differentiable */ true, /* is_fw_differentiable */ true, "
+                "/* view_func */ std::move(func), /* rev_view_func */ rev_func, "
+                f"/* creation_meta */ {creation_meta});"
+            )
+            call += MULTI_OUTPUT_VIEW_ITERATION.substitute(
+                var=var, view_idx=view_idx, body=f"{view_func}\n{as_view_call}"
+            )
+            rhs_value = f"std::move({var})"
+        else:
+            call += emit_view_func(f, extract_bindings(f), view_idx=None)
+            creation_meta = get_creation_meta_in_mode("CreationMeta::DEFAULT")
+            rhs_value = (
+                f"as_view(/* base */ {view_info}, /* output */ {var}, /* is_bw_differentiable */ true, "
+                "/* is_fw_differentiable */ true, "
+                f"/* view_func */ std::move(func), /* rev_view_func */ rev_func, /* creation_meta */ {creation_meta})"
+            )
+    else:
+        # This could be supported but we don't need it at the moment, so keeping things simple.
+        raise RuntimeError(
+            "Function that return multiple differentiable output "
+            "when at least one of them is view is not supported."
+        )
+    return call, rhs_value
+
+
+def modifies_arguments(f: NativeFunction) -> bool:
+    return f.func.kind() in [SchemaKind.inplace, SchemaKind.out]
+
+
+@with_native_function_with_differentiability_info
+def emit_inplace_or_view_body(fn: NativeFunctionWithDifferentiabilityInfo) -> List[str]:
+    f = fn.func
+    inplace_view_body: List[str] = []
+
+    dispatcher_sig = DispatcherSignature.from_schema(f.func)
+    dispatcher_exprs = dispatcher_sig.exprs()
+
+    # code-generated ADInplaceOrView kernels plumb and recompute dispatch keys directly through the kernel for performance.
+    # See Note [Plumbing Keys Through The Dispatcher] for details.
+    dispatch_key_set = "ks & c10::after_ADInplaceOrView_keyset"
+    redispatch_args = ", ".join([dispatch_key_set] + [a.expr for a in dispatcher_exprs])
+
+    # Note that this calls the slow, dispatching variants of manual_cpp_binding ops.
+    # We could probably work harder to ensure that the fast variants are called instead, but the perf benefit would be minimal.
+    if modifies_arguments(f):  # inplace op
+        inplace_view_body.append(
+            INPLACE_REDISPATCH.substitute(
+                unambiguous_name=f.func.name.unambiguous_name(),
+                unpacked_args=redispatch_args,
+            )
+        )
+        for r in cpp.return_names(f):
+            inplace_view_body.append(f"increment_version({r});")
+    else:
+        assert get_view_info(f) is not None
+        inplace_view_body.append(
+            VIEW_REDISPATCH.substitute(
+                assign_return_values="auto " + TMP_VAR + " = ",
+                unambiguous_name=f.func.name.unambiguous_name(),
+                unpacked_args=redispatch_args,
+            )
+        )
+        call, rhs_value = emit_view_body(fn, TMP_VAR)
+        inplace_view_body.append(call)
+        assert rhs_value is not None
+        inplace_view_body.append(
+            ASSIGN_RETURN_VALUE.substitute(
+                return_values=tie_return_values(f), rhs_value=rhs_value
+            )
+        )
+    if f.func.returns:
+        inplace_view_body.append(f"return {get_return_value(f)};")
+    return inplace_view_body
+
+
+@with_native_function
+def gen_formals(f: NativeFunction) -> str:
+    # code-generated autograd kernels plumb and recompute dispatch keys directly through the kernel for performance.
+    # See Note [Plumbing Keys Through The Dispatcher] for details.
+    formals = ["c10::DispatchKeySet ks"]
+    for a in f.func.schema_order_arguments():
+        # `binds` is only a placeholder: just the C++ type string is used below,
+        # paired with the schema argument's own name.
+        arg_ctype = cpp.argument_type(a, binds="__placeholder__", symint=True)
+        formals.append(f"{arg_ctype.cpp_type()} {a.name}")
+    return ", ".join(formals)
+
+
+@with_native_function_with_differentiability_info
+def inplace_or_view_method_definition(
+    fn: NativeFunctionWithDifferentiabilityInfo,
+) -> Optional[str]:
+    f = fn.func
+    if get_view_info(f) is None:
+        # Not a view op, so a kernel is only emitted when the op modifies its
+        # arguments and has at least one return. For functions that modify their
+        # inputs but don't return them, we can't give them autograd support.
+        # See https://github.com/pytorch/pytorch/issues/53796
+        if not modifies_arguments(f) or len(f.func.returns) == 0:
+            return None
+
+    return METHOD_DEFINITION.substitute(
+        return_type=cpp.returns_type(f.func.returns, symint=True).cpp_type(),
+        type_wrapper_name=type_wrapper_name(f),
+        formals=gen_formals(f),
+        type_definition_body=emit_inplace_or_view_body(fn),
+    )
+
+
+@with_native_function_with_differentiability_info
+def inplace_or_view_method_registration(
+    fn: NativeFunctionWithDifferentiabilityInfo,
+) -> Optional[str]:
+    f = fn.func
+    # Non-view ops are registered only if they modify arguments and return something.
+    is_view = get_view_info(f) is not None
+    if not is_view and (not modifies_arguments(f) or len(f.func.returns) == 0):
+        return None
+    return WRAPPER_REGISTRATION.substitute(
+        unqual_operator_name_with_overload=f.func.name,
+        type_wrapper_name=type_wrapper_name(f),
+        class_type="ADInplaceOrView",
+    )
+
+
+def use_derived(fn: NativeFunctionWithDifferentiabilityInfo) -> bool:
+    if cpp.name(fn.func.func) in MANUAL_AUTOGRAD:
+        return False
+    return dispatch_strategy(fn) == "use_derived"
+
+
+def gen_inplace_or_view_type_env(
+    fn: NativeFunctionWithDifferentiabilityInfo,
+) -> Dict[str, List[str]]:
+    # Every value is a list holding at most one entry; an op for which nothing
+    # is generated contributes empty lists under all three keys.
+    definition = inplace_or_view_method_definition(fn)
+    registration = inplace_or_view_method_registration(fn)
+
+    env: Dict[str, List[str]] = {
+        "ops_headers": [],
+        "inplace_or_view_method_definitions": [],
+        "inplace_or_view_wrapper_registrations": [],
+    }
+    if definition is not None:
+        env["ops_headers"].append(f"#include <ATen/ops/{fn.func.root_name}_ops.h>")
+        env["inplace_or_view_method_definitions"].append(definition)
+    if registration is not None:
+        env["inplace_or_view_wrapper_registrations"].append(registration)
+    return env
+
+
+def gen_inplace_or_view_type(
+    out: str,
+    native_yaml_path: str,
+    tags_yaml_path: str,
+    fns_with_infos: List[NativeFunctionWithDifferentiabilityInfo],
+    template_path: str,
+) -> None:
+    # NOTE: see Note [Sharded File] at the top of the VariableType.cpp
+    # template regarding sharding of the generated files.
+    num_shards = 2
+    # `num_shards` is passed through below so the shard count lives in one place.
+    fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
+    fm.write_sharded(
+        "ADInplaceOrViewType.cpp",
+        [fn for fn in fns_with_infos if use_derived(fn)],
+        key_fn=lambda fn: fn.func.root_name,
+        base_env={
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/ADInplaceOrViewType.cpp",
+        },
+        env_callable=gen_inplace_or_view_type_env,
+        num_shards=num_shards,
+        sharded_keys={
+            "ops_headers",
+            "inplace_or_view_method_definitions",
+            "inplace_or_view_wrapper_registrations",
+        },
+    )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_python_functions.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_python_functions.py
new file mode 100644
index 0000000000000000000000000000000000000000..9a689af790214b329fee5dd4f6c2e00c7b38ed3a
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_python_functions.py
@@ -0,0 +1,1396 @@
+# Generates Python bindings for ATen functions
+#
+# The bindings are generated as methods on python_variable or functions on the
+# torch._C._nn, torch._C._fft, torch._C._linalg, torch._C._nested, torch._C._sparse
+# or torch._C._special objects.
+#
+
+# Code tries to stick to the following rules:
+#
+# - templates should be colocated with the functions that use them.
+#   no templates are currently shared between functions, but if that
+#   happens, maybe put the template with the first one
+#
+# - don't use environment dictionaries when calling template.substitute().
+#   pass named arguments directly for everything, otherwise it's much too
+#   hard to track what's actually being used and by who
+#
+# - colocate any new hacks/adjustments with existing ones of the same kind.
+#   ideally in a data structure rather than code if possible. See e.g.
+#   SCHEMA_DEFAULT_CONVERSION_HACKS, etc.
+#
+# - similarly, conversions from one format to another should ideally happen
+#   all at once in a single place.
+#
+# - no nontrivial nested functions. couple-liners are ok but please no more.
+#   especially avoid functions that read/write outer variables defined far away.
+#
+# - raise RuntimeError instead of asserting, and put as much
+#   information as is available into the message. I.e. no need to
+#   plumb in new params whose only purpose is to fill out an error
+#   message, but use what's there
+#
+
+import itertools
+import re
+from collections import defaultdict
+
+from typing import Callable, Dict, Iterable, List, Optional, Sequence, Set, Tuple
+
+import yaml
+from torchgen.api import cpp
+from torchgen.api.python import (
+    arg_parser_output_exprs,
+    cpp_dispatch_exprs,
+    cpp_dispatch_target,
+    dispatch_lambda_args,
+    dispatch_lambda_exprs,
+    dispatch_lambda_return_str,
+    has_tensor_options,
+    PythonSignature,
+    PythonSignatureDeprecated,
+    PythonSignatureGroup,
+    PythonSignatureNativeFunctionPair,
+    signature,
+    signature_from_schema,
+    structseq_fieldnames,
+)
+
+from torchgen.code_template import CodeTemplate
+from torchgen.context import with_native_function
+from torchgen.gen import cpp_string, parse_native_yaml, parse_tags_yaml
+from torchgen.model import (
+    Argument,
+    BaseOperatorName,
+    FunctionSchema,
+    NativeFunction,
+    SchemaKind,
+    Type,
+    Variant,
+)
+from torchgen.utils import FileManager, split_name_params
+from torchgen.yaml_utils import YamlLoader
+
+from .gen_inplace_or_view_type import is_tensor_list_type
+from .gen_trace_type import should_trace
+
+#
+# declarations blocklist
+# We skip codegen for these functions, for various reasons.
+# Future PRs will categorize this list and eliminate or hoist
+# them out of eager-only codegen.
+# See https://github.com/pytorch/pytorch/issues/30788
+#
+
+# These functions require manual Python bindings or are not exposed to Python
+_SKIP_PYTHON_BINDINGS = [
+    "alias",
+    "contiguous",
+    "is_cuda",
+    "is_sparse",
+    "is_sparse_csr",
+    "size",
+    "stride",
+    "sym_size",
+    "sym_stride",
+    "sym_storage_offset",
+    "sym_numel",
+    ".*_backward",
+    ".*_backward_(out|input|weight|bias)",
+    ".*_forward",
+    ".*_forward_out",
+    ".*_jvp",
+    "_unsafe_view",
+    "tensor",
+    "_?sparse_(coo|compressed|csr|csc|bsr|bsc)_tensor.*",
+    "_range.*",
+    "_sparse_add_out",
+    "_sparse_div.*",
+    "_sparse_mul.*",
+    "_sparse_sub.*",
+    "_sparse_dense_add_out",
+    "index",
+    "index_out",
+    "unique_dim_consecutive",
+    "_cumsum.*",
+    "_cumprod.*",
+    "_sum.*",
+    "_prod.*",
+    "_th_.*",
+    "_thnn_.*",
+    "range.*",
+    "_solve.*",
+    "_inverse.*",
+    "_cholesky.*",
+    "_triangular_solve.*",
+    "_qr.*",
+    "_svd.*",
+    "slice",
+    "item",
+    "_local_scalar_dense",
+    "to",
+    "_to_copy",
+    "_to_copy_out",
+    "_reshape_copy",
+    "_reshape_copy_out",
+    "copy_sparse_to_sparse_",
+    "copy_",
+    "numpy_T",
+    "matrix_H",
+    "mT",
+    "mH",  # these need to be attributes in Python, not functions
+    "nonzero(_(out|numpy))?",
+    "set_data",
+    ".*_overrideable",  # overrideable functions for backend extension
+    "data",
+    "is_leaf",
+    "output_nr",
+    "_version",
+    "requires_grad_",
+    "retains_grad",
+    "set_",
+    "_fw_primal",
+    "fake_quantize_per_tensor_affine_cachemask",
+    "fake_quantize_per_channel_affine_cachemask",
+    "_new_zeros_with_same_feature_meta",
+    "_has_same_storage_numel",  # used for forward AD internals
+    "_reshape_alias",
+    "replace_",  # only used by the functionalization pass, doesn't need to be exposed to python
+    "copy",  # only used by the functionalization pass
+    "fill.Tensor",  # only used by the functionalization pass
+    "fill.Scalar",  # only used by the functionalization pass
+    "lift.*",
+    "normal_functional",  # only used by the functionalization pass
+    "nbytes",
+    "itemsize",
+]
+
+SKIP_PYTHON_BINDINGS = [
+    re.compile(rf"^{pattern}$") for pattern in _SKIP_PYTHON_BINDINGS
+]
+
+# These function signatures are not exposed to Python. Note that this signature
+# list does not support regex.
+SKIP_PYTHON_BINDINGS_SIGNATURES = [
+    "add.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor",
+    "add_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)",
+    "sub.Scalar(Tensor self, Scalar other, Scalar alpha=1) -> Tensor",
+    "sub_.Scalar(Tensor(a!) self, Scalar other, Scalar alpha=1) -> Tensor(a!)",
+    "mul.Scalar(Tensor self, Scalar other) -> Tensor",
+    "mul_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)",
+    "div.Scalar(Tensor self, Scalar other) -> Tensor",
+    "div_.Scalar(Tensor(a!) self, Scalar other) -> Tensor(a!)",
+]
+
+
+@with_native_function
+def should_generate_py_binding(f: NativeFunction) -> bool:
+    # NativeFunctions that are entirely code-generated should not get python bindings
+    # because these codegen implementations are often inefficient. A handful of
+    # view_copy style ops were exposed accidentally when they were handwritten, and
+    # now that they are being moved to codegen they must stay exposed in python
+    # for bc reasons.
+    if "generated" in f.tags and "view_copy" not in f.tags:
+        return False
+
+    op_name = cpp.name(f.func)
+    if any(skip_regex.match(op_name) for skip_regex in SKIP_PYTHON_BINDINGS):
+        return False
+
+    # Exact (non-regex) comparison against the full schema string. The local is
+    # named `schema_str` so it does not shadow the imported `signature` helper.
+    schema_str = str(f.func)
+    if schema_str in SKIP_PYTHON_BINDINGS_SIGNATURES:
+        return False
+    return True
+
+
+def get_pycname(name: BaseOperatorName) -> str:
+    return f"THPVariable_{name}"
+
+
+def is_noarg(overloads: Sequence[PythonSignatureNativeFunctionPair]) -> bool:
+    return len(overloads) == 1 and overloads[0].signature.arguments_count() == 0
+
+
+def is_py_variable_method(f: NativeFunction) -> bool:
+    return f.python_module is None and Variant.method in f.variants
+
+
+def is_py_torch_function(f: NativeFunction) -> bool:
+    return f.python_module is None and Variant.function in f.variants
+
+
+def is_py_nn_function(f: NativeFunction) -> bool:
+    return f.python_module == "nn"
+
+
+def is_py_fft_function(f: NativeFunction) -> bool:
+    return f.python_module == "fft"
+
+
+def is_py_linalg_function(f: NativeFunction) -> bool:
+    return f.python_module == "linalg"
+
+
+def is_py_nested_function(f: NativeFunction) -> bool:
+    return f.python_module == "nested"
+
+
+def is_py_sparse_function(f: NativeFunction) -> bool:
+    return f.python_module == "sparse"
+
+
+def is_py_special_function(f: NativeFunction) -> bool:
+    return f.python_module == "special"
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                            Main Function
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+def gen(
+    out: str,
+    native_yaml_path: str,
+    tags_yaml_path: str,
+    deprecated_yaml_path: str,
+    template_path: str,
+    *,
+    symint: bool = True,
+) -> None:
+    fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
+    native_functions = parse_native_yaml(
+        native_yaml_path, tags_yaml_path
+    ).native_functions
+    native_functions = list(filter(should_generate_py_binding, native_functions))
+
+    methods = load_signatures(native_functions, deprecated_yaml_path, method=True)
+    create_python_bindings(
+        fm,
+        methods,
+        is_py_variable_method,
+        None,
+        "python_variable_methods.cpp",
+        method=True,
+        symint=symint,
+    )
+
+    # NOTE: num_shards here must be synced with gatherTorchFunctions in
+    #       torch/csrc/autograd/python_torch_functions_manual.cpp
+    functions = load_signatures(native_functions, deprecated_yaml_path, method=False)
+    create_python_bindings_sharded(
+        fm,
+        functions,
+        is_py_torch_function,
+        "torch",
+        "python_torch_functions.cpp",
+        method=False,
+        num_shards=3,
+        symint=symint,
+    )
+
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_nn_function,
+        "torch.nn",
+        "python_nn_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_fft_function,
+        "torch.fft",
+        "python_fft_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_linalg_function,
+        "torch.linalg",
+        "python_linalg_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    # torch.nested forwards the caller's `symint` like every other module.
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_nested_function,
+        "torch.nested",
+        "python_nested_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_sparse_function,
+        "torch.sparse",
+        "python_sparse_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    create_python_bindings(
+        fm,
+        functions,
+        is_py_special_function,
+        "torch.special",
+        "python_special_functions.cpp",
+        method=False,
+        symint=symint,
+    )
+
+    # Only `functions` are used to generate `return_types` bindings: every method that
+    # returns a structseq has a function variant today; a method-only one needs handling.
+    create_python_return_type_bindings(
+        fm, functions, lambda fn: True, "python_return_types.cpp"
+    )
+    create_python_return_type_bindings_header(
+        fm, functions, lambda fn: True, "python_return_types.h"
+    )
+
+    valid_tags = parse_tags_yaml(tags_yaml_path)
+
+    def gen_tags_enum() -> Dict[str, str]:
+        return {
+            "enum_of_valid_tags": (
+                "".join(
+                    [f'\n.value("{tag}", at::Tag::{tag})' for tag in sorted(valid_tags)]
+                )
+            )
+        }
+
+    fm.write("python_enum_tag.cpp", gen_tags_enum)
+
+
+def group_filter_overloads(
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    pred: Callable[[NativeFunction], bool],
+) -> Dict[BaseOperatorName, List[PythonSignatureNativeFunctionPair]]:
+    # Bucket the pairs accepted by `pred` by base operator name, keeping input order.
+    by_name: Dict[BaseOperatorName, List[PythonSignatureNativeFunctionPair]]
+    by_name = defaultdict(list)
+    for candidate in pairs:
+        if pred(candidate.function):
+            by_name[candidate.function.func.name.name].append(candidate)
+    return by_name
+
+
+def create_python_bindings(
+    fm: FileManager,
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    pred: Callable[[NativeFunction], bool],
+    module: Optional[str],
+    filename: str,
+    *,
+    method: bool,
+    symint: bool = True,
+) -> None:
+    """Generates Python bindings to ATen functions"""
+    py_methods: List[str] = []
+    ops_headers: List[str] = []
+    py_method_defs: List[str] = []
+    py_forwards: List[str] = []
+
+    grouped = group_filter_overloads(pairs, pred)
+
+    for name in sorted(grouped.keys(), key=str):
+        overloads = grouped[name]
+        py_methods.append(
+            method_impl(name, module, overloads, method=method, symint=symint)
+        )
+        py_method_defs.append(method_def(name, module, overloads, method=method))
+        py_forwards.extend(forward_decls(name, overloads, method=method))
+        ops_headers.append(f"#include <ATen/ops/{name.base}.h>")
+
+    fm.write_with_template(
+        filename,
+        filename,
+        lambda: {
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/{filename}",
+            "ops_headers": ops_headers,
+            "py_forwards": py_forwards,
+            "py_methods": py_methods,
+            "py_method_defs": py_method_defs,
+        },
+    )
+
+
+def create_python_return_type_bindings(
+    fm: FileManager,
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    pred: Callable[[NativeFunction], bool],
+    filename: str,
+) -> None:
+    """
+    Render the template `filename` (`python_return_types.cpp`) with, for each
+    op kept by `pred`, the functions that initialize and return its named
+    tuple type plus the calls that register those types.
+    """
+    by_op = group_filter_overloads(pairs, pred)
+
+    definition_blocks: List[str] = []
+    registration_blocks: List[str] = []
+
+    # Ops are visited in the order of their string form.
+    for op in sorted(by_op, key=str):
+        defs, regs = generate_return_type_definition_and_registrations(by_op[op])
+        # Each op contributes one newline-joined chunk to either list; the
+        # chunk is "" when the corresponding list is empty.
+        definition_blocks.append("\n".join(defs))
+        registration_blocks.append("\n".join(regs))
+
+    # `filename` is passed twice: as the first and as the second positional
+    # argument of `write_with_template`. The substitution environment is
+    # handed over as a lambda rather than as a ready-made dict.
+    fm.write_with_template(
+        filename,
+        filename,
+        lambda: {
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/{filename}",
+            "py_return_types": definition_blocks,
+            "py_return_types_registrations": registration_blocks,
+        },
+    )
+
+
+def create_python_return_type_bindings_header(
+    fm: FileManager,
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    pred: Callable[[NativeFunction], bool],
+    filename: str,
+) -> None:
+    """
+    Render the template `filename` with the declarations produced by
+    `generate_return_type_declarations` for each op kept by `pred`, i.e. the
+    functions that initialize and return the ops' named tuple types.
+    """
+    by_op = group_filter_overloads(pairs, pred)
+
+    # One newline-joined chunk per op, visited in the order of the ops' string
+    # form; the chunk is "" when an op has no declarations.
+    declaration_blocks: List[str] = [
+        "\n".join(generate_return_type_declarations(by_op[op]))
+        for op in sorted(by_op, key=str)
+    ]
+
+    # `filename` is passed as both leading positional arguments.
+    fm.write_with_template(
+        filename,
+        filename,
+        lambda: {
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/{filename}",
+            "py_return_types_declarations": declaration_blocks,
+        },
+    )
+
+
+def create_python_bindings_sharded(
+    fm: FileManager,
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    pred: Callable[[NativeFunction], bool],
+    module: Optional[str],
+    filename: str,
+    *,
+    method: bool,
+    num_shards: int,
+    symint: bool = True,
+) -> None:
+    """Generates Python bindings to ATen functions"""
+    grouped = group_filter_overloads(pairs, pred)
+
+    def key_func(
+        kv: Tuple[BaseOperatorName, List[PythonSignatureNativeFunctionPair]]
+    ) -> str:
+        return kv[0].base
+
+    def env_func(
+        kv: Tuple[BaseOperatorName, List[PythonSignatureNativeFunctionPair]]
+    ) -> Dict[str, List[str]]:
+        name, fn_pairs = kv
+        return {
+            "ops_headers": [f"#include <ATen/ops/{name.base}.h>"],
+            "py_forwards": list(forward_decls(name, fn_pairs, method=method)),
+            "py_methods": [
+                method_impl(name, module, fn_pairs, method=method, symint=symint)
+            ],
+            "py_method_defs": [method_def(name, module, fn_pairs, method=method)],
+        }
+
+    fm.write_sharded(
+        filename,
+        grouped.items(),
+        base_env={
+            "generated_comment": "@"
+            + f"generated from {fm.template_dir_for_comments()}/{filename}",
+        },
+        key_fn=key_func,
+        env_callable=env_func,
+        num_shards=num_shards,
+        sharded_keys={"ops_headers", "py_forwards", "py_methods", "py_method_defs"},
+    )
+
+
+def load_signatures(
+    native_functions: List[NativeFunction],
+    deprecated_yaml_path: str,
+    *,
+    method: bool,
+    skip_deprecated: bool = False,
+    pyi: bool = False,
+) -> Sequence[PythonSignatureNativeFunctionPair]:
+    # Python signature pairs for `native_functions`, plus deprecated.yaml ones.
+    @with_native_function
+    def to_pair(fn: NativeFunction) -> PythonSignatureNativeFunctionPair:
+        py_sig = signature(fn, method=method, pyi=pyi)
+        return PythonSignatureNativeFunctionPair(signature=py_sig, function=fn)
+
+    pairs = [to_pair(fn) for fn in native_functions]
+    # The deprecated pairs are loaded even when `skip_deprecated` drops them.
+    deprecated = load_deprecated_signatures(
+        pairs, deprecated_yaml_path, method=method, pyi=pyi
+    )
+    return pairs + deprecated if not skip_deprecated else pairs
+
+
+def load_deprecated_signatures(
+    pairs: Sequence[PythonSignatureNativeFunctionPair],
+    deprecated_yaml_path: str,
+    *,
+    method: bool,
+    pyi: bool,
+) -> List[PythonSignatureNativeFunctionPair]:
+    """Pair each deprecated.yaml entry with the ATen overload(s) it delegates to.
+    deprecated.yaml lacks complete type information, so the deprecated and the
+    original signatures are joined in type-only form. Returns one pair per match;
+    asserts on unknown call arguments or when no native function matches."""
+
+    # group the original ATen signatures by name
+    grouped: Dict[str, List[PythonSignatureNativeFunctionPair]] = defaultdict(list)
+    for pair in pairs:
+        grouped[pair.signature.name].append(pair)
+
+    # find matching original signatures for each deprecated signature
+    results: List[PythonSignatureNativeFunctionPair] = []
+
+    with open(deprecated_yaml_path) as f:
+        deprecated_defs = yaml.load(f, Loader=YamlLoader)
+
+    for deprecated in deprecated_defs:
+        schema = FunctionSchema.parse(deprecated["name"])
+        aten_name, call_args = split_name_params(deprecated["aten"])
+        is_out = aten_name.endswith("_out")
+        if is_out:
+            aten_name = aten_name[: -len("_out")]  # strip only the trailing suffix
+
+        # HACK: these are fixed constants used to pass the aten function.
+        # The type must be known ahead of time
+        known_constants = {
+            "1": Type.parse("Scalar"),
+        }
+        schema_args_by_name = {a.name: a for a in schema.arguments.flat_all}
+        for name in call_args:
+            assert (
+                name in schema_args_by_name or name in known_constants
+            ), f"deprecation definition: Unrecognized value {name}"
+
+        # Map deprecated signature arguments to their aten signature and test
+        # if the types and alias annotation match.
+        def is_schema_compatible(
+            aten_schema: FunctionSchema,
+        ) -> bool:
+            arguments: Iterable[Argument]
+            if is_out:
+                arguments = itertools.chain(
+                    aten_schema.arguments.out, aten_schema.arguments.flat_non_out
+                )
+            else:
+                arguments = aten_schema.arguments.flat_all
+
+            for i, arg in enumerate(arguments):
+                if i < len(call_args):
+                    arg_name = call_args[i]
+                    if arg_name in known_constants:
+                        schema_type = known_constants[arg_name]
+                        schema_annotation = None
+                    else:
+                        schema_arg = schema_args_by_name[arg_name]
+                        schema_type = schema_arg.type
+                        schema_annotation = schema_arg.annotation
+
+                    if schema_type != arg.type or schema_annotation != arg.annotation:
+                        return False
+                else:
+                    if arg.default is None:
+                        return False
+
+            return len(schema.returns) == len(aten_schema.returns) and all(
+                a == b for a, b in zip(schema.returns, aten_schema.returns)
+            )
+
+        any_schema_found = False
+        for pair in grouped[aten_name]:
+            if not is_schema_compatible(pair.function.func):
+                continue
+            any_schema_found = True
+
+            python_sig = signature_from_schema(
+                schema,
+                category_override=pair.function.category_override,
+                method=method,
+                pyi=pyi,
+            )
+
+            results.append(
+                PythonSignatureNativeFunctionPair(
+                    signature=PythonSignatureDeprecated(
+                        name=python_sig.name,
+                        input_args=python_sig.input_args,
+                        input_kwargs=python_sig.input_kwargs,
+                        output_args=python_sig.output_args,
+                        tensor_options_args=python_sig.tensor_options_args,
+                        method=python_sig.method,
+                        deprecated_schema=schema,
+                        deprecated_args_exprs=tuple(call_args),
+                        returns=python_sig.returns,
+                    ),
+                    function=pair.function,
+                )
+            )
+        assert (
+            any_schema_found
+        ), f"No native function with name {aten_name} matched signature:\n  {str(schema)}"
+
+    return results
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                         Named Tuple Codegen
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+@with_native_function
+def gen_structseq_typename_key(f: NativeFunction) -> str:
+    # Key = C++ op name followed by each structseq field name, joined by "_".
+    parts = [cpp.name(f.func), *structseq_fieldnames(f.func.returns)]
+    return "_".join(parts)
+
+
+def emit_structseq_call(
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+) -> Tuple[List[str], Dict[str, str]]:
+    """
+    Generate block of named tuple type def inits, and add typeref snippets
+    to declarations that use them
+    """
+    typenames: Dict[
+        str, str
+    ] = {}  # map from unique name + field name lists to typedef name
+    typedefs: List[str] = []  # typedef declarations and init code
+
+    for overload in overloads:
+        fieldnames = structseq_fieldnames(overload.function.func.returns)
+        if not fieldnames:
+            continue
+
+        name = cpp.name(overload.function.func)  # use @with_native_function?
+        tn_key = gen_structseq_typename_key(overload.function)
+        typename = typenames.get(tn_key)
+        if typename is None:
+            typename = f'NamedTuple{"" if not typedefs else len(typedefs)}'
+            typenames[tn_key] = typename
+            typedefs.append(
+                f"""\
+static PyTypeObject* {typename} = generated::get_{name}_structseq();"""
+            )
+
+    return typedefs, typenames
+
+
+def generate_return_type_definition_and_registrations(
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+) -> Tuple[List[str], List[str]]:
+    """
+    Generate block of function in `python_return_types.cpp` to initialize
+    and return named tuple for a native function which returns named tuple
+    and registration invocations in same file.
+    """
+    typenames: Dict[
+        str, str
+    ] = {}  # map from unique name + field name lists to typedef name
+    definitions: List[str] = []  # function definition to register the typedef
+    registrations: List[str] = []  # register call for the typedef
+
+    for overload in overloads:
+        fieldnames = structseq_fieldnames(overload.function.func.returns)
+        if not fieldnames:
+            continue
+
+        fields = ", ".join(f'{{"{fn}", ""}}' for fn in fieldnames)
+
+        name = cpp.name(overload.function.func)  # use @with_native_function?
+        tn_key = gen_structseq_typename_key(overload.function)
+        typename = typenames.get(tn_key)
+
+        if typename is None:
+            typename = f'{name}NamedTuple{"" if not definitions else len(definitions)}'
+            typenames[tn_key] = typename
+            definitions.append(
+                f"""\
+PyTypeObject* get_{name}_structseq() {{
+    static PyStructSequence_Field NamedTuple_fields[] = {{ {fields},  {{nullptr}} }};
+    static PyTypeObject {typename};
+    static bool is_initialized = false;
+    static PyStructSequence_Desc desc = {{ "torch.return_types.{name}", nullptr, NamedTuple_fields, {len(fieldnames)} }};
+    if (!is_initialized) {{
+        PyStructSequence_InitType(&{typename}, &desc);
+        {typename}.tp_repr = (reprfunc)torch::utils::returned_structseq_repr;
+        is_initialized = true;
+    }}
+    return &{typename};
+}}
+"""
+            )
+            registrations.append(
+                f'addReturnType(return_types_module, "{name}", generated::get_{name}_structseq());'
+            )
+
+    return definitions, registrations
+
+
+def generate_return_type_declarations(
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+) -> List[str]:
+    """
+    Build the `get_<name>_structseq()` prototypes destined for
+    `python_return_types.h`.
+
+    Overloads without named return fields are ignored, and each distinct
+    (op name + field names) key yields a single prototype, in first-seen order.
+    """
+    emitted_keys: Set[str] = set()  # structseq keys that already have a prototype
+    prototypes: List[str] = []
+
+    for pair in overloads:
+        native_fn = pair.function
+        # No field names means there is nothing to declare for this overload.
+        if not structseq_fieldnames(native_fn.func.returns):
+            continue
+
+        op_name = cpp.name(native_fn.func)  # use @with_native_function?
+        key = gen_structseq_typename_key(native_fn)
+        # A key seen earlier already produced its prototype.
+        if key in emitted_keys:
+            continue
+
+        emitted_keys.add(key)
+        prototypes.append(f"PyTypeObject* get_{op_name}_structseq();")
+
+    return prototypes
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                         Method Impl Codegen
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+# python binding for all overloads of a function/method (switches on _r.idx)
+PY_VARIABLE_METHOD_VARARGS = CodeTemplate(
+    """\
+// ${name}
+static PyObject * ${pycname}(PyObject* self_, PyObject* args, PyObject* kwargs)
+{
+  ${method_header}
+  static PythonArgParser parser({
+    ${signatures}
+  }, /*traceable=*/${traceable});
+
+  ParsedArgs<${max_args}> parsed_args;
+  auto _r = parser.parse(${self_}, args, kwargs, parsed_args);
+  ${check_has_torch_function}
+  switch (_r.idx) {
+    ${dispatch}
+  }
+  ${method_footer}
+}
+
+"""
+)
+
+# handler for a single parsed signature - may be a single overload or
+# a pair of overloads whose signatures only differ in output params
+# (plugged into PY_VARIABLE_METHOD_VARARGS as an item in ${dispatch})
+PY_VARIABLE_CASE = CodeTemplate(
+    """\
+case ${overload_index}: {
+  ${body}
+}
+"""
+)
+
+# python binding for single-overload function/method
+PY_VARIABLE_METHOD_VARARGS_SINGLETON = CodeTemplate(
+    """\
+// ${name}
+static PyObject * ${pycname}(PyObject* self_, PyObject* args, PyObject* kwargs)
+{
+  ${method_header}
+  static PythonArgParser parser({
+    ${signatures}
+  }, /*traceable=*/${traceable});
+
+  ParsedArgs<${max_args}> parsed_args;
+  auto _r = parser.parse(${self_}, args, kwargs, parsed_args);
+  ${check_has_torch_function}
+  ${dispatch}
+  ${method_footer}
+}
+
+"""
+)
+
+# python binding for a method with no args, shortcuts parsing
+PY_VARIABLE_METHOD_NOARGS = CodeTemplate(
+    """\
+// ${name}
+static PyObject * ${pycname}(PyObject* self_, PyObject* args)
+{
+  ${method_header}
+  ${check_has_torch_function}
+  ${dispatch}
+  ${method_footer}
+}
+
+"""
+)
+
+
+def method_impl(
+    name: BaseOperatorName,
+    module: Optional[str],
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+    *,
+    method: bool,
+    symint: bool = True,
+) -> str:
+    """
+    Generate a python binding for all overloads of an op.
+    """
+    pycname = get_pycname(name)
+    noarg = is_noarg(overloads)
+    structseq_inits, structseq_typenames = emit_structseq_call(overloads)
+
+    method_header = ["HANDLE_TH_ERRORS"]
+    method_header += structseq_inits
+    method_header += (
+        ["const Tensor& self = THPVariable_Unpack(self_);"] if method else []
+    )
+
+    method_footer = ([] if noarg else ["Py_RETURN_NONE;"]) + ["END_HANDLE_TH_ERRORS"]
+
+    traceable = "true" if all(should_trace(o.function) for o in overloads) else "false"
+
+    grouped_overloads: Sequence[PythonSignatureGroup] = group_overloads(
+        overloads, symint=symint
+    )
+    is_singleton = len(grouped_overloads) == 1
+    signatures: List[str] = []
+    dispatch: List[str] = []
+    for overload_index, overload in enumerate(grouped_overloads):
+        signature = overload.signature.signature_str(symint=symint)
+        signatures.append(f"{cpp_string(str(signature))},")
+        dispatch_body = emit_dispatch_case(overload, structseq_typenames, symint=symint)
+        dispatch.append(
+            PY_VARIABLE_CASE.substitute(
+                overload_index=overload_index, body=dispatch_body
+            )
+            if not is_singleton
+            else dispatch_body
+        )
+
+    if noarg:
+        template = PY_VARIABLE_METHOD_NOARGS
+    elif is_singleton:
+        template = PY_VARIABLE_METHOD_VARARGS_SINGLETON
+    else:
+        template = PY_VARIABLE_METHOD_VARARGS
+
+    return template.substitute(
+        name=name,
+        pycname=pycname,
+        method_header=method_header,
+        max_args=max(o.signature.arguments_count() for o in overloads),
+        signatures=signatures,
+        traceable=traceable,
+        check_has_torch_function=gen_has_torch_function_check(
+            name=name,
+            module=module,
+            noarg=noarg,
+            method=method,
+        ),
+        dispatch=dispatch,
+        method_footer=method_footer,
+        self_="self_" if method else "nullptr",
+    )
+
+
+def gen_has_torch_function_check(
+    name: BaseOperatorName, module: Optional[str], *, noarg: bool, method: bool
+) -> str:
+    # Returns the C++ snippet that tests for __torch_function__ handling.
+    # No-arg bindings check `self_` directly instead of the parse result `_r`.
+    if noarg and not method:
+        return ""
+    if noarg:
+        return f"""\
+if(check_has_torch_function(self_)) {{
+  return handle_torch_function(self_, "{name}");
+}}
+"""
+
+    # C++ module object passed to handle_torch_function, per python namespace.
+    module_objects = {
+        "torch": "THPVariableFunctionsModule",
+        "torch.nn": "THPNNVariableFunctionsModule",
+        "torch.fft": "THPFFTVariableFunctionsModule",
+        "torch.linalg": "THPLinalgVariableFunctionsModule",
+        "torch.nested": "THPNestedVariableFunctionsModule",
+        "torch.sparse": "THPSparseVariableFunctionsModule",
+        "torch.special": "THPSpecialVariableFunctionsModule",
+    }
+    # A falsy `module` selects THPVariableClass; unlisted names raise KeyError.
+    namespace = module_objects[module] if module else "THPVariableClass"
+    self_ = "self_" if method else "nullptr"
+
+    return f"""\
+if(_r.has_torch_function()) {{
+  return handle_torch_function(_r, {self_}, args, kwargs, {namespace}, "{module or "torch.Tensor"}");
+}}
+"""
+
+
+# handler for output/no-output overload pair
+PY_VARIABLE_OUT = CodeTemplate(
+    """\
+if (_r.isNone(${out_idx})) {
+  ${call_dispatch}
+} else {
+  ${call_dispatch_out}
+}
+"""
+)
+
+
+def emit_dispatch_case(
+    overload: PythonSignatureGroup,
+    structseq_typenames: Dict[str, str],
+    *,
+    symint: bool = True,
+) -> str:
+    """
+    Render the dispatch code for one parsed python signature.
+
+    The group holds either a lone native function, or a functional/out pair
+    sharing one python signature; for a pair the emitted code branches on
+    whether the output argument was passed.
+    """
+
+    # Both branches differ only in the native function that is dispatched to.
+    def dispatch_to(fn: NativeFunction) -> str:
+        return emit_single_dispatch(
+            overload.signature, fn, structseq_typenames, symint=symint
+        )
+
+    out_fn = overload.outplace
+    if out_fn is None:
+        # no-output version only
+        return dispatch_to(overload.base)
+
+    # output and no-output variants: branch on _r.isNone(<out_idx>)
+    return PY_VARIABLE_OUT.substitute(
+        out_idx=overload.signature.output_idx(),
+        call_dispatch=dispatch_to(overload.base),
+        call_dispatch_out=dispatch_to(out_fn),
+    )
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                    Forward Declarations Codegen
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+def forward_decls(
+    name: BaseOperatorName,
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+    *,
+    method: bool,
+) -> Tuple[str, ...]:
+    """
+    Forward declaration of the binding for `name`, as a 1-tuple of C++ source.
+
+    With `method=True` nothing is declared and () is returned.
+    No-arg ops are declared without the trailing `kwargs` parameter.
+    """
+    if method:
+        return ()
+
+    c_name = get_pycname(name)
+    # Only the parameter list differs between the two declaration shapes.
+    params = "PyObject* self_, PyObject* args"
+    if not is_noarg(overloads):
+        params += ", PyObject* kwargs"
+
+    return (f"static PyObject * {c_name}({params});\n",)
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#              Method Def (Binding Table Entry) Codegen
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+def method_def(
+    name: BaseOperatorName,
+    module: Optional[str],
+    overloads: Sequence[PythonSignatureNativeFunctionPair],
+    *,
+    method: bool,
+) -> str:
+    """
+    Render the method table row (name, C function, flags, NULL) for `name`.
+    """
+    c_func = get_pycname(name)
+
+    # Dunder (binary-op) entries use the TypeError_to_NotImplemented_ wrapper.
+    if name.dunder_method:
+        c_func = f"TypeError_to_NotImplemented_<{c_func}>"
+
+    takes_no_args = is_noarg(overloads)
+    if not takes_no_args:
+        c_func = f"castPyCFunctionWithKeywords({c_func})"
+
+    use_noargs_flag = takes_no_args and method
+    flags = "METH_NOARGS" if use_noargs_flag else "METH_VARARGS | METH_KEYWORDS"
+    if module == "torch":
+        flags = f"{flags} | METH_STATIC"
+
+    return f'{{"{name}", {c_func}, {flags}, NULL}},'
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                   Overload Sorting and Grouping
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+def group_overloads(
+    overloads: Sequence[PythonSignatureNativeFunctionPair], *, symint: bool = True
+) -> Sequence[PythonSignatureGroup]:
+    bases: Dict[str, PythonSignatureNativeFunctionPair] = {}
+    outplaces: Dict[str, PythonSignatureNativeFunctionPair] = {}
+
+    # first group by signature ignoring out arguments
+    for overload in overloads:
+        sig = overload.signature.signature_str(skip_outputs=True, symint=symint)
+        if overload.function.func.is_out_fn():
+            if sig in outplaces:
+                raise RuntimeError(
+                    f"Found duplicated function definition:\n- {overload.function.func}.\n"
+                    f"Existing definition:\n- {outplaces[sig].function.func}."
+                )
+            outplaces[sig] = overload
+        else:
+            if sig in bases:
+                raise RuntimeError(
+                    f"Found duplicated function definition:\n- {overload.function.func}.\n"
+                    f"Existing definition:\n- {bases[sig].function.func}."
+                )
+            bases[sig] = overload
+
+    for sig, out in outplaces.items():
+        if sig not in bases:
+            candidates: List[str] = []
+            for overload in overloads:
+                if (
+                    str(overload.function.func.name.name)
+                    == str(out.function.func.name.name)
+                    and not overload.function.func.is_out_fn()
+                    and not overload.signature.deprecated
+                ):
+                    candidates.append(
+                        overload.signature.signature_str(
+                            skip_outputs=True, symint=symint
+                        )
+                    )
+            out_sig = out.signature.signature_str(symint=symint)
+            raise RuntimeError(
+                f"While identifying overloads, we found an out schema {out_sig} without a corresponding non-out variant. "
+                f"We expected the non-out variant to have schema: \n- {sig}\nPlease check that you spelled the schema "
+                "correctly in native_functions.yaml. We discovered the following candidate(s): \n"
+                + "\n".join(f"- {candidate}" for candidate in candidates)
+            )
+
+    grouped = [
+        PythonSignatureGroup.from_pairs(
+            functional=base,
+            out=outplaces.get(sig),
+        )
+        for sig, base in bases.items()
+    ]
+    return sort_overloads(grouped, symint=symint)
+
+
+# This function declares a partial order on declarations, and sorts them according
+# to its linear extension. This is necessary, because there's some ambiguity in the
+# choice of overload, and we want a different order.
+#
+# See Note[Order of overloads matters]
+#
+# A few examples of ambiguous python signature pairs.
+#
+#   All parameters have the same type, except one taking Tensor the other taking
+#   Scalar. A numeric PyObject can be cast into Tensor, and a zero-dim Tensor
+#   object can be accepted as Scalar type parameter (see python_arg_parser.cpp).
+#   Therefore, same input arguments might be accepted by either python signature.
+#   We want to always parse the one taking Tensor first.
+#
+#     bitwise_and(Tensor input, Tensor other, *, Tensor out=None)
+#     bitwise_and(Tensor input, Scalar other, *, Tensor out=None)
+#
+#   If they have different number of parameters then they are not ambiguous - but
+#   the difference on output param can be ignored as it's optional.
+#
+#     multiply(Tensor input, Tensor other, *, Tensor out=None)
+#     multiply(Tensor input, Scalar other)
+#
+#   Both positional args and keyword-only args are considered together.
+#
+#     subtract(Tensor other, *, Scalar alpha=1)
+#     subtract(Scalar other, Scalar alpha=1)
+#
+# A few ambiguous cases which it does NOT handle yet.
+#
+#   If there is any difference in other parameters besides the Tensor/Scalar
+#   difference, then they are not considered ambiguous by this method anymore.
+#   However, the difference could be too trivial to disambiguate.
+#
+#     foo(Tensor input, Scalar other, Scalar bar)
+#     foo(Tensor input, Tensor other, double bar)
+#
+#   If they are taking different number of parameters then they are not considered
+#   ambiguous anymore, even if the difference is only on optional kwargs.
+#
+#     foo(Scalar other, Scalar alpha=1)
+#     foo(Tensor other, *, Scalar alpha=1, Scalar beta=1)
+#
+
+
+def sort_overloads(
+    grouped_overloads: Sequence[PythonSignatureGroup], *, symint: bool = True
+) -> Sequence[PythonSignatureGroup]:
+    # NB: "smaller" here means lower priority
+
+    def is_arg_smaller(t1: Type, t2: Type) -> bool:
+        return (
+            str(t1) == "Scalar"
+            and str(t2) == "Tensor"
+            or str(t1) == "Scalar?"
+            and str(t2) == "Tensor?"
+            or "Dimname" in str(t1)
+            and "Dimname" not in str(t2)
+            or
+            # See https://github.com/pytorch/pytorch/issues/54555 for why it is
+            # important to prioritize int/int? over int[]
+            str(t1) == "int[]"
+            and (str(t2) == "int" or str(t2) == "int?")
+            or
+            # TensorList currently throws an error during argument parsing, that's why it needs to be
+            # last in signature ordering. See discussion: https://github.com/pytorch/pytorch/issues/58087
+            str(t1) == "Tensor[]"
+            and str(t2).find("[]") != -1
+            or
+            # Prioritize IntArrayRef overload over SymIntArrayRef
+            str(t1) == "SymInt[]"
+            and str(t2) == "int[]"
+            or
+            # Make sure both int and SymInt are sorted consistently w.r.t. Tensor since Tensor can be implicitly
+            # converted to either int or SymInt.  Prioritize the Tensor overload since it otherwise gets shadowed.
+            (str(t1) == "SymInt" or str(t1) == "int")
+            and str(t2) == "Tensor"
+        )
+
+    def is_smaller(s1: PythonSignature, s2: PythonSignature) -> bool:
+        """Returns True if s1 < s2 in the partial order."""
+        args1, args2 = s1.arguments(skip_outputs=True), s2.arguments(skip_outputs=True)
+        if len(args1) != len(args2):
+            return False
+        # TODO: should use some canonical form instead of 'str(arg.type)' - see comments
+        # above. The old codegen used the deprecated 'dynamic_type(arg.type)', which
+        # ignores the optional annotation, i.e. 'Scalar' and 'Scalar?'.
+        equal = all(arg1.type == arg2.type for arg1, arg2 in zip(args1, args2))
+        smaller_or_equal = all(
+            str(arg1.type) == str(arg2.type) or is_arg_smaller(arg1.type, arg2.type)
+            for arg1, arg2 in zip(args1, args2)
+        )
+        return smaller_or_equal and not equal
+
+    # First sort by signature string; the indices used below refer to this order
+    grouped_overloads = sorted(
+        grouped_overloads, key=lambda x: x.signature.signature_str(symint=symint)
+    )
+
+    # Construct the relation graph: larger_than[i] holds every index j with i < j
+    larger_than: Dict[int, Set[int]] = defaultdict(set)
+    for i1, overload1 in enumerate(grouped_overloads):
+        for i2, overload2 in enumerate(grouped_overloads):
+            if is_smaller(overload1.signature, overload2.signature):
+                larger_than[i1].add(i2)
+
+    if not larger_than:
+        return list(grouped_overloads)
+
+    # Use a topological sort to sort overloads according to the partial order.
+    N = len(grouped_overloads)
+    sorted_ids: List[int] = list(filter(lambda x: x not in larger_than, range(N)))
+
+    for idx in range(N):
+        # The size of sorted_ids will grow to N eventually.
+        i = sorted_ids[idx]
+        for j in sorted(larger_than.keys()):
+            larger = larger_than[j]
+            larger.discard(i)
+            if not larger:
+                del larger_than[j]
+                sorted_ids.append(j)
+
+    return [grouped_overloads[x] for x in sorted_ids]
+
+
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+#
+#                       Codegen API Integration
+#
+# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ #
+
+
+def emit_single_dispatch(
+    ps: PythonSignature,
+    f: NativeFunction,
+    structseq_typenames: Dict[str, str],
+    *,
+    symint: bool = True,
+) -> str:
+    """
+    Emit dispatch code for a single native function.
+    """
+
+    @with_native_function
+    def go(f: NativeFunction) -> str:
+        # header comments
+        if isinstance(ps, PythonSignatureDeprecated):
+            schema_comment = f"// [deprecated] aten::{ps.deprecated_schema}"
+        else:
+            schema_comment = f"// aten::{f.func}"
+
+        deprecated = "[deprecated] " if ps.deprecated else ""
+
+        # dispatch lambda signature
+        name = cpp.name(f.func)
+        lambda_formals = ", ".join(
+            f"{a.type_str} {a.name}" for a in dispatch_lambda_args(ps, f, symint=symint)
+        )
+        lambda_return = dispatch_lambda_return_str(f)
+
+        # dispatch lambda body
+        dispatch_callee = cpp_dispatch_target(f)
+        dispatch_args = ", ".join(cpp_dispatch_exprs(f, python_signature=ps))
+
+        # from arg parser outputs to dispatch lambda arguments
+        parser_outputs = arg_parser_output_exprs(ps, f, symint=symint)
+        lambda_arg_exprs = dispatch_lambda_exprs(ps, f, symint=symint)
+        inits = "\n".join(lambda_arg_exprs.inits)
+        lambda_args = ", ".join(lambda_arg_exprs.exprs)
+
+        # scatter fields
+        # TODO: Checking `ps.method and ('requires_grad' in parser_outputs)` is a hacky
+        #       solution for enabling the 'requires_grad' argument for tensor methods
+        #       new_full, new_empty, and new_zeros. A much better but more difficult to
+        #       implement solution involves refactoring according to Ed's description here:
+        #       https://github.com/pytorch/pytorch/issues/36455#issuecomment-614767589
+        need_set_requires_grad = ps.tensor_options_args and (
+            not has_tensor_options(f)
+            or (ps.method and ("requires_grad" in parser_outputs))
+        )
+        set_requires_grad = (
+            f'.set_requires_grad({parser_outputs["requires_grad"].expr})'
+            if need_set_requires_grad
+            else ""
+        )
+
+        if lambda_return == "void":
+            # Make in-place foreach return `self` at python-binding level.
+            # ref: https://github.com/pytorch/pytorch/pull/118622#pullrequestreview-1904804954
+            self_arg = f.func.arguments.self_arg
+            return_stmt: str
+            if (
+                str(f.func.name).startswith("_foreach_")
+                and f.func.kind() == SchemaKind.inplace
+            ):
+                # note(crcrpar): `_foreach_pow.ScalarAndTensor` does NOT have its in-place
+                # variant and it unlikely to have it in the future. Thus it's safe to have the following assert.
+                assert self_arg is not None and is_tensor_list_type(
+                    self_arg.argument.type
+                )
+                return_stmt = """PyObject* self_tensorlist = _r.args[0];
+Py_INCREF(self_tensorlist);
+return self_tensorlist;
+"""
+            else:
+                return_stmt = "Py_RETURN_NONE;"
+            return f"""\
+{schema_comment}
+{inits}
+auto dispatch_{name} = []({lambda_formals}) -> {lambda_return} {{
+  pybind11::gil_scoped_release no_gil;
+  {dispatch_callee}({dispatch_args});
+}};
+dispatch_{name}({lambda_args}){set_requires_grad};
+{return_stmt}
+"""
+        else:
+            typename = structseq_typenames.get(gen_structseq_typename_key(f))
+            structseq_typeref = f"{typename}, " if typename is not None else ""
+            return f"""\
+{schema_comment}
+{inits}
+auto dispatch_{name} = []({lambda_formals}) -> {lambda_return} {{
+  pybind11::gil_scoped_release no_gil;
+  return {dispatch_callee}({dispatch_args});
+}};
+return wrap({structseq_typeref}dispatch_{name}({lambda_args}){set_requires_grad});
+"""
+
+    return go(f)
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_trace_type.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_trace_type.py
new file mode 100644
index 0000000000000000000000000000000000000000..9d9144bce04c2c38bb65e3d0baf3aae36e4f9568
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/gen_trace_type.py
@@ -0,0 +1,535 @@
+import itertools
+from typing import Dict, List, Sequence, Union
+
+from torchgen.api import cpp
+from torchgen.api.types import DispatcherSignature
+from torchgen.code_template import CodeTemplate
+from torchgen.context import with_native_function
+from torchgen.model import Argument, NativeFunction, SchemaKind, TensorOptionsArguments
+from torchgen.utils import FileManager
+
+# Note [Manual Backend kernels]
+# For these ops, we want to manually register to dispatch key Backend and
+# skip codegen-ed registration to all keys before Backend.
+# For codegen this means:
+#   - op set below must match ops with manual_kernel_registration=True in native_functions.yaml
+#     where we skip codegen backend kernels
+#   - all ops below are part of MANUAL_AUTOGRAD to skip codegen Autograd kernel registration
+#   - all ops below are part of MANUAL_TRACER to skip codegen Tracer kernel registration
+# Note: we still register to dispatch key Profiler for these ops, keeping it untouched for now.
+# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
+MANUAL_BACKEND = {
+    "options",
+    "data",
+    "set_data",
+    "is_leaf",
+    "output_nr",
+    "_version",
+    "retain_grad",
+    "_backward",
+    "requires_grad_",
+}
+
+# For these ops we want to skip the codegen-ed registration to both Autograd and Tracer keys.
+# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
+MANUAL_AUTOGRAD_AND_TRACER = {
+    "resize_",
+    "resize_as_",
+    "detach",
+    "detach_",
+    "copy_",
+    "_fw_primal",
+    "_make_dual",
+}
+
+# Currently MANUAL_AUTOGRAD and MANUAL_TRACER are two names for one shared set of ops:
+#   union(MANUAL_BACKEND, MANUAL_AUTOGRAD_AND_TRACER)
+# You can find the manual registration in torch/csrc/autograd/VariableTypeManual.cpp
+MANUAL_AUTOGRAD = MANUAL_TRACER = MANUAL_BACKEND | MANUAL_AUTOGRAD_AND_TRACER
+
+# These functions we don't want to record for tracing, because we always want
+# to trace their constituent parts.  This is a temporary hack in lieu
+# of proper scopes, where subsequent compilation passes can ask for the unfolding
+# on demand.  Only concrete ATen methods can be disabled this way; it will have
+# NO EFFECT otherwise.
+DONT_RECORD_TRACE = {
+    "convolution",
+    "conv1d",
+    "conv2d",
+    "conv3d",
+    "conv_transpose1d",
+    "conv_transpose2d",
+    "conv_transpose3d",
+    "lstm_cell",
+    "gru_cell",
+    "rnn_tanh_cell",
+    "rnn_relu_cell",
+    # FIXME: figure out a better way when we support sparse tensors in jit
+    "_coalesced",
+}
+
+
+def should_trace(f: NativeFunction) -> bool:
+    # Storage/Type/ConstQuantizerPtr arguments are not traceable at the moment
+    untraceable = {"Storage", "Type", "ConstQuantizerPtr"}
+    arg_types = [str(a.type) for a in f.func.schema_order_arguments()]
+    if not untraceable.isdisjoint(arg_types):
+        return False
+    # Without any Tensor or TensorList return there is nothing to trace
+    returns_tensor = any(ret.type.is_tensor_like() for ret in f.func.returns)
+    if not returns_tensor:
+        return False
+    return f.func.name.name.base not in DONT_RECORD_TRACE
+
+
+SELECT = CodeTemplate(  # C++ if/else; ${cond}, ${true} and ${false} are filled in by callers
+    """\
+
+if (${cond}) {
+  ${true}
+} else {
+  ${false}
+}
+"""
+)
+
+OP_NAME = CodeTemplate(  # assigns op_name the aten:: symbol for ${trace_name}
+    """\
+op_name = c10::Symbol::fromQualString("aten::${trace_name}");
+"""
+)
+
+# These functions are recorded in the trace under a different name:
+RENAME_TRACE = {
+    "zero": "zeros_like",  # replacing aten::zero_ with aten::zeros_like
+    "fill": "full_like",  # replacing aten::fill_ with aten::full_like
+}
+
+
+def format_trace_op_name(f: NativeFunction) -> str:
+    # TODO: byte-for-byte compatible with old codegen behavior - should clean up
+    def renamed(op: str) -> str:
+        # apply the tracing rename table, falling back to the name itself
+        return RENAME_TRACE.get(op, op)
+
+    schema_kind = f.func.kind()
+    if (
+        schema_kind in (SchemaKind.functional, SchemaKind.out)
+        or f.func.name.name.dunder_method
+    ):
+        # functional, *_out and dunder ops are recorded under a single name:
+        # the JIT overloads in-place and out-of-place ops on the same name
+        return OP_NAME.substitute(trace_name=renamed(str(f.func.name.name)))
+
+    # in-place op: emit both names and let the generated code choose between
+    # them depending on tracer_state->force_outplace
+    outplace_op = OP_NAME.substitute(trace_name=renamed(f.func.name.name.base))
+    inplace_op = OP_NAME.substitute(trace_name=renamed(cpp.name(f.func)))
+    return SELECT.substitute(
+        cond="tracer_state->force_outplace",
+        true=outplace_op,
+        false=inplace_op,
+    )
+
+
+ADD_TRACE_INPUT = CodeTemplate("""jit::tracer::addInputs(node, "${name}", ${input});""")
+
+
+def format_trace_inputs(f: NativeFunction) -> str:
+    def dispatch_trace_input(
+        arg: Union[Argument, TensorOptionsArguments]
+    ) -> Sequence[str]:
+        if isinstance(arg, TensorOptionsArguments):
+            name = "options"  # all four TensorOptions fields are recorded under this name
+            return [
+                ADD_TRACE_INPUT.substitute(
+                    name=name, input="c10::optTypeMetaToScalarType(options.dtype_opt())"
+                ),
+                ADD_TRACE_INPUT.substitute(name=name, input="options.layout()"),
+                ADD_TRACE_INPUT.substitute(name=name, input="options.device()"),
+                ADD_TRACE_INPUT.substitute(name=name, input="options.pinned_memory()"),
+            ]
+        else:
+            name = arg.name
+            if str(arg.type) == "Tensor?[]":
+                return [f'jit::tracer::addInputs(node, "{name}", {name});']
+            else:
+                return [ADD_TRACE_INPUT.substitute(name=name, input=name)]
+
+    args: List[Union[Argument, TensorOptionsArguments]] = list(
+        f.func.schema_order_arguments()
+    )
+
+    if f.func.is_out_fn():
+        # *_out functions take the result as a separate argument, but we don't want to
+        # trace that argument directly. Instead, we trace its TensorOptions.
+        # So first, we need to remove the out argument from the list of arguments to trace.
+        num_out_args = len(f.func.arguments.out)
+        args = args[:-num_out_args]  # drops the trailing num_out_args entries
+
+    trace_inputs = itertools.chain.from_iterable(
+        dispatch_trace_input(arg) for arg in args
+    )
+
+    if f.func.is_out_fn():
+        # for *_out functions, handle the result argument differently for inplace/outplace.
+        # For inplace: just add the input to the end to conform with the JIT schema
+        inplace = [
+            ADD_TRACE_INPUT.substitute(
+                name=f.func.arguments.out[i].name, input=f.func.arguments.out[i].name
+            )
+            for i in range(num_out_args)
+        ]
+
+        # for outplace: do nothing, except if the function is a factory.
+        # Factories are a bit special because their out-of-place overloads
+        # take an extra TensorOptions argument, which is missing in the _out function
+        has_tensor_return = any(r.type.is_tensor_like() for r in f.func.returns)
+        has_tensor_input_arg = any(
+            a.type.is_tensor_like() for a in f.func.arguments.flat_non_out
+        )
+        is_factory_method = f.category_override == "factory" or (
+            has_tensor_return and not has_tensor_input_arg
+        )
+
+        # HACK: preserve old codegen behavior - the old codegen set the `is_factory_method`
+        # flag for the whole family of ops with the same basename if any of them is a
+        # factory method. For most cases the whole family of ops are indeed all factory
+        # method - 'normal' is the only exception. So we handle it specially here to avoid
+        # cloning the old logic.
+        if f.func.name.name.base == "normal":
+            is_factory_method = True
+
+        if is_factory_method:
+            outplace = [
+                ADD_TRACE_INPUT.substitute(
+                    name="out",
+                    input="c10::optTypeMetaToScalarType(out.options().dtype_opt())",
+                ),
+                ADD_TRACE_INPUT.substitute(name="out", input="out.options().layout()"),
+                ADD_TRACE_INPUT.substitute(name="out", input="out.options().device()"),
+                ADD_TRACE_INPUT.substitute(
+                    name="out", input="out.options().pinned_memory()"
+                ),
+            ]
+        else:
+            outplace = []
+
+        trace_inputs = itertools.chain(
+            trace_inputs,
+            [
+                SELECT.substitute(
+                    cond="tracer_state->force_outplace",
+                    true="\n".join(outplace),
+                    false="\n".join(inplace),
+                )
+            ],
+        )
+
+    return "\n".join(trace_inputs)
+
+
+# `torch.jit.trace` has an undocumented keyword argument `_force_outplace`,
+# which forces the JIT to replace functions with their outplace variants
+# (for example `aten::add_` becomes `aten::add`).
+#
+# This replacement is implemented in-place with minimal modifications of
+# the argument stack (it assumes that the outplace call takes the same
+# arguments as the inplace version).
+#
+# However, no such substitutions are available for the `aten::fill_`
+# and `aten::zero_` operators, as we never implemented `aten::fill`
+# and `aten::zero`. So the JIT tracing hack replaces `aten::zero_` with
+# `aten::zeros_like` and `aten::fill_` with `aten::full_like`.
+#
+# But as they can potentially have different arguments, we also have
+# to hack into the stack and add the missing ones.
+#
+# A possible alternative would be:
+#
+#  - Add `aten::fill` and `aten::zero`
+#
+#  - Or keep the `aten::zeros_like` arguments aligned with the `aten::zero_`
+#    arguments (inside `native_functions.yaml`)
+RENAME_TRACE_ADD_ARGS = {
+    "fill": """\
+    jit::tracer::addInputs(node, "options", c10::optional<ScalarType>());
+    jit::tracer::addInputs(node, "options", layout_or_default(c10::nullopt));
+    jit::tracer::addInputs(node, "options", device_or_default(c10::nullopt));
+    jit::tracer::addInputs(node, "options", pinned_memory_or_default(c10::nullopt));
+    c10::optional<MemoryFormat> memory_format = c10::MemoryFormat::Preserve;
+    jit::tracer::addInputs(node, "memory_format", memory_format);
+""",
+    "zero": """\
+    jit::tracer::addInputs(node, "options", c10::optional<ScalarType>());
+    jit::tracer::addInputs(node, "options", layout_or_default(c10::nullopt));
+    jit::tracer::addInputs(node, "options", device_or_default(c10::nullopt));
+    jit::tracer::addInputs(node, "options", pinned_memory_or_default(c10::nullopt));
+    c10::optional<MemoryFormat> memory_format = c10::MemoryFormat::Preserve;
+    jit::tracer::addInputs(node, "memory_format", memory_format);
+""",
+}
+
+INPLACE_GUARD = CodeTemplate(  # C++ call to ensureUniqueIfOutOfPlaced for ${mutable_input}
+    """\
+jit::tracer::ensureUniqueIfOutOfPlaced("${name}", ${mutable_input});
+"""
+)
+
+PRE_RECORD_TRACE = CodeTemplate(  # C++ prologue: builds and inserts the node, then clears the tracing state
+    """\
+torch::jit::Node* node = nullptr;
+std::shared_ptr<jit::tracer::TracingState> tracer_state;
+if (jit::tracer::isTracing()) {
+  tracer_state = jit::tracer::getTracingState();
+  at::Symbol op_name;
+  ${set_op_name}
+  node = tracer_state->createNode(op_name, /*num_outputs=*/0);
+  jit::tracer::recordSourceLocation(node);
+  ${add_trace_inputs}
+  tracer_state->insertNode(node);
+  ${inplace_guard}
+  jit::tracer::setTracingState(nullptr);
+}
+"""
+)
+
+
+def format_prerecord_trace(f: NativeFunction) -> str:
+    # Emits the C++ prologue that records the node; empty for untraced ops.
+    if not should_trace(f):
+        return ""
+
+    # TODO: clean up old codegen behavior
+    is_inplace = (
+        f.func.kind() in (SchemaKind.inplace, SchemaKind.out)
+        and not f.func.name.name.dunder_method
+    )
+
+    additional_inputs = ""
+    inplace_guard = ""
+    if is_inplace:
+        # Ops in RENAME_TRACE_ADD_ARGS get extra inputs, but only on the
+        # force_outplace branch of the generated code.
+        add_args = RENAME_TRACE_ADD_ARGS.get(f.func.name.name.base, "")
+        if add_args:
+            additional_inputs = SELECT.substitute(
+                cond="tracer_state->force_outplace", true=add_args, false=""
+            )
+
+        # The guarded tensor is the first out argument, or `self` without one.
+        out_args = f.func.arguments.out
+        inplace_guard = INPLACE_GUARD.substitute(
+            name=cpp.name(f.func),
+            mutable_input=out_args[0].name if out_args else "self",
+        )
+
+    return PRE_RECORD_TRACE.substitute(
+        set_op_name=format_trace_op_name(f),
+        add_trace_inputs=format_trace_inputs(f) + additional_inputs,
+        inplace_guard=inplace_guard,
+    )
+
+
+POST_RECORD_TRACE = CodeTemplate(  # C++ epilogue: restores the tracing state, then adds outputs
+    """\
+if (tracer_state) {
+  jit::tracer::setTracingState(std::move(tracer_state));
+  ${add_trace_outputs}
+}
+"""
+)
+
+
+def format_postrecord_trace(f: NativeFunction) -> str:
+    if not should_trace(f):
+        return ""
+
+    # For outplacing ops, *_out overloads require special handling to move the
+    # output *argument* to a return value
+    if f.func.is_out_fn():
+        output_names_outplace = [arg.name for arg in f.func.arguments.out]
+        output_names_inplace = cpp.return_names(f)
+
+        # Code size optimization: the common case is that the return value is
+        # the same for both variants
+        if output_names_outplace == output_names_inplace:
+            outputs = [
+                f"jit::tracer::addOutput(node, {n});" for n in output_names_outplace
+            ]
+            return POST_RECORD_TRACE.substitute(add_trace_outputs=outputs)
+
+        selection = SELECT.substitute(
+            cond="force_outplace",  # NOTE(review): other SELECT uses pass "tracer_state->force_outplace" - confirm
+            true="\n".join(
+                f"jit::tracer::addOutput(node, {n});" for n in output_names_outplace
+            ),
+            false="\n".join(
+                f"jit::tracer::addOutput(node, {n});" for n in output_names_inplace
+            ),
+        )
+        return POST_RECORD_TRACE.substitute(add_trace_outputs=selection)
+    else:
+        output_names = cpp.return_names(f)
+        outputs = [f"jit::tracer::addOutput(node, {n});" for n in output_names]
+        return POST_RECORD_TRACE.substitute(add_trace_outputs=outputs)  # a list is passed, not a joined string
+
+
+def tie_return_values(f: NativeFunction) -> str:
+    # one return binds a plain variable; otherwise a structured binding is used
+    if len(f.func.returns) != 1:
+        return "auto [" + ", ".join(cpp.return_names(f)) + "]"
+    return "auto " + (f.func.returns[0].name or "result")
+
+
+def get_return_value(f: NativeFunction) -> str:
+    # C++ expression handed back by the kernel, built from the return names
+    ret_names = cpp.return_names(f)
+    if len(f.func.returns) == 1:
+        return ret_names[0]
+    if f.func.kind() != SchemaKind.out:
+        moved = [f"std::move({n})" for n in ret_names]
+        return "std::make_tuple(" + ", ".join(moved) + ")"
+    return "std::forward_as_tuple(" + ", ".join(ret_names) + ")"
+
+
+TRACE_DISPATCH = CodeTemplate(  # C++ redispatch statement, prefixed by ${assign_return_values}
+    """\
+${assign_return_values}at::_ops::${unambiguous_name}::redispatch(${unpacked_args});"""
+)
+
+
+def emit_trace_body(f: NativeFunction) -> List[str]:
+    # Body of one tracing kernel: pre-record, redispatch, post-record, return.
+
+    prologue = format_prerecord_trace(f)
+
+    # code-generated tracing kernels plumb and recompute dispatch keys directly through the kernel for performance.
+    # See Note [Plumbing Keys Through The Dispatcher] for details.
+    call_args = [
+        "ks & c10::DispatchKeySet(c10::DispatchKeySet::FULL_AFTER, c10::DispatchKey::Tracer)"
+    ]
+    signature = DispatcherSignature.from_schema(f.func)
+    call_args.extend(binding.expr for binding in signature.exprs())
+
+    # Only functional and mutable kernels that return something bind the
+    # result of the redispatch to local names.
+    binds_result = (
+        f.func.kind() in [SchemaKind.functional, SchemaKind.mutable] and f.func.returns
+    )
+    lhs = f"{tie_return_values(f)} = " if binds_result else ""
+
+    # Note that this calls the slow, dispatching variants of manual_cpp_binding ops.
+    # We could probably work harder to ensure that the fast variants are
+    # called instead, but the perf benefit would be minimal.
+    redispatch = TRACE_DISPATCH.substitute(
+        assign_return_values=lhs,
+        unambiguous_name=f.func.name.unambiguous_name(),
+        unpacked_args=", ".join(call_args),
+    )
+
+    # The explicit return statement is emitted only when the schema declares
+    # at least one return.
+    trace_body = [prologue, redispatch, format_postrecord_trace(f)]
+    if f.func.returns:
+        trace_body.append(f"return {get_return_value(f)};")
+    return trace_body
+
+
+METHOD_DEFINITION = CodeTemplate(  # C++ function definition skeleton for one wrapper
+    """\
+${return_type} ${type_wrapper_name}(${formals}) {
+  ${type_definition_body}
+}
+"""
+)
+
+
+def type_wrapper_name(f: NativeFunction, key: str = "Default") -> str:
+    # Wrapper name is the C++ op name, plus the overload name when there is one.
+    parts = [cpp.name(f.func)]
+    overload = f.func.name.overload_name
+    if overload:
+        parts.append(f"{overload}")
+
+    # The key argument is only used in gen_variable_type where we need fns per autograd dispatch key.
+    # In gen_trace_type and gen_inplace_view_type where only one fn per native_fn must be generated,
+    # the key argument should not be passed.
+    # "Default" is left off so names generated before per-dispatch-key derivatives stay the same.
+    if key != "Default":
+        parts.append(f"{key}")
+    return "_".join(parts)
+
+
+@with_native_function
+def method_definition(f: NativeFunction) -> str:
+    # Renders the C++ definition of the tracing kernel for one native function.
+    assert cpp.name(f.func) not in MANUAL_TRACER
+
+    # code-generated tracing kernels plumb and recompute dispatch keys directly through the kernel for performance.
+    # See Note [Plumbing Keys Through The Dispatcher] for details.
+    params = ["c10::DispatchKeySet ks"]
+    for arg in f.func.schema_order_arguments():
+        arg_type = cpp.argument_type(arg, binds="__placeholder__", symint=True)
+        params.append(f"{arg_type.cpp_type()} {arg.name}")
+
+    return_type = cpp.returns_type(f.func.returns, symint=True).cpp_type()
+
+    return METHOD_DEFINITION.substitute(
+        return_type=return_type,
+        type_wrapper_name=type_wrapper_name(f),
+        formals=", ".join(params),
+        type_definition_body=emit_trace_body(f),
+    )
+
+
+WRAPPER_REGISTRATION = CodeTemplate(  # C++ m.impl(...) call registering one wrapper
+    """\
+m.impl("${name}",
+       TORCH_FN(${class_type}::${type_wrapper_name})
+);
+"""
+)
+
+
+@with_native_function
+def method_registration(f: NativeFunction) -> str:
+    # Renders the m.impl(...) line binding the op to its TraceType wrapper.
+    assert cpp.name(f.func) not in MANUAL_TRACER
+    wrapper = type_wrapper_name(f)
+    op_name = f.func.name
+    return WRAPPER_REGISTRATION.substitute(
+        name=op_name, type_wrapper_name=wrapper, class_type="TraceType"
+    )
+
+
+def gen_trace_type_func(fn: NativeFunction) -> Dict[str, List[str]]:
+    # Per-function environment: one single-element list per sharded key.
+    env = {"ops_headers": [f"#include <ATen/ops/{fn.root_name}_ops.h>"]}
+    env["trace_method_definitions"] = [method_definition(fn)]
+    env["trace_wrapper_registrations"] = [method_registration(fn)]
+    return env
+
+
+def gen_trace_type(
+    out: str, native_functions: List[NativeFunction], template_path: str
+) -> None:
+    # NOTE: see Note [Sharded File] at the top of the VariableType.cpp
+    # template regarding sharding of the generated files.
+    fm = FileManager(install_dir=out, template_dir=template_path, dry_run=False)
+    # Ops listed in MANUAL_TRACER are filtered out before generating kernels.
+    traced = [f for f in native_functions if cpp.name(f.func) not in MANUAL_TRACER]
+    marker = "@" + f"generated from {fm.template_dir_for_comments()}/TraceType.cpp"
+    fm.write_sharded(
+        "TraceType.cpp",
+        traced,
+        key_fn=lambda fn: fn.root_name,
+        base_env={"generated_comment": marker},
+        env_callable=gen_trace_type_func,
+        num_shards=5,
+        sharded_keys={
+            "ops_headers",
+            "trace_method_definitions",
+            "trace_wrapper_registrations",
+        },
+    )
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/load_derivatives.py b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/load_derivatives.py
new file mode 100644
index 0000000000000000000000000000000000000000..ad05cf187ef5e281e00d66e668d93df9078112b3
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/load_derivatives.py
@@ -0,0 +1,1013 @@
+# Parses derivatives.yaml into autograd functions
+#
+# Each autograd function is represented by `DifferentiabilityInfo` containing
+# a list of `Derivative`. See `torchgen.api.autograd` for the data models.
+import re
+from collections import defaultdict
+from typing import Any, Counter, Dict, List, Match, Optional, Sequence, Set, Tuple
+
+import yaml
+from torchgen.api import cpp
+
+from torchgen.api.autograd import (
+    Derivative,
+    DifferentiabilityInfo,
+    ForwardDerivative,
+    SavedAttribute,
+)
+from torchgen.api.types import (
+    BaseCType,
+    Binding,
+    boolT,
+    CppSignatureGroup,
+    layoutT,
+    longT,
+    NamedCType,
+    OptionalCType,
+    scalarTypeT,
+    SpecialArgName,
+    stringT,
+    symIntArrayRefT,
+    SymIntT,
+    tensorGeometryT,
+    tensorOptionsT,
+    typeAndSizeT,
+    VectorCType,
+)
+from torchgen.context import with_native_function
+from torchgen.gen import get_grouped_by_view_native_functions, parse_native_yaml
+from torchgen.model import (
+    AUTOGRAD_KEYS,
+    FunctionSchema,
+    NativeFunction,
+    NativeFunctionsViewGroup,
+    OperatorName,
+    SchemaKind,
+    Type,
+    Variant,
+)
+from torchgen.utils import concatMap, IDENT_REGEX, split_name_params
+from torchgen.yaml_utils import YamlLoader
+
+DerivativeRet = Tuple[Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]], Set[str]]
+
+_GLOBAL_LOAD_DERIVATIVE_CACHE: Dict[Tuple[str, str], DerivativeRet] = {}
+
+_VALID_AUTOGRAD_KEYS = set(AUTOGRAD_KEYS)
+
+
+# This function directly adds per-dispatchkey derivative entries for {view}_copy variants of each view op.
+# Since every {view} and {view}_copy op shares the same derivative formula,
+# we generate them here instead of duplicating them in the yaml.
+# See Note [Codegen'd {view}_copy Operators]
+def add_view_copy_derivatives(
+    infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]],
+    view_groups: List[NativeFunctionsViewGroup],
+) -> None:
+    # Get the map from each view op's name to its corresponding view group
+    view_name_to_group: Dict[OperatorName, NativeFunctionsViewGroup] = {
+        g.view.func.name: g for g in view_groups
+    }
+
+    view_infos = {}  # collected separately; merged into `infos` only after the loop
+
+    for info_dispatch_dict in infos.values():
+        # NOTE(review): meant to be computed once per info_dispatch_dict, but it is looked up per dispatch key
+        maybe_view_group = None
+        view_copy_differentiability_infos = {}
+        for dispatch_key, info in info_dispatch_dict.items():
+            maybe_view_group = view_name_to_group.get(info.func.func.name, None)
+            if maybe_view_group is not None and maybe_view_group.view_copy is not None:
+                view_copy_info = info.create_view_copy_from_view_derivative(
+                    maybe_view_group
+                )
+                if view_copy_info is not None:
+                    fn_schema = view_copy_info.func.func
+                    view_copy_differentiability_infos[dispatch_key] = view_copy_info
+            else:
+                break  # no view group with a view_copy variant: stop scanning this entry
+        # prefer manually-defined derivatives if any (fn_schema is bound whenever the dict above is non-empty)
+        if len(view_copy_differentiability_infos) > 0 and fn_schema not in infos:
+            assert fn_schema is not None
+            view_infos[fn_schema] = view_copy_differentiability_infos
+
+    infos.update(view_infos)
+
+
+def load_derivatives(
+    derivatives_yaml_path: str, native_yaml_path: str, tags_yaml_path: str
+) -> DerivativeRet:
+    # Do some caching as this is a deterministic function
+    global _GLOBAL_LOAD_DERIVATIVE_CACHE
+    key = (derivatives_yaml_path, native_yaml_path)  # NOTE(review): tags_yaml_path is not part of the key
+    if key not in _GLOBAL_LOAD_DERIVATIVE_CACHE:
+        with open(derivatives_yaml_path) as f:
+            definitions = yaml.load(f, Loader=YamlLoader)
+
+        funcs = parse_native_yaml(native_yaml_path, tags_yaml_path).native_functions
+        # From the parsed native functions, separate out the (generated) view_copy functions,
+        # so we can generate derivatives for them separately.
+        native_functions_with_view_groups = get_grouped_by_view_native_functions(funcs)
+        native_functions = concatMap(
+            lambda g: [g]
+            if isinstance(g, NativeFunction)
+            else list(g.functions(include_copy=True)),
+            native_functions_with_view_groups,
+        )
+        view_groups = [
+            g
+            for g in native_functions_with_view_groups
+            if isinstance(g, NativeFunctionsViewGroup)
+        ]
+
+        # What's the difference between function schema v.s. signature?
+        # function schema is the complete declaration including mutability annotation / default value and etc.
+        # signature is the canonical schema for a group of functions (in-place/out/functional variants)
+        # that are semantically related.
+        functions_by_signature: Dict[
+            FunctionSchema, List[NativeFunction]
+        ] = defaultdict(list)
+        functions_by_schema: Dict[str, NativeFunction] = {}
+        for function in native_functions:
+            functions_by_signature[function.func.signature()].append(function)
+            assert str(function.func) not in functions_by_schema
+            functions_by_schema[str(function.func)] = function
+
+        # Keep track of how many of which ops we've seen so we can
+        # disambiguate them with a numeric suffix.
+        op_counter = Counter[str]()
+
+        # infos is a dict that maps FunctionSchema -> a dict of per dispatch key DifferentiabilityInfos
+        # this is useful because in tools/autograd/gen_autograd.py:match_differentiability_info
+        # we ultimately need to categorize the DifferentiabilityInfos by FunctionSchema
+        infos: Dict[FunctionSchema, Dict[str, DifferentiabilityInfo]] = {}
+        used_dispatch_keys: Set[str] = set()
+        for defn_dict in definitions:
+            # Ensure that the old derivatives.yaml schema with no dispatch key can be loaded.
+            if "dispatch" not in defn_dict:
+                specification = defn_dict.pop("name")
+                output_differentiability = defn_dict.pop(
+                    "output_differentiability", None
+                )
+                defn_dict = {"name": specification, "dispatch": {"Default": defn_dict}}
+                if output_differentiability:
+                    defn_dict["output_differentiability"] = output_differentiability
+            name, per_dispatch_diffinfos = create_differentiability_info(
+                defn_dict,
+                functions_by_signature,
+                functions_by_schema,
+                op_counter,
+                used_dispatch_keys,
+            )
+            infos[name] = per_dispatch_diffinfos
+
+        add_view_copy_derivatives(infos, view_groups)
+
+        # cache both the loaded infos as well as a set of all the dispatch_keys/aliases
+        # that appear in derivatives.yaml. used_dispatch_keys is useful for generating
+        # VariableType.cpp where we need a TORCH_LIBRARY_IMPL for every autograd dispatch key used
+        _GLOBAL_LOAD_DERIVATIVE_CACHE[key] = infos, used_dispatch_keys
+
+    return _GLOBAL_LOAD_DERIVATIVE_CACHE[key]
+
+
+# TODO: Why is this going through CppSignatureGroup, that doesn't make sense...
+@with_native_function
+def cpp_arguments(f: NativeFunction) -> Sequence[Binding]:
+    # Prefer the SymInt flavour of the signature whenever the group has one.
+    group = CppSignatureGroup.from_native_function(f, method=False)
+    symint_sig = group.symint_signature
+    chosen = group.signature if symint_sig is None else symint_sig
+    return chosen.arguments()
+
+
+def create_derivative(
+    f: NativeFunction,
+    formula: str,
+    var_names: Tuple[str, ...],
+    available_named_gradients: Sequence[str],
+) -> Derivative:
+    original_formula = formula  # kept as written; `formula` is rebound by saved_variables below
+    arguments: List[NamedCType] = [
+        a.nctype.remove_const_ref() for a in cpp_arguments(f)
+    ]
+
+    return_names = tuple(n if n != "self" else "result" for n in cpp.return_names(f))
+    return_types = tuple(
+        cpp.return_type(r, symint=True).remove_const_ref() for r in f.func.returns
+    )
+
+    named_returns = [
+        NamedCType(name, type) for name, type in zip(return_names, return_types)
+    ]
+
+    formula, saved_inputs = saved_variables(formula, arguments, var_names)
+    formula, saved_outputs = saved_variables(formula, named_returns, var_names)
+
+    used_named_gradients = {
+        name
+        for name in available_named_gradients
+        if re.search(IDENT_REGEX.format(name), formula)
+    }
+
+    # Check that every grads[i] the formula refers to is within the number of forward returns
+    for i in used_gradient_indices(formula):
+        if i >= len(f.func.returns):
+            raise RuntimeError(
+                f"Out of bounds grads access: derivative formula for {cpp.name(f.func)} "
+                f"used grads[{i}], but the forward only returns {len(f.func.returns)} outputs."
+            )
+
+    return Derivative(
+        formula=formula,
+        original_formula=original_formula,
+        var_names=var_names,
+        saved_inputs=saved_inputs,
+        saved_outputs=saved_outputs,
+        named_gradients=used_named_gradients,
+    )
+
+
+def create_forward_derivative(
+    f: NativeFunction, formula: str, names: Tuple[str, ...]
+) -> ForwardDerivative:
+    # Resolve the type of every output that this forward formula defines.
+    var_names = names
+    returns = f.func.returns
+
+    # First try the explicitly named returns, in schema order.
+    named = tuple(r.type for r in returns if r.name in var_names)
+    var_types: Optional[Tuple[Type, ...]] = named if named else None
+
+    # Handle default return names
+    if var_types is None:
+        if var_names == ("result",):
+            assert len(returns) == 1
+            var_types = (returns[0].type,)
+        else:
+            # `resultN` refers to the N-th return; other names are ignored here.
+            hits = [re.findall(r"^result(\d+)$", n) for n in var_names]
+            positional = tuple(
+                returns[int(hit[0])].type for hit in hits if len(hit) == 1
+            )
+            var_types = positional if positional else None
+
+    # At this point some output must have been matched.
+    assert var_types is not None, "No matching output for forward derivative definition"
+    return ForwardDerivative(
+        formula=formula,
+        var_names=var_names,
+        var_types=var_types,
+        required_inputs_fw_grad=None,
+        required_inputs_primal=None,
+        required_original_self_value=False,
+        is_reusing_outplace_formula=False,
+    )
+
+
+def postprocess_forward_derivatives(
+    f: NativeFunction,
+    defn_name: str,
+    all_arg_names: List[str],
+    derivatives: List[Derivative],
+    forward_derivatives: List[ForwardDerivative],
+    args_with_derivatives: Sequence[Binding],
+) -> List[ForwardDerivative]:  # returns new entries with the final formula and its required tangents/primals filled in
+    def find_required_inputs(formula: str, postfix: str) -> Tuple[str, ...]:  # differentiable args used as "<name><postfix>"
+        is_foreach = f.func.name.name.base.startswith("_foreach_")
+        required_inputs = set()
+        for arg in args_with_derivatives:
+            if (
+                arg.type in ("at::TensorList", "const at::ITensorListRef &")
+                and not is_foreach
+            ):
+                # The functions taking TensorList handle everything internally
+                continue
+            arg_name = arg.name
+
+            found = re.search(IDENT_REGEX.format(arg_name), formula)  # bare name, without a _p/_t suffix
+            if found:
+                raise RuntimeError(
+                    f"The forward formula for {defn_name} is using the base name of the {arg_name} "
+                    f"argument which is ambiguous. You should use {arg_name}_p to access the primal "
+                    f"value and {arg_name}_t to access the tangent."
+                )
+
+            found = re.search(IDENT_REGEX.format(arg_name + postfix), formula)
+            if found:
+                required_inputs.add(arg_name)
+
+        return tuple(required_inputs)  # built from a set, so the order of the names is not guaranteed
+
+    updated_derivatives: List[ForwardDerivative] = []
+
+    for defn in forward_derivatives:
+        formula = defn.formula
+        required_inputs_tangent = find_required_inputs(formula, "_t")  # overwritten below for the "auto_*" formulas
+        if formula == "auto_element_wise":
+            assert (
+                f.func.kind() != SchemaKind.inplace
+            ), f"Cannot use auto_element_wise with {f.func.name} because it is an in-place variant"
+            if (
+                (not len(args_with_derivatives) == 1)
+                or len(forward_derivatives) > 1
+                or len(forward_derivatives[0].var_names) > 1
+            ):
+                raise RuntimeError(
+                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
+                    "forward definition of gradient as element_wise but this only "
+                    "works for functions with a single differentiable input and a "
+                    "single differentiable output."
+                )
+            if not len(derivatives) == 1:
+                raise RuntimeError(
+                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
+                    "forward definition of gradient as element_wise but it does not "
+                    "defines the gradient formula for its argument which is required."
+                )
+            # This transformation is based on the observation that for element-wise functions, the Jacobian
+            # matrix is diagonal and thus doing J * v is the same as (v^T J)^T (in practice, we ignore the transpositions)
+            # For the complex case, we use hermitian transpose and get (v.conj() J).conj()
+            # So here we are going to re-use the backward formula and do three things:
+            # 1) replace all occurrences of "grad" with "foo_t.conj()", where foo is the name of the unique differentiable input.
+            # 2) replace all usage of an original input "foo" with its primal value "foo_p".
+            # 3) conjugate the final result
+            # For example, for abs, the backward formula is:
+            #   grad * self.sgn()
+            # And this function generates a forward formula that is:
+            #   (self_t.conj() * self_p.sgn()).conj()
+
+            backward_formula = derivatives[0].original_formula
+            input_name = args_with_derivatives[0].name
+
+            # Do replacement 1) of the grad
+            def repl(m: Any) -> str:
+                return f"{m.group(1)}{input_name}_t.conj(){m.group(2)}"
+
+            fw_formula = re.sub(IDENT_REGEX.format("grad"), repl, backward_formula)
+
+            # Do replacement 2) of the input variables
+            for arg in args_with_derivatives:
+                arg_name = arg.name
+
+                def repl(m: Any) -> str:
+                    return f"{m.group(1)}{arg_name}_p{m.group(2)}"
+
+                fw_formula = re.sub(IDENT_REGEX.format(arg_name), repl, fw_formula)
+
+            # Do the final conjugate 3)
+            fw_formula = f"({fw_formula}).conj()"
+
+            # Since there is a single differentiable input and we necessarily need its tangent, no per-input analysis is done.
+            # NOTE(review): this marks every name in all_arg_names as required, not only the differentiable input - confirm.
+            required_inputs_tangent = tuple(all_arg_names)
+            formula = fw_formula
+        elif formula == "auto_linear":
+            if (
+                len(forward_derivatives) > 1
+                or len(forward_derivatives[0].var_names) > 1
+            ):
+                raise RuntimeError(
+                    f"Derivative definition of {defn_name} in derivatives.yaml defines the "
+                    "forward definition of gradient as linear but this only works "
+                    "for functions with a single differentiable output."
+                )
+            # This transformation is based on the observation that linear functions can be written as:
+            #   y = f(x) = A * x
+            # For some matrix A and the Jacobian of the function f is also A.
+            # So doing J * v = A * v = f(v).
+            # Hence to do the jvp, we simply need to evaluate the function at the point v instead of x.
+            # We do this by calling the forward again by replacing any occurrence of the differentiable
+            # input "foo" by its tangent "foo_t".
+            # Note that multiple inputs are not a problem as long as the function is truly linear wrt
+            # the vector where all the differentiable inputs are stacked.
+
+            diff_arg_names = [arg.name for arg in args_with_derivatives]
+            assert len(diff_arg_names) > 0
+
+            # Do replacement of input variables
+            new_args = []
+            for arg_name in all_arg_names:
+                if arg_name in diff_arg_names:
+                    arg_name = arg_name + "_t"
+                new_args.append(arg_name)
+
+            # TODO we are trolling (note: this rebinds defn_name, so every later use in this call sees the "_symint" name)
+            if f.func.has_symint():
+                defn_name += "_symint"
+
+            # Call into the forward again. We need two cases here to handle both Tensor methods and at:: functions.
+            if Variant.function in f.variants:
+                fw_formula = f"at::{defn_name}({', '.join(new_args)})"
+            else:
+                assert Variant.method in f.variants
+                fw_formula = f"{new_args[0]}.{defn_name}({', '.join(new_args[1:])})"
+
+            # All of the input tangents are always used so all of them are required here.
+            required_inputs_tangent = tuple(diff_arg_names)
+            formula = fw_formula
+
+        # At this point, the formula is final and is not modified anymore.
+
+        # In the forward formula, we use the primals instead of the input Tensors.
+        # This call inspects the formula to find which inputs' primals are used.
+        required_inputs_primal = find_required_inputs(formula, "_p")
+
+        updated_derivatives.append(
+            ForwardDerivative(
+                formula=formula,
+                var_names=defn.var_names,
+                var_types=defn.var_types,
+                required_inputs_fw_grad=required_inputs_tangent,
+                required_inputs_primal=required_inputs_primal,
+                required_original_self_value=False,
+                is_reusing_outplace_formula=False,
+            )
+        )
+
+    return updated_derivatives
+
+
+def is_forward_derivative_definition(
+    all_arg_names: List[str], names: Tuple[str, ...]
+) -> bool:  # True when the first entry of `names` is not one of the input argument names
+    for name in names:  # every path in the body returns, so only the first name is ever inspected
+        if name not in all_arg_names:
+            return True
+        else:
+            return False
+    raise RuntimeError("Expected `names` to be non-empty")  # reached only when `names` is empty
+
+
+def create_differentiability_info(
+    defn_dict: Dict[Any, Any],
+    functions_by_signature: Dict[FunctionSchema, List[NativeFunction]],
+    functions_by_schema: Dict[str, NativeFunction],
+    op_counter: Counter[str],
+    used_dispatch_keys: Set[str],
+) -> Tuple[FunctionSchema, Dict[str, DifferentiabilityInfo]]:  # (canonical schema, info per dispatch key)
+    """Processes a single entry `defn` in derivatives.yaml"""
+
+    def canonical_function(
+        functions: Sequence[NativeFunction], name: str
+    ) -> NativeFunction:
+        for f in functions:
+            if (
+                not f.func.is_functional_fn()
+                and not f.func.is_out_fn()
+                and name == str(f.func.name.name)
+            ):
+                return f
+        # some functions only have in-place variants
+        assert name + "_" == cpp.name(functions[0].func)
+        return functions[0]
+
+    def split_names(raw_names: str) -> Tuple[str, ...]:
+        """Given "foo, bar", return ["foo", "bar"]."""
+        return tuple(x.strip() for x in raw_names.split(","))  # a tuple, e.g. ("foo", "bar")
+
+    def check_grad_usage(defn_name: str, derivatives: Sequence[Derivative]) -> None:
+        """
+        Check for some subtle mistakes one might make when writing derivatives.
+        These mistakes will compile, but will be latent until a function is
+        used with double backwards.
+        """
+
+        uses_grad = False  # true if any derivative uses "grad"
+        num_grads_uses = 0  # count of uses of "grads" or "grads[INDEX]"
+        uses_named_grads = False  # true if any derivative uses "grad_{name}"
+        used_grads_indices: List[int] = []  # which indices of grads are used
+        for d in derivatives:
+            formula = d.formula
+            uses_grad = uses_grad or bool(
+                re.findall(IDENT_REGEX.format("grad"), formula)
+            )
+            num_grads_uses += len(re.findall(IDENT_REGEX.format("grads"), formula))
+            uses_named_grads = uses_named_grads or bool(d.named_gradients)
+            used_grads_indices.extend(used_gradient_indices(formula))
+        # This is a basic sanity check: the number of places we see
+        # "grads" should be no fewer than the number of indices we see
+        # inside "grads". They may not be equal because we may use
+        # "grads" without an index.
+        assert num_grads_uses >= len(used_grads_indices)
+        # Thus if the number is equal, every use of grads is also
+        # indexed.
+        only_used_grads_indices = num_grads_uses == len(used_grads_indices)
+
+        if uses_grad and num_grads_uses > 0:
+            raise RuntimeError(
+                f"Derivative definition of {defn_name} in derivatives.yaml illegally "
+                "mixes use of 'grad' and 'grads'. Consider replacing "
+                "occurrences of 'grad' with 'grads[0]'"
+            )
+
+        if only_used_grads_indices and set(used_grads_indices) == {0}:
+            raise RuntimeError(
+                f"Derivative definition of {defn_name} in derivatives.yaml solely "
+                "refers to 'grads[0]'.  If the first output is indeed the "
+                "only differentiable output, replace 'grads[0]' with 'grad'; "
+                "otherwise, there is a likely error in your derivatives "
+                "declaration."
+            )
+
+        if uses_named_grads and (uses_grad or num_grads_uses > 0):
+            raise RuntimeError(
+                f"Derivative definition of {defn_name} in derivatives.yaml illegally "
+                'mixes use of "grad_RETURN_NAME" and "grad" or "grads[x]". Use '
+                "only one method for identifying gradients."
+            )
+
+    @with_native_function
+    def set_up_derivatives(
+        f: NativeFunction,
+    ) -> Tuple[
+        Sequence[Derivative],
+        Sequence[ForwardDerivative],
+        Sequence[Binding],
+        Sequence[str],
+        Sequence[str],
+    ]:
+        # Set up the derivative information
+        derivatives: List[Derivative] = []
+        forward_derivatives: List[ForwardDerivative] = []
+        non_differentiable_arg_names: List[str] = []
+        args_with_derivatives_set: Set[str] = set()
+
+        all_arg_names = [a.name for a in cpp_arguments(f)]
+        all_ret_names = [
+            r.name for r in f.func.returns
+        ]  # only used for the assert below
+        # output_differentiability is captured from the enclosed
+        # scope. Don't modify it.
+        #
+        # If it is not present, then no output is explicitly
+        # undifferentiable.
+        #
+        # It may be present and shorter than the length of return
+        # values. If that's the case, any return value that does not
+        # have a corresponding entry is considered not differentiable.
+        differentiability = output_differentiability or [True] * len(f.func.returns)
+        # A return is available as a named gradient ...
+        available_named_gradients = [
+            f"grad_{ret.name}"
+            for ret, differentiable in zip(f.func.returns, differentiability)
+            # if it has not been explicitly made undifferentiable
+            if differentiable
+            # and if it has a name
+            and ret.name is not None
+            # and if its type is differentiable
+            and ret.type.is_tensor_like()
+        ]
+
+        for raw_names in sorted(defn.keys()):  # `defn` is not a parameter: it is the enclosing dispatch loop's variable
+            formula = defn[raw_names]
+            names = split_names(raw_names)
+
+            for name in names:
+                assert not (name in all_arg_names and name in all_ret_names), (
+                    f"While processing the derivative formula for '{f.func.name}' wrt '{name}', "
+                    f"expected '{name}' to not be both an input arg and named return. "
+                )
+
+            if is_forward_derivative_definition(all_arg_names, names):
+                forward_derivatives.append(create_forward_derivative(f, formula, names))
+            else:
+                if formula.lower().strip() == "non_differentiable":
+                    non_differentiable_arg_names += names
+                else:
+                    derivative = create_derivative(
+                        f, formula, names, available_named_gradients
+                    )
+                    derivatives.append(derivative)
+                    args_with_derivatives_set |= set(names)
+
+        overlap = args_with_derivatives_set.intersection(non_differentiable_arg_names)
+        if overlap:
+            raise RuntimeError(
+                f"derivatives definition for {defn} have overlapped non_differentiable "
+                f"and differentiable variables: {overlap}"
+            )
+
+        # Next, let us determine the list of inputs in order.
+        # TODO: do we need to eagerly calculate and save it here? Can it be derived
+        # from NativeFunction and `derivatives` at call sites instead?
+        args_with_derivatives = [
+            a for a in cpp_arguments(f) if a.name in args_with_derivatives_set
+        ]
+
+        # Postprocess forward derivatives definitions now that we know the differentiable arguments
+        forward_derivatives = postprocess_forward_derivatives(
+            f,
+            defn_name,
+            all_arg_names,
+            derivatives,
+            forward_derivatives,
+            args_with_derivatives,
+        )
+
+        # Test to see if the use of 'grads' makes sense.
+        check_grad_usage(defn_name, derivatives)
+
+        return (
+            derivatives,
+            forward_derivatives,
+            args_with_derivatives,
+            non_differentiable_arg_names,
+            available_named_gradients,
+        )
+
+    # NB: Removes 'name' from defn dictionary
+    specification = defn_dict.pop("name")
+    defn_name, _ = split_name_params(specification)
+    # NB: Removes 'output_differentiability' from defn dictionary
+    #     `None` means all differentiable.
+    output_differentiability = defn_dict.pop("output_differentiability", None)
+    output_differentiability_conditions = None
+    if output_differentiability and any(
+        isinstance(diff, str) for diff in output_differentiability
+    ):
+        if len(output_differentiability) != 1:
+            raise RuntimeError(
+                f"Not supported: for {specification},"
+                f"output_differentiability must either be "
+                f"List[bool] or a List[str] where each str is a "
+                f"condition. In the case where it is a condition, "
+                f"we only support single-output functions. "
+                f"Please file us an issue. "
+            )
+        output_differentiability_conditions = output_differentiability
+        output_differentiability = [True]  # the condition strings are kept separately in the variable above
+
+    schema_function = functions_by_schema.get(specification)
+    if not schema_function:
+        avail = "\n".join(
+            k for k, v in functions_by_schema.items() if cpp.name(v.func) == defn_name
+        )
+        raise RuntimeError(
+            f"could not find ATen function for schema: {specification} "
+            f".  Available signatures:\n{avail}"
+        )
+
+    # now map this to the legacy schema; this isn't technically necessary, but we'd need some logic here
+    # to map in-place schemas to the out-of-place variants.
+    # TODO: maybe the logic to handle the legacy schema is no longer necessary?
+    signature = schema_function.func.signature()
+    functions = functions_by_signature[signature]
+    if len(functions) == 0:
+        avail = "\n".join(
+            str(k)
+            for k, v in functions_by_signature.items()
+            if cpp.name(k) == defn_name
+        )
+        raise RuntimeError(
+            f"could not find ATen function for legacy signature: {signature} "
+            f"corresponding to schema {specification}.  Please report a bug to PyTorch. "
+            f"Available signatures:\n{avail}"
+        )
+
+    canonical = canonical_function(functions, defn_name)
+    if "grad_input_mask" in (a.name for a in cpp_arguments(canonical)):
+        raise RuntimeError(
+            f"Schema for {defn_name} has an argument named grad_input_mask, "
+            "but this name would be shadowed by our codegen. "
+            "Please use a different name in native_functions.yaml."
+        )
+
+    if "result" in (a.name for a in cpp_arguments(canonical)):
+        raise RuntimeError(
+            f"Schema for {defn_name} has an argument named result, "
+            "but this is only allowed for outputs."
+            "Please use a different name in native_functions.yaml."
+        )
+
+    diffinfo_dict = {}
+    for key, defn in defn_dict["dispatch"].items():
+        if key != "Default" and key not in _VALID_AUTOGRAD_KEYS:
+            raise RuntimeError(
+                f"Invalid dispatch key {key} in derivatives.yaml for {specification},"
+                f" expected key to be one of {_VALID_AUTOGRAD_KEYS}"
+            )
+        if key not in used_dispatch_keys:  # records the key in the caller-provided set
+            used_dispatch_keys.add(key)
+
+        (
+            derivatives,
+            forward_derivatives,
+            args_with_derivatives,
+            non_differentiable_arg_names,
+            available_named_gradients,
+        ) = set_up_derivatives(canonical)  # reads this iteration's `defn` through its closure
+
+        used_named_gradients: Set[str] = set()
+        for d in derivatives:
+            used_named_gradients |= d.named_gradients
+
+        # only assign an op name if we are actually going to calculate a derivative
+        op = None
+        if args_with_derivatives:
+            op_prefix = _create_op_prefix(defn_name)
+            if key != "Default":
+                op_prefix = op_prefix + key
+            op = f"{op_prefix}{op_counter[op_prefix]}"
+            op_counter[op_prefix] += 1  # advances the caller-provided counter for this prefix
+
+        diffinfo_dict[key] = DifferentiabilityInfo(
+            name=defn_name,
+            func=canonical,
+            op=op,
+            derivatives=derivatives,
+            forward_derivatives=forward_derivatives,
+            all_saved_inputs=dedup_vars(
+                [v for d in derivatives for v in d.saved_inputs]
+            ),
+            all_saved_outputs=dedup_vars(
+                [v for d in derivatives for v in d.saved_outputs]
+            ),
+            available_named_gradients=available_named_gradients,
+            used_named_gradients=used_named_gradients,
+            args_with_derivatives=args_with_derivatives,
+            non_differentiable_arg_names=non_differentiable_arg_names,
+            output_differentiability=output_differentiability,
+            output_differentiability_conditions=output_differentiability_conditions,
+        )
+
+    return canonical.func, diffinfo_dict
+
+
+GRAD_INDEX_REGEX = r"(?:^|\W)grads\[(\d+)\]"  # captures i in "grads[i]" at string start or after a non-word character
+
+
+def used_gradient_indices(formula: str) -> List[int]:
+    """Determine a list of gradient indices (the i in grads[i]) that
+    are used by the formula.
+
+    >>> used_gradient_indices("foo(grads[0], grads[1])")
+    [0, 1]
+    """
+    return [int(i) for i in re.findall(GRAD_INDEX_REGEX, formula)]  # in order of appearance; repeated indices are kept
+
+
+def saved_variables(
+    formula: str,
+    nctypes: List[NamedCType],
+    var_names: Tuple[str, ...],
+) -> Tuple[str, Tuple[SavedAttribute, ...]]:  # returns (rewritten formula, attributes that must be saved)
+    def stride_expr(name: str) -> str:
+        assert var_names == (name,), (
+            'Replacement for ".strides()" is currently only supported for single derivatives of the same tensor '
+            'that ".strides()" is being called on.'
+        )
+        return f'strides_or_error({name}, "{name}")'
+
+    REPLACEMENTS: List[Tuple[str, Dict[str, Any]]] = [
+        # replace self.sym_sizes() with self_sym_sizes
+        (
+            r"{}.sym_sizes\(\)",
+            {
+                "suffix": "_sym_sizes",
+                "nctype": lambda name: NamedCType(name, BaseCType(symIntArrayRefT)),
+            },
+        ),
+        # replace self->sym_sizes() with self_sym_sizes_opt
+        (
+            r"{}->sym_sizes\(\)",
+            {
+                "suffix": "_sym_sizes_opt",
+                "nctype": lambda name: NamedCType(
+                    name, OptionalCType(BaseCType(symIntArrayRefT))
+                ),
+                "expr": lambda name: f"{name}.has_value() ? c10::optional<c10::SymIntArrayRef>({name}->sym_sizes()) : c10::nullopt",
+            },
+        ),
+        # replace self.sym_blocksize() with self_self_sym_blocksize_opt (the suffix itself starts with "_self")
+        (
+            r"{}.sym_blocksize\(\)",
+            {
+                "suffix": "_self_sym_blocksize_opt",
+                "nctype": lambda name: NamedCType(
+                    name, OptionalCType(BaseCType(symIntArrayRefT))
+                ),
+                "expr": lambda name: f"at::sparse_csr::getSymIntBlockSize({name})",
+            },
+        ),
+        # replace self.options() with self_options
+        (
+            r"{}.options\(\)",
+            {
+                "suffix": "_options",
+                "nctype": lambda name: NamedCType(name, BaseCType(tensorOptionsT)),
+            },
+        ),
+        # replace zeros_like(self) with self_info.zeros(); the saved attribute is named self_info
+        (
+            r"zeros_like\({}\)",
+            {
+                "suffix": "_info",
+                "nctype": lambda name: NamedCType(name, BaseCType(typeAndSizeT)),
+                "expr": lambda name: name,  # at save-time
+                "res": lambda name: name + "_info.zeros()",  # at eval-time
+            },
+        ),
+        # replace self.sym_size(2) with self_sym_argsize_2 (a leading "-" in the argument becomes "minus_")
+        (
+            r"{}.sym_size\((-?\w+)\)",
+            {
+                "suffix": lambda m: f"_sym_argsize_{m.groups()[0].replace('-', 'minus_')}",
+                "nctype": lambda name: NamedCType(name, BaseCType(SymIntT)),
+            },
+        ),
+        # replace self.numel() with self_numel
+        (
+            r"{}.numel\(\)",
+            {
+                "suffix": "_numel",
+                "nctype": lambda name: NamedCType(name, BaseCType(longT)),
+            },
+        ),
+        # replace self.sym_numel() with self_sym_numel
+        (
+            r"{}.sym_numel\(\)",
+            {
+                "suffix": "_sym_numel",
+                "nctype": lambda name: NamedCType(name, BaseCType(SymIntT)),
+            },
+        ),
+        # replace to_args_sizes(self) with self_args_sizes
+        (
+            r"to_args_sizes\({}\)",
+            {
+                "suffix": "_args_sizes",
+                "nctype": lambda name: NamedCType(
+                    name, VectorCType(VectorCType(BaseCType(longT)))
+                ),
+            },
+        ),
+        # replace to_args_sizes_symint(self) with self_args_sizes_symint
+        (
+            r"to_args_sizes_symint\({}\)",
+            {
+                "suffix": "_args_sizes_symint",
+                "nctype": lambda name: NamedCType(
+                    name, VectorCType(VectorCType(BaseCType(SymIntT)))
+                ),
+            },
+        ),
+        # replace to_args_scalartypes(self) with self_args_scalartypes
+        (
+            r"to_args_scalartypes\({}\)",
+            {
+                "suffix": "_args_scalartypes",
+                "nctype": lambda name: NamedCType(
+                    name, VectorCType(BaseCType(scalarTypeT))
+                ),
+            },
+        ),
+        # replace TensorGeometry(self) with self_geometry
+        (
+            r"TensorGeometry\({}\)",
+            {
+                "suffix": "_geometry",
+                "nctype": lambda name: NamedCType(name, BaseCType(tensorGeometryT)),
+            },
+        ),
+        (  # replace self.scalar_type() with self_scalar_type
+            r"{}.scalar_type\(\)",
+            {
+                "suffix": "_scalar_type",
+                "nctype": lambda name: NamedCType(name, BaseCType(scalarTypeT)),
+            },
+        ),
+        # replace self.dim() with self_dim
+        (
+            r"{}.dim\(\)",
+            {
+                "suffix": "_dim",
+                "nctype": lambda name: NamedCType(name, BaseCType(longT)),
+            },
+        ),
+        # replace self.sym_strides() with self_sym_strides
+        (
+            r"{}.sym_strides\(\)",
+            {
+                "suffix": "_sym_strides",
+                "nctype": lambda name: NamedCType(name, BaseCType(symIntArrayRefT)),
+                "expr": stride_expr,
+            },
+        ),
+        # replace self.layout() with self_layout
+        (
+            r"{}.layout\(\)",
+            {
+                "suffix": "_layout",
+                "nctype": lambda name: NamedCType(name, BaseCType(layoutT)),
+            },
+        ),
+        # replace self.is_conj() with self_conjugate
+        (
+            r"{}.is_conj\(\)",
+            {
+                "suffix": "_conjugate",
+                "nctype": lambda name: NamedCType(name, BaseCType(boolT)),
+            },
+        ),
+    ]
+
+    # find which arguments need to be saved
+    saved: List[SavedAttribute] = []
+
+    if ".sizes()" in formula or "->sizes()" in formula:
+        raise RuntimeError(
+            ".sizes() is not supported in derivative formulas. Instead, please use the SymInt version,"
+            + f".sym_sizes(), which returned a c10::SymIntArrayRef. formula={formula}"
+        )
+    if re.search(r"\.size\([-]?\d+\)", formula) or re.search(
+        r"->size\([-]?\d+\)", formula
+    ):
+        raise RuntimeError(
+            ".size(int) is not supported in derivative formulas. Instead, please use the SymInt version,"
+            + f".sym_size(int), which returned a c10::SymIntArrayRef. formula={formula}"
+        )
+    if ".strides()" in formula or "->strides()" in formula:
+        raise RuntimeError(
+            ".strides() is not supported in derivative formulas. Instead, please use the SymInt version,"
+            + f".sym_strides(), which returned a c10::SymIntArrayRef. formula={formula}"
+        )
+    for nctype in nctypes:
+        name = (
+            nctype.name.name if isinstance(nctype.name, SpecialArgName) else nctype.name
+        )
+        # First search the formula for expressions which can be evaluated
+        # when the autograd Function is created to avoid saving variables
+        for regex, info in REPLACEMENTS:
+
+            def repl(m: Match[str]) -> str:  # records a SavedAttribute for the match and returns its replacement text
+                suffix: str = (
+                    info["suffix"](m) if callable(info["suffix"]) else info["suffix"]
+                )
+                expr: str = info["expr"](name) if "expr" in info else m.group(0)
+                saved.append(
+                    SavedAttribute(
+                        nctype=info["nctype"](name + suffix),
+                        expr=expr,
+                    )
+                )
+                if "res" in info:
+                    replacement: str = info["res"](name)
+                    return replacement
+                return name + suffix
+
+            formula = re.sub(regex.format(name), repl, formula)
+
+        # c10::optional<std::string> types stored in Backward nodes must be
+        # converted to c10::optional<c10::string_view> before being passed into
+        # the backward function
+        if nctype.type == OptionalCType(BaseCType(stringT)):
+            formula = re.sub(
+                rf"\b{name}\b",
+                f"{name}.has_value() ? c10::optional<c10::string_view>({name}.value()) : c10::nullopt",
+                formula,
+            )
+
+        # Find any variables which remain in the formula and save them
+        if re.search(IDENT_REGEX.format(name), formula):
+            saved.append(
+                SavedAttribute(
+                    nctype=nctype,
+                    expr=name,
+                )
+            )
+
+    return formula, tuple(saved)
+
+
+def _create_op_prefix(name: str) -> str:
+    """Takes a native function name converts to a op prefix name.
+
+    Note that the "name" parameter must be the native function name
+    without the optional variant suffix, so "add" instead of
+    "add.out".
+
+    OP names correspond to classes, hence the change to title case.
+
+    Example::
+    >>> _create_op_prefix('add')
+    'AddBackward'
+    """
+    camel_case = "".join([p.title() for p in name.split("_")])  # title-case each "_"-separated piece, then concatenate
+    return (camel_case + "Backward").replace("ForwardBackward", "Backward")  # e.g. "foo_forward" gives "FooBackward"
+
+
+def dedup_vars(vars: Sequence[SavedAttribute]) -> Sequence[SavedAttribute]:  # keeps the first attribute seen per name
+    seen: Set[str] = set()
+    saved: List[SavedAttribute] = []
+    for var in vars:
+        name = (
+            var.nctype.name.name
+            if isinstance(var.nctype.name, SpecialArgName)
+            else var.nctype.name
+        )  # unwrap SpecialArgName so the comparison below is done on its inner name
+        if name in seen:
+            continue  # a later attribute with an already-seen name is dropped
+        seen.add(name)
+        saved.append(var)
+    return saved  # input order is preserved
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ADInplaceOrViewType.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ADInplaceOrViewType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..e8276697eee065a36d1b16e583a5f011f92541c2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ADInplaceOrViewType.cpp
@@ -0,0 +1,38 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+#include "torch/csrc/autograd/VariableTypeUtils.h"
+#include "torch/csrc/autograd/generated/ViewFuncs.h"
+
+#include <torch/library.h>
+#include <ATen/FunctionalInverses.h>
+#include <ATen/FunctionalTensorWrapper.h>
+
+// ${generated_comment}
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Operators.h>
+#else
+$ops_headers
+#endif
+
+using namespace at;
+using torch::autograd::CreationMeta;
+using torch::autograd::as_view;
+using torch::autograd::increment_version;
+
+namespace torch {
+
+namespace ADInplaceOrView {
+
+namespace {
+${inplace_or_view_method_definitions}
+}  // namespace
+}  // namespace ADInplaceOrView
+
+namespace {
+
+TORCH_LIBRARY_IMPL(aten, ADInplaceOrView, m) {  // registrations for the aten namespace under the ADInplaceOrView key
+  ${inplace_or_view_wrapper_registrations};
+}
+
+}  // namespace
+} // namespace torch
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..5bc089f67df74b300bc8de6568b702d48e0cb6c2
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.cpp
@@ -0,0 +1,20 @@
+#include "torch/csrc/autograd/FunctionsManual.h"
+#include "torch/csrc/dynamo/compiled_autograd.h"
+
+// ${generated_comment}
+
+// The manual function definitions that used to be here are now in torch/csrc/autograd/FunctionsManual.cpp
+// This speeds up re-compilation and allows these implementations to be shared so that they can be
+// used for forward mode AD formulas as well.
+
+using namespace torch::autograd::generated::details;
+using at::Tensor;
+using at::Scalar;
+using at::IntArrayRef;
+using at::TensorList;
+
+namespace torch::autograd::generated {
+
+${autograd_function_definitions}
+
+} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.h
new file mode 100644
index 0000000000000000000000000000000000000000..437a5e8e898895b2c1f113d19bdf34c6e34b1ea8
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/Functions.h
@@ -0,0 +1,51 @@
+#pragma once
+
+// ${generated_comment}
+
+#include <ATen/ATen.h>
+#include <ATen/core/functional.h>
+#include <ATen/TensorGeometry.h>
+
+#include "torch/csrc/autograd/function.h"
+#include "torch/csrc/autograd/variable.h"
+#include "torch/csrc/autograd/saved_variable.h"
+#include <torch/csrc/Export.h>
+
+#include <c10/core/SymIntArrayRef.h>
+
+namespace torch { namespace autograd { namespace generated {
+
+using at::Scalar;
+using at::Tensor;
+using at::IntArrayRef;
+using at::ArrayRef;
+using at::Type;
+using at::TensorGeometry;
+using at::ScalarType;
+using c10::optional;
+using c10::fmap;
+
+// Unpacks every SavedVariable in `xs` and returns the results as a vector of
+// Tensors, in the same order. `saved_for` is passed to each
+// `SavedVariable::unpack` call.
+inline std::vector<Tensor> unpack_list(at::ArrayRef<SavedVariable> xs, std::shared_ptr<Node> saved_for = nullptr) {
+  // NB: the conversion to Tensor is spelled out inside the lambda; otherwise
+  // template deduction will give a Tensor of Variable which is not convertible
+  const auto unpack_one = [&saved_for](const SavedVariable& saved) {
+    // TODO(crcrpar): Use `std::move(saved_for)` to avoid incrementing refcount, which would need refactoring.
+    return static_cast<Tensor>(saved.unpack(saved_for));
+  };
+  return fmap(xs, unpack_one);
+}
+
+// Unpacks every SavedVariable in `xs` into a list of optional Tensors.
+// An unpacked tensor that is defined is stored as-is; an undefined one is
+// stored as an empty optional. `saved_for` is passed to each unpack call.
+inline c10::List<c10::optional<Tensor>> unpack_opt_list(at::ArrayRef<SavedVariable> xs, std::shared_ptr<Node> saved_for = nullptr) {
+  torch::List<c10::optional<Tensor>> unpacked;
+  unpacked.reserve(xs.size());
+  for (const SavedVariable& saved : xs) {
+    auto tensor = saved.unpack(saved_for);
+    if (tensor.defined()) {
+      unpacked.push_back(c10::optional<Tensor>(tensor));
+    } else {
+      unpacked.push_back(c10::optional<Tensor>(c10::nullopt));
+    }
+  }
+  return unpacked;
+}
+
+using torch::autograd::TypeAndSize;
+
+${autograd_function_declarations}
+
+}}} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/TraceType.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/TraceType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..fb5e7ae44a5353a3cc2a90858fe33b7fc0ef8bfd
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/TraceType.cpp
@@ -0,0 +1,40 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+#include "torch/csrc/jit/frontend/tracer.h"
+
+#include <torch/library.h>
+
+#include "torch/csrc/autograd/function.h"
+
+#include "ATen/quantized/Quantizer.h"
+
+// ${generated_comment}
+
+// See the `Tracer` section in `torch/csrc/jit/OVERVIEW.md`.
+// NOTE See [Sharded File] comment in VariableType
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Operators.h>
+#else
+$ops_headers
+#endif
+
+using namespace at;
+
+namespace torch {
+
+namespace TraceType {
+
+namespace {
+${trace_method_definitions}
+}  // namespace
+}  // namespace TraceType
+
+namespace {
+
+// Registration block for the `aten` library under the Tracer key.
+// `${trace_wrapper_registrations}` is a codegen placeholder that is replaced
+// with the generated registration statements using `m`.
+TORCH_LIBRARY_IMPL(aten, Tracer, m) {
+  ${trace_wrapper_registrations};
+}
+
+}  // namespace
+
+} // namespace torch
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..08f1f8b698e528ca382ead2fb64ee0a45a708b08
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.cpp
@@ -0,0 +1,65 @@
+#include "torch/csrc/autograd/VariableTypeUtils.h"
+#include "torch/csrc/autograd/generated/VariableType.h"
+#include "torch/csrc/autograd/FunctionsManual.h"
+
+#include <ATen/RedispatchFunctions.h>
+#include <c10/core/impl/TorchDispatchModeTLS.h>
+#include <ATen/core/TorchDispatchUtils.h>
+#include <torch/library.h>
+
+#include <ATen/SparseCsrTensorUtils.h>
+
+
+// ${generated_comment}
+
+// NOTE [Sharded File]: on this file's split-into-shards state
+//
+// Back in the good old days, VariableType.cpp was generated as one
+// file with every function in it, and everything was great and
+// simple.
+//
+// However, this file was also very large (over 36,000 lines), and
+// compiling it was very slow, and in fact was a significant
+// bottleneck for incremental rebuilds. To address this, we now
+// generate the file split across multiple shards, named
+// VariableType_0.cpp and so on, which can be compiled in parallel.
+//
+// For ease of inspection and debugging, so that it's not necessary to
+// go rooting around in multiple files, we also generate all the
+// functions together in VariableTypeEverything.cpp. This generated
+// file is only for convenience; it's not actually used in the
+// build. If the file you're looking at now is one of the shards, you
+// may want to switch over to the Everything variant to make your
+// grepping smoother.
+
+using namespace at;
+using namespace torch::autograd::generated;
+using namespace torch::autograd::generated::details;
+
+
+namespace torch::autograd {
+
+namespace VariableType {
+namespace {
+  // Clears `self`'s grad accumulator. Does nothing when `self` has no
+  // autograd metadata attached.
+  C10_UNUSED void reset_grad_accumulator(Variable & self) {
+    if (AutogradMeta* autograd_meta = torch::autograd::impl::get_autograd_meta(self)) {
+      autograd_meta->grad_accumulator_.reset();
+    }
+  }
+} // namespace
+
+namespace {
+
+
+${type_derived_method_definitions}
+}
+}
+
+namespace {
+
+${wrapper_registrations}
+
+}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.h
new file mode 100644
index 0000000000000000000000000000000000000000..065812694cfe4e17623d5fa0464b8ea5f0199a6a
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/VariableType.h
@@ -0,0 +1,59 @@
+#pragma once
+
+// ${generated_comment}
+
+#include <ATen/core/Tensor.h>
+#include <ATen/Context.h>
+
+#include <c10/util/intrusive_ptr.h>
+
+#include <torch/csrc/Export.h>
+#include <torch/csrc/autograd/autograd_not_implemented_fallback.h>
+
+#include <cstdint> // for size_t
+#include <functional> // for function
+#include <memory> // for unique_ptr
+#include <string>
+#include <vector>
+
+namespace at {
+  struct Quantizer;
+};
+
+namespace torch { namespace autograd {
+
+using Variable = at::Tensor;
+using at::Context;
+using at::Device;
+using at::Dimname;
+using at::DimnameList;
+using at::Generator;
+using at::IntArrayRef;
+using at::MemoryFormat;
+using at::QScheme;
+using at::Scalar;
+using at::ScalarType;
+using at::Storage;
+using at::Tensor;
+using at::TensorList;
+using at::TensorOptions;
+using at::Quantizer;
+// This is a temporary typedef to enable Quantizer in the aten native function
+// API; we'll remove it when we actually expose the Quantizer class
+// to the frontend
+using ConstQuantizerPtr = const c10::intrusive_ptr<Quantizer>&;
+using c10::optional;
+
+// Declarations only; the definitions are not part of this header.
+namespace VariableType {
+  // Accessors returning lists of DeprecatedTypeProperties pointers, one
+  // accessor per backend named in the function (CUDA, XPU, CPU, PrivateUser1).
+  TORCH_API std::vector<at::DeprecatedTypeProperties*> allCUDATypes();
+  TORCH_API std::vector<at::DeprecatedTypeProperties*> allXPUTypes();
+  TORCH_API std::vector<at::DeprecatedTypeProperties*> allCPUTypes();
+  TORCH_API std::vector<at::DeprecatedTypeProperties*> allPrivateUser1Types();
+
+  // Overloads of `unpack` / `unpack_opt` for a tensor or tensor list argument.
+  // NOTE(review): `name` and `pos` presumably identify the argument for error
+  // reporting — confirm against the definitions.
+  at::Tensor & unpack(Tensor & t, const char * name, int pos);
+  const at::Tensor & unpack(const Tensor & t, const char * name, int pos);
+  at::Tensor unpack_opt(const Tensor & t, const char * name, int pos);
+  std::vector<at::Tensor> unpack(const at::ITensorListRef& tl, const char *name, int pos);
+};
+
+}} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..11b9b194fb46f924e863c4c1dab5cbb8dbb0601b
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.cpp
@@ -0,0 +1,14 @@
+#include <torch/csrc/autograd/generated/ViewFuncs.h>
+
+// ${generated_comment}
+
+using at::Tensor;
+using at::Scalar;
+using at::IntArrayRef;
+using at::TensorList;
+
+namespace torch::autograd::generated {
+
+${view_func_definitions}
+
+} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.h
new file mode 100644
index 0000000000000000000000000000000000000000..faf5ab6881f189d72288afa4016b126539be1a5e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/ViewFuncs.h
@@ -0,0 +1,28 @@
+#pragma once
+
+// ${generated_comment}
+
+#include <torch/library.h>
+#include <torch/csrc/autograd/variable.h>
+#include <c10/core/SymIntArrayRef.h>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Operators.h>
+#else
+$ops_headers
+#endif
+
+namespace torch::autograd::generated {
+
+using at::Scalar;
+using at::Tensor;
+using at::IntArrayRef;
+using at::ArrayRef;
+using at::Type;
+using at::ScalarType;
+using c10::optional;
+using c10::fmap;
+
+${view_func_declarations}
+
+} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/annotated_fn_args.py.in b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/annotated_fn_args.py.in
new file mode 100644
index 0000000000000000000000000000000000000000..1012c008451745b8f1ed1454a864f666caf2618a
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/annotated_fn_args.py.in
@@ -0,0 +1,11 @@
+"""
+This file is needed for generating procedural tests required for
+testing __torch_function__. See tests/test_overrides.py.
+"""
+
+# flake8: noqa
+import torch
+
+annotated_args = {
+${annotated_args}
+}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_enum_tag.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_enum_tag.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..83cfad1d7ba4d6fc3529caf78e036c5883e7bc23
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_enum_tag.cpp
@@ -0,0 +1,15 @@
+#include <torch/csrc/autograd/python_enum_tag.h>
+#include <torch/csrc/utils/pybind.h>
+#include <pybind11/pybind11.h>
+#include <ATen/core/enum_tag.h>
+
+namespace py = pybind11;
+namespace torch {
+    namespace autograd {
+    // Binds the C++ `at::Tag` enum into `module` as a pybind11 enum named "Tag".
+    // `${enum_of_valid_tags}` is a codegen placeholder that continues the
+    // `py::enum_` expression and supplies its terminating content
+    // (presumably one `.value(...)` call per tag — confirm against the generator).
+    void initEnumTag(PyObject* module) {
+        auto m = py::handle(module).cast<py::module>();
+        py::enum_<at::Tag>(m, "Tag")
+        ${enum_of_valid_tags};
+        // Note: this sets the docstring on `m` (the module handle), not on the enum object.
+        m.doc() = "An Enum that contains tags that can be assigned to an operator registered in C++.";
+    }
+}}
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_fft_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_fft_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..71ac4e2226d2db418eba5690995424d3f007e620
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_fft_functions.cpp
@@ -0,0 +1,81 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include "torch/csrc/Device.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/autograd/python_fft_functions.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/autograd/generated/variable_factories.h"
+#include "torch/csrc/utils/out_types.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/utils/device_lazy_init.h"
+
+#include <ATen/core/Tensor.h>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+using at::Tensor;
+using at::Device;
+using at::Layout;
+using at::Scalar;
+using at::ScalarType;
+using at::Backend;
+using at::OptionalDeviceGuard;
+using at::DeviceGuard;
+using at::TensorOptions;
+using at::IntArrayRef;
+using at::Generator;
+using at::TensorList;
+using at::Dimname;
+using at::DimnameList;
+
+using torch::utils::check_out_type_matches;
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+// generated forward declarations start here
+
+${py_forwards}
+
+// Method table used by the `torch._C._fft` module definition.
+// `${py_method_defs}` is a codegen placeholder for the generated entries;
+// the trailing `{NULL}` entry is the sentinel that terminates the table.
+static PyMethodDef fft_functions[] = {
+  ${py_method_defs}
+  {NULL}
+};
+
+static PyObject* THPFFTVariableFunctionsModule = NULL;
+
+// Creates the `torch._C._fft` module from `fft_functions`, records it in
+// THPFFTVariableFunctionsModule, and attaches it to `module` as `_fft`.
+// Throws python_error if the module cannot be created or attached.
+void initFFTFunctions(PyObject* module) {
+  static struct PyModuleDef def = {
+     PyModuleDef_HEAD_INIT,
+     "torch._C._fft",
+     nullptr,
+     -1,
+     fft_functions
+  };
+  PyObject* fft = PyModule_Create(&def);
+  THPFFTVariableFunctionsModule = fft;
+  if (!fft) {
+    throw python_error();
+  }
+  // PyModule_AddObject steals the reference to fft only on success; on
+  // failure we still own it and must release it to avoid a leak.
+  if (PyModule_AddObject(module, "_fft", fft) != 0) {
+    // Clear the cached pointer first so it never refers to a released module.
+    THPFFTVariableFunctionsModule = nullptr;
+    Py_DECREF(fft);
+    throw python_error();
+  }
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..1522d6cd0f5a2a1fc0188bf9d6d0d59fe1b27d85
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.cpp
@@ -0,0 +1,37 @@
+#include <torch/csrc/autograd/generated/python_functions.h>
+
+// ${generated_comment}
+
+#include <Python.h>
+#include <ATen/ATen.h>
+
+#include <c10/core/SymNodeImpl.h>
+#include "torch/csrc/autograd/generated/Functions.h"
+#include "torch/csrc/autograd/python_cpp_function.h"
+#include <torch/csrc/autograd/python_variable.h>
+#include <torch/csrc/autograd/saved_variable.h>
+#include <torch/csrc/utils/pybind.h>
+#include <pybind11/pybind11.h>
+#include <torch/csrc/utils/pybind.h>
+
+// NOTE: See [Sharded File] comment in VariableType
+
+namespace torch::autograd::generated {
+
+// Sets up `type` via _initFunctionPyTypeObject, adds it to `module` under
+// `name`, and registers it with registerCppFunction as the Python type for
+// the C++ class `C`.
+//   function_properties / function_methods: optional tables forwarded to
+//   _initFunctionPyTypeObject (may be null).
+template<typename C>
+static void addClass(PyObject* module, PyTypeObject& type, const char* name,
+  PyGetSetDef* function_properties=nullptr, PyMethodDef* function_methods=nullptr)
+{
+  _initFunctionPyTypeObject(type, name, function_properties, function_methods);
+  Py_INCREF(&type);
+  // PyModule_AddObject steals the reference only on success; hand the extra
+  // reference back on failure so it is not leaked.
+  // NOTE(review): as before, a failure here is not propagated to the caller;
+  // consider raising once all callers are known to handle it.
+  if (PyModule_AddObject(module, name, reinterpret_cast<PyObject*>(&type)) != 0) {
+    Py_DECREF(&type);
+  }
+  registerCppFunction(typeid(C), &type);
+}
+
+${py_function_props_and_getters}
+
+// Per-shard initializer; `${shard_id}` is a codegen placeholder that becomes
+// part of the function name. `${py_function_initializers}` is replaced with
+// the generated initialization statements for this shard, which use `module`.
+void initialize_autogenerated_functions${shard_id}(PyObject* module) {
+  ${py_function_initializers}
+}
+
+} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.h
new file mode 100644
index 0000000000000000000000000000000000000000..22e37207e219431100fefaf21b02e3ed0f63d956
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_functions.h
@@ -0,0 +1,17 @@
+#pragma once
+
+#include <Python.h>
+
+// ${generated_comment}
+
+// Python bindings for automatically generated autograd functions
+
+namespace torch { namespace autograd { namespace generated {
+
+${shard_forward_declare}
+
+// Single entry point for initializing the autogenerated functions on `module`.
+// `${shard_call}` is a codegen placeholder; presumably it expands to one call
+// per shard declared by `${shard_forward_declare}` — confirm against the generator.
+inline void initialize_autogenerated_functions(PyObject* module) {
+  ${shard_call}
+}
+
+}}} // namespace torch::autograd::generated
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_linalg_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_linalg_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c93752a3ddbfcf111426f98c3ea68fc625e94def
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_linalg_functions.cpp
@@ -0,0 +1,68 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include "torch/csrc/Device.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/autograd/python_linalg_functions.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/structseq.h"
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+using at::Tensor;
+using at::Scalar;
+using at::ScalarType;
+using at::MemoryFormat;
+using at::Generator;
+using at::IntArrayRef;
+using at::TensorList;
+
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+// generated forward declarations start here
+
+${py_forwards}
+
+// Method table used by the `torch._C._linalg` module definition.
+// `${py_method_defs}` is a codegen placeholder for the generated entries;
+// the trailing `{NULL}` entry is the sentinel that terminates the table.
+static PyMethodDef linalg_functions[] = {
+  ${py_method_defs}
+  {NULL}
+};
+
+static PyObject* THPLinalgVariableFunctionsModule = NULL;
+
+// Creates the `torch._C._linalg` module from `linalg_functions`, records it
+// in THPLinalgVariableFunctionsModule, and attaches it to `module` as
+// `_linalg`. Throws python_error if the module cannot be created or attached.
+void initLinalgFunctions(PyObject* module) {
+  static struct PyModuleDef def = {
+     PyModuleDef_HEAD_INIT,
+     "torch._C._linalg",
+     nullptr,
+     -1,
+     linalg_functions
+  };
+  PyObject* linalg = PyModule_Create(&def);
+  THPLinalgVariableFunctionsModule = linalg;
+  if (!linalg) {
+    throw python_error();
+  }
+  // PyModule_AddObject steals the reference to linalg only on success; on
+  // failure we still own it and must release it to avoid a leak.
+  if (PyModule_AddObject(module, "_linalg", linalg) != 0) {
+    // Clear the cached pointer first so it never refers to a released module.
+    THPLinalgVariableFunctionsModule = nullptr;
+    Py_DECREF(linalg);
+    throw python_error();
+  }
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_nn_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_nn_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..f311cfebe4c5f18c8b741cb03e8f7aaac05f3b0c
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_nn_functions.cpp
@@ -0,0 +1,113 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include "torch/csrc/Device.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/autograd/python_nn_functions.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/utils/tensor_memoryformats.h"
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+using at::Tensor;
+using at::Scalar;
+using at::MemoryFormat;
+using at::Generator;
+using at::IntArrayRef;
+using at::ArrayRef;
+
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+static PyObject* THPNNVariableFunctionsModule = NULL;
+
+// Python-callable helper registered as `_parse_to` in `nn_functions`.
+// Parses `args`/`kwargs` against the three `to(...)` signatures below and
+// returns a new 4-tuple (device, dtype, non_blocking, memory_format), with
+// None in place of any of device / dtype / memory_format that was not given.
+// The body is wrapped in HANDLE_TH_ERRORS / END_HANDLE_TH_ERRORS.
+static PyObject * THPVariable__parse_to(PyObject* module, PyObject* args, PyObject* kwargs)
+{
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "to(Device device=None, ScalarType dtype=None, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(ScalarType dtype, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(Tensor tensor, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+  });
+  ParsedArgs<5> parsed_args;
+  auto r = parser.parse(args, kwargs, parsed_args);
+  // Defer to handle_torch_function when the parser reports a torch-function override.
+  if (r.has_torch_function()) {
+    return handle_torch_function(r, args, kwargs, THPNNVariableFunctionsModule, "torch.nn", "_parse_to");
+  }
+  auto parsed = parse_to_conversion(r, /*allow_copy*/ false); // we don't want copy for nn.Module.to
+  // Elements 0, 1, 2 and 4 of `parsed` are used; element 3 is not read here.
+  auto& device = std::get<0>(parsed);
+  auto& scalarType = std::get<1>(parsed);
+  auto non_blocking = std::get<2>(parsed);
+  auto opt_memory_format = std::get<4>(parsed);
+  auto tuple = THPObjectPtr{PyTuple_New(4)};
+  if (!tuple) throw python_error();
+  // PyTuple_SET_ITEM steals a reference to the stored object, hence the
+  // explicit Py_INCREF before each Py_None is stored.
+  if (device) {
+    PyTuple_SET_ITEM(tuple.get(), 0, THPDevice_New(*device));
+  } else {
+    Py_INCREF(Py_None);
+    PyTuple_SET_ITEM(tuple.get(), 0, Py_None);
+  }
+  if (scalarType) {
+    PyTuple_SET_ITEM(tuple.get(), 1, torch::autograd::utils::wrap(torch::getTHPDtype(*scalarType)));
+  } else {
+    Py_INCREF(Py_None);
+    PyTuple_SET_ITEM(tuple.get(), 1, Py_None);
+  }
+  PyTuple_SET_ITEM(tuple.get(), 2, torch::autograd::utils::wrap(non_blocking));
+  if (opt_memory_format.has_value()) {
+    PyTuple_SET_ITEM(tuple.get(), 3, torch::utils::getTHPMemoryFormat(opt_memory_format.value()));
+  } else {
+    Py_INCREF(Py_None);
+    PyTuple_SET_ITEM(tuple.get(), 3, Py_None);
+  }
+  // Hand ownership of the tuple to the caller.
+  return tuple.release();
+  END_HANDLE_TH_ERRORS
+}
+
+// generated forward declarations start here
+
+${py_forwards}
+
+// Method table used by the `torch._C._nn` module definition.
+// The first entry is the hand-written `_parse_to`; `${py_method_defs}` is a
+// codegen placeholder for the generated entries, and the trailing `{NULL}`
+// entry is the sentinel that terminates the table.
+static PyMethodDef nn_functions[] = {
+  {"_parse_to", castPyCFunctionWithKeywords(THPVariable__parse_to),
+    METH_VARARGS | METH_KEYWORDS, nullptr},
+  ${py_method_defs}
+  {NULL}
+};
+
+// Creates the `torch._C._nn` module from `nn_functions`, records it in
+// THPNNVariableFunctionsModule, and attaches it to `module` as `_nn`.
+// Throws python_error if the module cannot be created or attached.
+void initNNFunctions(PyObject* module) {
+  static struct PyModuleDef def = {
+     PyModuleDef_HEAD_INIT,
+     "torch._C._nn",
+     nullptr,
+     -1,
+     nn_functions
+  };
+  PyObject* nn = PyModule_Create(&def);
+  THPNNVariableFunctionsModule = nn;
+  if (!nn) {
+    throw python_error();
+  }
+  // PyModule_AddObject steals the reference to nn only on success; on
+  // failure we still own it and must release it to avoid a leak.
+  if (PyModule_AddObject(module, "_nn", nn) != 0) {
+    // Clear the cached pointer first so it never refers to a released module.
+    THPNNVariableFunctionsModule = nullptr;
+    Py_DECREF(nn);
+    throw python_error();
+  }
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..139e6b8958336cfcc8328fa33581e9f1ab6d5532
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.cpp
@@ -0,0 +1,52 @@
+#include <Python.h>
+
+#include <vector>
+#include <map>
+#include <string>
+
+#include "torch/csrc/autograd/generated/python_return_types.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/Exceptions.h"
+
+namespace torch { namespace autograd { namespace generated {
+
+${py_return_types}
+
+}}}
+
+namespace torch::autograd {
+
+// Adds `type` to `module` under the attribute `name`.
+// Throws python_error when the attribute cannot be added.
+static void addReturnType(
+    PyObject* module,
+    const char* name,
+    PyTypeObject* type) {
+  // Take an extra reference first so the TypeObject stays alive even in the
+  // unlikely case that a user deletes or overrides the module attribute.
+  Py_INCREF(type);
+  const int status = PyModule_AddObject(module, name, (PyObject*)type);
+  if (status == 0) {
+    return;
+  }
+  // The add failed, so the reference taken above is still ours to release.
+  Py_DECREF(type);
+  throw python_error();
+}
+
+// Creates the `torch._C._return_types` module, runs the generated return-type
+// registrations, and attaches the module to `module` as `_return_types`.
+// Throws python_error if the module cannot be created or attached.
+void initReturnTypes(PyObject* module) {
+  static struct PyModuleDef def = {
+      PyModuleDef_HEAD_INIT, "torch._C._return_types", nullptr, -1, {}};
+  PyObject* return_types_module = PyModule_Create(&def);
+  if (!return_types_module) {
+    throw python_error();
+  }
+
+  // Codegen placeholder, replaced with the generated registration statements.
+  // NOTE(review): if one of those statements throws, return_types_module is
+  // not released on that path — confirm whether that matters.
+  ${py_return_types_registrations}
+
+  // steals a reference to return_types on success
+  if (PyModule_AddObject(module, "_return_types", return_types_module) != 0) {
+    // On failure the reference was not stolen, so release it before throwing.
+    Py_DECREF(return_types_module);
+    throw python_error();
+  }
+}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.h
new file mode 100644
index 0000000000000000000000000000000000000000..ce6c355ea146a272709255b898603764112168b9
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_return_types.h
@@ -0,0 +1,14 @@
+#pragma once
+
+namespace torch {
+namespace autograd {
+namespace generated {
+
+${py_return_types_declarations}
+
+}
+
+void initReturnTypes(PyObject* module);
+
+} // namespace autograd
+} // namespace torch
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_sparse_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_sparse_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..648d91442102e9b950cb2ddb8db545c4b4e1100e
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_sparse_functions.cpp
@@ -0,0 +1,67 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include "torch/csrc/Device.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/autograd/python_sparse_functions.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/structseq.h"
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+using at::Tensor;
+using at::Scalar;
+using at::ScalarType;
+using at::MemoryFormat;
+using at::Generator;
+using at::IntArrayRef;
+using at::TensorList;
+
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+// generated forward declarations start here
+
+${py_forwards}
+
+// Method table used by the `torch._C._sparse` module definition.
+// `${py_method_defs}` is a codegen placeholder for the generated entries;
+// the trailing `{NULL}` entry is the sentinel that terminates the table.
+static PyMethodDef sparse_functions[] = {
+  ${py_method_defs}
+  {NULL}
+};
+
+static PyObject* THPSparseVariableFunctionsModule = NULL;
+
+// Creates the `torch._C._sparse` module from `sparse_functions`, records it
+// in THPSparseVariableFunctionsModule, and attaches it to `module` as
+// `_sparse`. Throws python_error if the module cannot be created or attached.
+void initSparseFunctions(PyObject* module) {
+  static struct PyModuleDef def = {
+     PyModuleDef_HEAD_INIT,
+     "torch._C._sparse",
+     nullptr,
+     -1,
+     sparse_functions
+  };
+  PyObject* sparse = PyModule_Create(&def);
+  THPSparseVariableFunctionsModule = sparse;
+  if (!sparse) {
+    throw python_error();
+  }
+  // PyModule_AddObject steals the reference to sparse only on success; on
+  // failure we still own it and must release it to avoid a leak.
+  if (PyModule_AddObject(module, "_sparse", sparse) != 0) {
+    // Clear the cached pointer first so it never refers to a released module.
+    THPSparseVariableFunctionsModule = nullptr;
+    Py_DECREF(sparse);
+    throw python_error();
+  }
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_special_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_special_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..bf9e109b4a77352cd85ba828b97d67d329543867
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_special_functions.cpp
@@ -0,0 +1,79 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include "torch/csrc/Device.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/autograd/python_special_functions.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/autograd/generated/variable_factories.h"
+#include "torch/csrc/utils/out_types.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/utils/device_lazy_init.h"
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+using at::Tensor;
+using at::Device;
+using at::Layout;
+using at::Scalar;
+using at::ScalarType;
+using at::Backend;
+using at::OptionalDeviceGuard;
+using at::DeviceGuard;
+using at::TensorOptions;
+using at::IntArrayRef;
+using at::Generator;
+using at::TensorList;
+using at::Dimname;
+using at::DimnameList;
+
+using torch::utils::check_out_type_matches;
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+// generated forward declarations start here
+
+${py_forwards}
+
+// Method table used by the `torch._C._special` module definition.
+// `${py_method_defs}` is a codegen placeholder for the generated entries;
+// the trailing `{NULL}` entry is the sentinel that terminates the table.
+static PyMethodDef special_functions[] = {
+  ${py_method_defs}
+  {NULL}
+};
+
+static PyObject* THPSpecialVariableFunctionsModule = NULL;
+
+// Creates the `torch._C._special` module from `special_functions`, records it
+// in THPSpecialVariableFunctionsModule, and attaches it to `module` as
+// `_special`. Throws python_error if the module cannot be created or attached.
+void initSpecialFunctions(PyObject* module) {
+  static struct PyModuleDef def = {
+     PyModuleDef_HEAD_INIT,
+     "torch._C._special",
+     nullptr,
+     -1,
+     special_functions
+  };
+  PyObject* special = PyModule_Create(&def);
+  THPSpecialVariableFunctionsModule = special;
+  if (!special) {
+    throw python_error();
+  }
+  // PyModule_AddObject steals the reference to special only on success; on
+  // failure we still own it and must release it to avoid a leak.
+  if (PyModule_AddObject(module, "_special", special) != 0) {
+    // Clear the cached pointer first so it never refers to a released module.
+    THPSpecialVariableFunctionsModule = nullptr;
+    Py_DECREF(special);
+    throw python_error();
+  }
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_torch_functions.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_torch_functions.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..c17d1040e1892b6a215a8c4264fe5a5345265bc7
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_torch_functions.cpp
@@ -0,0 +1,93 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+// Python bindings for torch.* functions implemented through ATen.
+//
+// The functions are bound as static methods on a class
+// torch._C._VariableFunctions which is also aliased as Variable._torch
+// and also copied into 'torch' module.
+
+#include <Python.h>
+
+// Undefine the copysign macro so that at::copysign works as intended with MSVC
+// https://github.com/python/cpython/blob/c60394c7fc9cc09b16e9675a3eeb5844b6d8523f/PC/pyconfig.h#L196
+#ifdef _MSC_VER
+#undef copysign
+#endif // _MSC_VER
+
+#include "torch/csrc/autograd/python_torch_functions.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/Dtype.h"
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/utils/out_types.h"
+#include "torch/csrc/utils/pybind.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/tensor_layouts.h"
+#include "torch/csrc/utils/tensor_new.h"
+#include "torch/csrc/utils/tensor_numpy.h"
+#include "torch/csrc/jit/frontend/tracer.h"
+#include "torch/csrc/autograd/generated/variable_factories.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/utils/device_lazy_init.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+
+#include <ATen/core/Tensor.h>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#endif
+
+#include <functional>
+#include <initializer_list>
+#include <stdexcept>
+#include <utility>
+
+using at::Tensor;
+using at::Device;
+using at::Layout;
+using at::Scalar;
+using at::ScalarType;
+using at::Backend;
+using at::OptionalDeviceGuard;
+using at::DeviceGuard;
+using at::TensorOptions;
+using at::IntArrayRef;
+using at::Generator;
+using at::TensorList;
+using at::Dimname;
+using at::DimnameList;
+using at::ArrayRef;
+
+using torch::utils::check_out_type_matches;
+using namespace torch::autograd::utils;
+
+// NOTE: See [Sharded File] comment in VariableType
+
+namespace torch::autograd {
+
+// generated forward declarations start here
+
+${py_forwards}
+
+static PyMethodDef torch_functions_shard[] = {  // this shard's method table; no sentinel entry is written in this template
+  ${py_method_defs}
+};
+
+void gatherTorchFunctions${shard_id}(std::vector<PyMethodDef> &torch_functions) {
+  // Append every entry of this shard's table to the caller's vector.
+  torch_functions.insert(
+    torch_functions.end(),
+    std::begin(torch_functions_shard),
+    std::end(torch_functions_shard));
+}
+
+// generated methods start here
+
+${py_methods}
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_variable_methods.cpp b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_variable_methods.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..cd82955c222cfd946a7b766c011746c3aa376670
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/python_variable_methods.cpp
@@ -0,0 +1,1279 @@
+#define TORCH_ASSERT_ONLY_METHOD_OPERATORS
+// ${generated_comment}
+
+#include <Python.h>
+
+// Undefine the copysign macro so that at::copysign works as intended with MSVC
+// https://github.com/python/cpython/blob/c60394c7fc9cc09b16e9675a3eeb5844b6d8523f/PC/pyconfig.h#L196
+#ifdef _MSC_VER
+#undef copysign
+#endif // _MSC_VER
+
+#include "torch/csrc/DynamicTypes.h"
+#include "torch/csrc/Exceptions.h"
+#include "torch/csrc/Size.h"
+#include "torch/csrc/autograd/generated/VariableType.h"
+#include "torch/csrc/autograd/python_variable.h"
+#include "torch/csrc/autograd/utils/python_arg_parsing.h"
+#include "torch/csrc/autograd/utils/error_messages.h"
+#include "torch/csrc/autograd/utils/wrap_outputs.h"
+#include "torch/csrc/jit/frontend/tracer.h"
+#ifdef USE_CUDA
+#include "torch/csrc/cuda/Event.h"
+#endif
+#include "torch/csrc/utils/device_lazy_init.h"
+#include "torch/csrc/utils/object_ptr.h"
+#include "torch/csrc/utils/pycfunction_helpers.h"
+#include "torch/csrc/utils/python_arg_parser.h"
+#include "torch/csrc/utils/python_numbers.h"
+#include "torch/csrc/utils/python_strings.h"
+#include "torch/csrc/utils/python_tuples.h"
+#include "torch/csrc/utils/tensor_apply.h"
+#include "torch/csrc/utils/tensor_list.h"
+#include "torch/csrc/utils/tensor_new.h"
+#include "torch/csrc/utils/tensor_numpy.h"
+#include "torch/csrc/utils/tensor_types.h"
+#include "torch/csrc/utils/structseq.h"
+#include "torch/csrc/autograd/generated/python_return_types.h"
+
+#include <ATen/core/Tensor.h>
+#include <ATen/FuncTorchTLS.h>
+#include "c10/util/Optional.h"
+#include "c10/core/Stream.h"
+
+#include <stdexcept>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+$ops_headers
+#include <ATen/ops/_local_scalar_dense.h>
+#endif
+
+using at::DeviceGuard;
+using at::device_of;
+using at::OptionalDeviceGuard;
+using at::Backend;
+using at::Scalar;
+using at::ScalarType;
+using at::Tensor;
+using c10::Stream;
+using namespace torch::autograd::utils;
+
+namespace torch::autograd {
+
+// Tensor._is_view(): returns True or False according to the unpacked tensor's is_view().
+static PyObject * THPVariable__is_view(PyObject *self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "_is_view", args);
+  }
+  const auto& tensor = THPVariable_Unpack(self);
+  if (!tensor.is_view()) {
+    Py_RETURN_FALSE;
+  }
+  // Reaching this point means is_view() reported true.
+  Py_RETURN_TRUE;
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.apply_(arg): lives on the python object because native_functions.yaml
+// has no support for first-class function arguments.
+// See: ATen/native/README.md for more context
+static PyObject * THPVariable_apply_(PyObject* self, PyObject* arg) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    const auto packed_args = py::make_tuple(py::handle(arg));
+    return handle_torch_function(self, "apply_", packed_args.ptr());
+  }
+  const auto& tensor = THPVariable_Unpack(self);
+  if (tensor.requires_grad()) {
+    throw std::runtime_error(
+        "Can't call apply_() on Variable that requires grad. Use "
+        "var.detach().apply_() instead.");
+  }
+  return THPVariable_Wrap(torch::utils::apply_(tensor, arg));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.size(dim=None): the full symbolic size when `dim` is omitted, otherwise the extent of one dimension.
+static PyObject * THPVariable_size(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "size(int64_t? dim=None)",
+    "size(Dimname dim)",
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<3> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  if (parsed.idx == 1) {
+    // Dimname overload: named tensors are not implemented for the JIT tracer.
+    if (jit::tracer::isTracing()) {
+      TORCH_INTERNAL_ASSERT(false, "NYI: Named tensors w/ JIT");
+    }
+    return wrap(tensor.size(parsed.dimname(0)));
+  }
+  if (parsed.idx != 0) {
+    Py_RETURN_NONE;
+  }
+  if (!parsed.toInt64Optional(0).has_value()) {
+    return THPSize_NewFromSymSizes(tensor);
+  }
+  if (!jit::tracer::isTracing()) {
+    return torch::toPyObject(tensor.sym_size(parsed.toInt64(0)));
+  }
+  // will error out if a tensor has symints
+  return wrap(jit::tracer::getSizeOf(tensor, parsed.toInt64(0)));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.stride(dim=None): a tuple of all symbolic strides when `dim` is omitted, otherwise a single stride.
+static PyObject * THPVariable_stride(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "stride(int64_t? dim=None)",
+    "stride(Dimname dim)",
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<3> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  if (parsed.idx == 1) {
+    return wrap(tensor.stride(parsed.dimname(0)));
+  }
+  if (parsed.idx != 0) {
+    Py_RETURN_NONE;
+  }
+  if (parsed.toInt64Optional(0).has_value()) {
+    return torch::toPyObject(tensor.sym_stride(parsed.toInt64(0)));
+  }
+  // yes, the whole list is called strides in ATen.
+  at::SymIntArrayRef all_strides = tensor.sym_strides();
+  // The tuple is assembled by hand: the normal wrapping cannot be used
+  // because IntArrayRef maps to both torch.Size and tuple in python.
+  // TODO: consider factoring this out
+  THPObjectPtr result(PyTuple_New(all_strides.size()));
+  if (!result) throw python_error();
+  for (size_t pos = 0; pos != all_strides.size(); ++pos) {
+    PyObject* item = torch::toPyObject(all_strides[pos]);
+    if (!item) throw python_error();
+    PyTuple_SET_ITEM(result.get(), pos, item);
+  }
+  return result.release();
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.get_device(): implemented on the python object to avoid dispatch
+// overhead.
+static PyObject * THPVariable_get_device(PyObject* self_, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self_)) {
+    return handle_torch_function(self_, "get_device", args, nullptr);
+  }
+  const auto device_index = THPVariable_Unpack(self_).get_device();
+  return wrap(device_index);
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.has_names(): wraps the result of the tensor's has_names().
+static PyObject * THPVariable_has_names(PyObject* self_, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self_)) {
+    return handle_torch_function(self_, "has_names", args);
+  }
+  const auto named = THPVariable_Unpack(self_).has_names();
+  return wrap(named);
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.data_ptr(): implemented on the python object to avoid dispatch overhead.
+static PyObject * THPVariable_data_ptr(PyObject* self_, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self_)) {
+    return handle_torch_function(self_, "data_ptr", args);
+  }
+  const auto& tensor = THPVariable_Unpack(self_);
+  // Hand the tensor's data pointer to wrap() for conversion to a Python object.
+  return wrap(tensor.data_ptr());
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.storage_offset(): implemented on the python object to avoid dispatch overhead.
+static PyObject * THPVariable_storage_offset(PyObject* self_, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self_)) {
+    return handle_torch_function(self_, "storage_offset");
+  }
+  const auto& tensor = THPVariable_Unpack(self_);
+  py::object offset = py::cast(tensor.sym_storage_offset());
+  return offset.release().ptr();
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.dim(): implemented on the python object to avoid dispatch
+// overhead.
+static PyObject * THPVariable_dim(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "dim", args);
+  }
+  const auto rank = THPVariable_Unpack(self).dim();
+  return THPUtils_packInt64(rank);
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.numel(): implemented on the python object to avoid dispatch
+// overhead.
+static PyObject * THPVariable_numel(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "numel", args);
+  }
+  const auto& tensor = THPVariable_Unpack(self);
+  if (!jit::tracer::isTracing()) {
+    return py::cast(tensor.sym_numel()).release().ptr();
+  }
+  // While tracing, the element count is obtained from the tracer instead.
+  return wrap(jit::tracer::getNumelOf(tensor));
+  END_HANDLE_TH_ERRORS
+}
+
+static Tensor dispatch_contiguous(const Tensor & self, at::MemoryFormat memory_format) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device
+  return self.contiguous(memory_format);
+}
+
+// Tensor.contiguous(*, memory_format): returns `self` itself when it is already contiguous
+// in the requested format, otherwise the wrapped result of dispatch_contiguous().
+static PyObject * THPVariable_contiguous(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "contiguous(*, MemoryFormat memory_format=contiguous_format)",
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const auto& tensor = THPVariable_Unpack(self);
+  const auto memory_format = parsed.memoryformat(0);
+  if (!tensor.is_contiguous(memory_format)) {
+    return THPVariable_Wrap(dispatch_contiguous(tensor, memory_format));
+  }
+  // Already contiguous: this path avoids touching the GIL or current device.
+  // NOTE: this logic is duplicated from VariableType.cpp. The call to
+  // contiguous() must be recorded in the trace regardless of whether it is
+  // actually made here, so the node is recorded manually.
+  if (jit::tracer::isTracing()) {
+    auto state = jit::tracer::getTracingState();
+    auto contiguous_symbol = c10::Symbol::fromQualString("aten::contiguous");
+    auto traced_node = state->createNode(contiguous_symbol, /*num_outputs=*/0);
+    jit::tracer::recordSourceLocation(traced_node);
+    jit::tracer::addInputs(traced_node, "self", tensor);
+    jit::tracer::addInputs(traced_node, "memory_format", memory_format);
+    state->insertNode(traced_node);
+    jit::tracer::addOutput(traced_node, tensor);
+  }
+  Py_INCREF(self);
+  return self;
+  END_HANDLE_TH_ERRORS
+}
+
+static Tensor dispatch_copy_(const Tensor & self, const Tensor & other, bool non_blocking) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device, not other's
+  return self.copy_(other, non_blocking);
+}
+
+// Tensor.copy_(other, non_blocking=False); the `async` spelling of the flag is parsed as deprecated.
+static PyObject * THPVariable_copy_(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "copy_(Tensor other, bool non_blocking=False)",
+    "copy_(Tensor other, bool async=False)|deprecated"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<2> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  const Tensor source = parsed.tensor(0);
+  return THPVariable_Wrap(dispatch_copy_(tensor, source, parsed.toBool(1)));
+  END_HANDLE_TH_ERRORS
+}
+
+template<typename ScalarT>
+static ScalarT dispatch_to(const Tensor & self) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device
+  TORCH_CHECK_VALUE(self.sym_numel() == 1, "only one element tensors can be converted to Python scalars");
+  return self.template item<ScalarT>();
+}
+
+// Tensor.__float__(): extracts the tensor's value as a double and wraps it.
+static PyObject * THPVariable_float_scalar(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "__float__", args);
+  }
+  jit::tracer::warn("Converting a tensor to a Python float", jit::tracer::WARN_PYTHON_DATAFLOW);
+  return wrap(dispatch_to<double>(THPVariable_Unpack(self)));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.__complex__(): extracts the tensor's value as c10::complex<double> and wraps it.
+static PyObject * THPVariable_complex_scalar(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "__complex__", args);
+  }
+  jit::tracer::warn("Converting a tensor to a Python complex", jit::tracer::WARN_PYTHON_DATAFLOW);
+  return wrap(dispatch_to<c10::complex<double>>(THPVariable_Unpack(self)));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.__int__(): converts the tensor's single value into a Python integer.
+static PyObject * THPVariable_integral_scalar(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "__int__", args);
+  }
+  jit::tracer::warn("Converting a tensor to a Python integer", jit::tracer::WARN_PYTHON_DATAFLOW);
+  const auto& tensor = THPVariable_Unpack(self);
+  if (!isFloatingType(tensor.scalar_type())) {
+    return wrap(dispatch_to<int64_t>(tensor));
+  }
+  // Floating tensors are read as double rather than through item<int64_t> so
+  // the ATen overflow checks are avoided; the python integral type can't overflow.
+  return THPUtils_packDoubleAsInt(dispatch_to<double>(tensor));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.__index__(): similar to __int__, but this is the function Python
+// calls when the tensor is used as a slice.
+static PyObject * THPVariable_index_scalar(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "__index__", args);
+  }
+  const auto& tensor = THPVariable_Unpack(self);
+  // TODO: change the condition to `tensor.dim() != 0` once we expose scalars in PyTorch.
+  const bool is_integral = isIntegralType(tensor.scalar_type(), /*includeBool=*/true);
+  if (!is_integral || tensor.sym_numel() != 1) {
+    throw TypeError("only integer tensors of a single element can be converted to an index");
+  }
+  return wrap(dispatch_to<int64_t>(tensor));
+  END_HANDLE_TH_ERRORS
+}
+
+static Tensor dispatch_invert(const Tensor & self) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device
+  return self.bitwise_not();
+}
+
+// Tensor.__invert__(): rejects non-integral, non-bool dtypes, then wraps dispatch_invert().
+static PyObject * THPVariable_invert(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "__invert__", args);
+  }
+  const auto& tensor = THPVariable_Unpack(self);
+  const bool invertible = isIntegralType(tensor.scalar_type(), /*includeBool=*/true);
+  if (!invertible) throw TypeError("~ (operator.invert) is only implemented on integer and Boolean-type tensors");
+  return THPVariable_Wrap(dispatch_invert(tensor));
+  END_HANDLE_TH_ERRORS
+}
+
+static Tensor dispatch_to(const Tensor & self, Device device, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release release_gil;
+  // NOTE: this is where aten::to is recorded in the graph during tracing. aten::to inherits any
+  // TensorOptions field that is not present from the self argument, while the tracer assumes missing
+  // fields should be populated with the default values (eg. float for scalar type). Starting from
+  // self.options() fully specifies all tensor options, and thus the proper trace is recorded.
+  const auto target_options = self.options().device(device).memory_format(optional_memory_format);
+  return self.to(target_options, non_blocking, copy);
+}
+
+static Tensor dispatch_to(const Tensor & self, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release release_gil;  // only the memory format is overridden on self's own options below
+  return self.to(self.options().memory_format(optional_memory_format), non_blocking, copy);
+}
+
+static Tensor dispatch_to(const Tensor & self, ScalarType dtype, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  // TODO: Maybe route this through the TensorOptions overload instead?
+  return self.to(dtype, non_blocking, copy, optional_memory_format);
+}
+
+static Tensor dispatch_to(const Tensor & self, Device device, ScalarType dtype, bool non_blocking, bool copy, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  // TODO: Maybe route this through the TensorOptions overload instead?
+  return self.to(device, dtype, non_blocking, copy, optional_memory_format);
+}
+
+// Tensor.cpu(*, memory_format=None): dispatch_to() a CPU device with non_blocking and copy both false.
+static PyObject * THPVariable_cpu(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "cpu(*, MemoryFormat? memory_format=None)"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const auto memory_format = parsed.memoryformatOptional(0);
+  return THPVariable_Wrap(dispatch_to(tensor, at::Device(at::DeviceType::CPU), /*non_blocking=*/false, /*copy=*/false, memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+static Tensor dispatch_nonzero(const Tensor & self) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device
+  return self.nonzero();
+}
+
+static std::vector<Tensor> dispatch_nonzero_numpy(const Tensor & self) {
+  pybind11::gil_scoped_release release_gil;  // GIL stays released until this helper returns
+  OptionalDeviceGuard guard(device_of(self));  // guard built from self's device
+  return self.nonzero_numpy();
+}
+
+// Tensor.nonzero(*, as_tuple): chooses between dispatch_nonzero() and dispatch_nonzero_numpy().
+static PyObject * THPVariable_nonzero(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "nonzero()",
+    "nonzero(*, bool as_tuple)",
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<2> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const bool as_tuple = parsed.idx != 0 && (parsed.idx != 1 || parsed.toBool(0));
+  if (as_tuple) {
+    return wrap(dispatch_nonzero_numpy(tensor));
+  }
+  return wrap(dispatch_nonzero(tensor));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.cuda(device=None, non_blocking=False, *, memory_format=None); `async` is parsed as deprecated.
+static PyObject * THPVariable_cuda(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "cuda(Device? device=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "cuda(Device? device=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<3> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const auto target = !parsed.isNone(0) ? parsed.device(0) : at::Device(at::DeviceType::CUDA);
+  const auto memory_format = parsed.memoryformatOptional(2);
+  TORCH_CHECK(target.is_cuda(), "Invalid device, must be cuda device");
+  torch::utils::device_lazy_init(at::kCUDA);
+  return THPVariable_Wrap(dispatch_to(tensor, target, parsed.toBool(1), /*copy=*/false, memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.xpu(device=None, non_blocking=False, *, memory_format=None); `async` is parsed as deprecated.
+static PyObject * THPVariable_xpu(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "xpu(Device? device=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "xpu(Device? device=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<3> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const auto target = !parsed.isNone(0) ? parsed.device(0) : at::Device(at::DeviceType::XPU);
+  const auto memory_format = parsed.memoryformatOptional(2);
+  TORCH_CHECK(target.is_xpu(), "Invalid device, must be xpu device");
+  torch::utils::device_lazy_init(at::kXPU);
+  return THPVariable_Wrap(dispatch_to(tensor, target, parsed.toBool(1), /*copy=*/false, memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.ipu(device=None, non_blocking=False, *, memory_format=None); `async` is parsed as deprecated.
+static PyObject * THPVariable_ipu(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "ipu(Device? device=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "ipu(Device? device=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<3> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  const auto target = !parsed.isNone(0) ? parsed.device(0) : at::Device(at::DeviceType::IPU);
+  const auto memory_format = parsed.memoryformatOptional(2);
+  TORCH_CHECK(target.is_ipu(), "Invalid device, must be ipu device");
+  return THPVariable_Wrap(dispatch_to(tensor, target, parsed.toBool(1), /*copy=*/false, memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_to_type(PyObject* self, ScalarType scalarType, c10::optional<c10::MemoryFormat> optional_memory_format) {
+  HANDLE_TH_ERRORS
+  // Shared tail of the dtype shortcut methods: non_blocking and copy are both false.
+  return THPVariable_Wrap(dispatch_to(THPVariable_Unpack(self), scalarType, /*non_blocking=*/false, /*copy=*/false, optional_memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.byte(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Byte.
+static PyObject * THPVariable_byte(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "byte(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Byte, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.char(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Char.
+static PyObject * THPVariable_char(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "char(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Char, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.double(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Double.
+static PyObject * THPVariable_double(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "double(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Double, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.float(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Float.
+static PyObject * THPVariable_float(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "float(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Float, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.cdouble(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::ComplexDouble.
+static PyObject * THPVariable_cdouble(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "cdouble(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::ComplexDouble, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.cfloat(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::ComplexFloat.
+static PyObject * THPVariable_cfloat(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "cfloat(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::ComplexFloat, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.half(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Half.
+static PyObject * THPVariable_half(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "half(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Half, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.int(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Int.
+static PyObject * THPVariable_int(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "int(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Int, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.long(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Long.
+static PyObject * THPVariable_long(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "long(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Long, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.short(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Short.
+static PyObject * THPVariable_short(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "short(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Short, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.bool(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::Bool.
+static PyObject * THPVariable_bool(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "bool(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::Bool, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.bfloat16(*, memory_format=None): forwards to THPVariable_to_type with ScalarType::BFloat16.
+static PyObject * THPVariable_bfloat16(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "bfloat16(*, MemoryFormat? memory_format=None)"
+  });
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  return THPVariable_to_type(self, ScalarType::BFloat16, parsed.memoryformatOptional(0));
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.element_size(): packs the tensor's element_size() as a Python integer.
+static PyObject * THPVariable_element_size(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "element_size", args);
+  }
+  const auto bytes_per_element = THPVariable_Unpack(self).element_size();
+  return THPUtils_packInt64(bytes_per_element);
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.numpy(*, force=False): implemented on the python object because PyObjects
+// are not declarable in native_functions.yaml. See: ATen/native/README.md for more context
+static PyObject * THPVariable_numpy(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "numpy(*, bool force=False)"
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  jit::tracer::warn("Converting a tensor to a NumPy array", jit::tracer::WARN_PYTHON_DATAFLOW);
+  const bool force = parsed.toBool(0);
+  return torch::utils::tensor_to_numpy(tensor, force);
+  END_HANDLE_TH_ERRORS
+}
+
+// Tensor.requires_grad_(requires_grad=True): sets the flag on the tensor and returns the wrapped tensor.
+static PyObject * THPVariable_requires_grad_(PyObject* self, PyObject* args, PyObject* kwargs) {
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "requires_grad_(bool requires_grad=True)",
+  });
+  const auto& tensor = THPVariable_Unpack(self);
+  ParsedArgs<1> arg_slots;
+  auto parsed = parser.parse(self, args, kwargs, arg_slots);
+
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  // temporary hack to improve functorch UX.
+  const auto& tls = at::functorch::functorchTLSAccessor();
+  if (tls) {
+    tls->checkSupportsInplaceRequiresGrad();
+  }
+
+  const bool enable = parsed.toBool(0);
+  // should we throw if requires_grad is true?  var.requires_grad = True throws here
+  // but it's nice to let this be a no-op.
+  if (!(tensor.is_leaf() || enable)) {
+    throw std::runtime_error(autograd::utils::requires_grad_leaf_error(enable));
+  }
+  if (enable && !isDifferentiableType(at::typeMetaToScalarType(tensor.dtype()))) {
+    throw std::runtime_error("only Tensors of floating point dtype can require gradients");
+  }
+  tensor.set_requires_grad(enable);
+  return THPVariable_Wrap(tensor);
+  END_HANDLE_TH_ERRORS
+}
+
+inline bool dispatch_is_contiguous(const Tensor & self, MemoryFormat memory_format) {
+  return self.is_contiguous(memory_format);  // forwards straight to Tensor::is_contiguous
+}
+
+// implemented on the python object to avoid dispatch overhead
+static PyObject * THPVariable_is_contiguous(PyObject* self_, PyObject* args, PyObject* kwargs)
+{
+  // Python binding for Tensor.is_contiguous(*, memory_format=contiguous_format).
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({
+    "is_contiguous(*, MemoryFormat memory_format=contiguous_format)",
+  });
+  ParsedArgs<1> parsed_args;
+  auto r = parser.parse(self_, args, kwargs, parsed_args);
+  if (r.has_torch_function()) {
+    // Py_TYPE gives a borrowed reference. PyObject_Type would return a new reference
+    // that nothing here releases, leaking one reference to the type per call.
+    auto* self_type = reinterpret_cast<PyObject*>(Py_TYPE(self_));
+    return handle_torch_function(r, self_, args, kwargs, self_type, "torch.Tensor");
+  }
+  return wrap(dispatch_is_contiguous(THPVariable_Unpack(self_), r.memoryformat(0)));
+  END_HANDLE_TH_ERRORS
+}
+
+// implemented on the python object to avoid dispatch overhead
+static PyObject * THPVariable_item(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "item", args);
+  }
+  jit::tracer::warn("Converting a tensor to a Python number", jit::tracer::WARN_PYTHON_DATAFLOW);
+  auto& unpacked = THPVariable_Unpack(self);
+  at::Scalar scalar_value;
+  {  // GIL is dropped only while Tensor::item() runs; it is held again for py::cast.
+    pybind11::gil_scoped_release no_gil;
+    scalar_value = unpacked.item();
+  }
+  return py::cast(scalar_value).release().ptr();
+  END_HANDLE_TH_ERRORS
+}
+
+// Implemented on the Python object because native_functions.yaml has no support for first-class functions.
+// See: ATen/native/README.md for more context
+static PyObject * THPVariable_map_(PyObject* self, PyObject* args, PyObject* kwargs) {
+  // Python binding for Tensor.map_(other, callable); the work itself is done by
+  // torch::utils::map_ once the argument checks below pass.
+  HANDLE_TH_ERRORS
+  static PythonArgParser arg_parser({ "map_(Tensor other, PyObject* callable)" });
+  ParsedArgs<2> arg_slots;
+  auto parsed = arg_parser.parse(self, args, kwargs, arg_slots);
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  auto& self_ = THPVariable_Unpack(self);
+  Variable other = parsed.tensor(0);
+  const bool grad_involved = self_.requires_grad() || other.requires_grad();
+  if (grad_involved) {
+    throw std::runtime_error(
+        "Can't call map_() on Variable that requires grad. Use var.detach().map_() instead.");
+  }
+  // Neither operand may report is_python_dispatch() (i.e. be a tensor subclass).
+  const bool uses_python_dispatch = self_.unsafeGetTensorImpl()->is_python_dispatch() ||
+      other.unsafeGetTensorImpl()->is_python_dispatch();
+  TORCH_CHECK(!uses_python_dispatch, ".map_ is not supported for tensor subclasses.");
+  return THPVariable_Wrap(torch::utils::map_(self_, other, parsed.pyobject(1)));
+  END_HANDLE_TH_ERRORS
+}
+
+// Implemented on the Python object because native_functions.yaml has no support for first-class functions.
+// See: ATen/native/README.md for more context
+static PyObject * THPVariable_map2_(PyObject* self, PyObject* args, PyObject* kwargs) {
+  // Python binding for Tensor.map2_(x, y, callable); work is done by torch::utils::map2_.
+  HANDLE_TH_ERRORS
+  static PythonArgParser parser({ "map2_(Tensor x, Tensor y, PyObject* callable)" });
+  auto& self_ = THPVariable_Unpack(self);
+  ParsedArgs<3> parsed_args;
+  auto r = parser.parse(self, args, kwargs, parsed_args);
+  if(r.has_torch_function()){
+    return handle_torch_function(r, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  Variable x = r.tensor(0);
+  Variable y = r.tensor(1);
+  if (self_.requires_grad() || x.requires_grad() || y.requires_grad()) {
+    throw std::runtime_error(
+        "Can't call map2_() on Variable that requires grad. Use "
+        "var.detach().map2_() instead.");
+  }
+  // `self_` is checked too, matching the check THPVariable_map_ applies to its own `self_`.
+  TORCH_CHECK(
+      !self_.unsafeGetTensorImpl()->is_python_dispatch() &&
+      !x.unsafeGetTensorImpl()->is_python_dispatch() && !y.unsafeGetTensorImpl()->is_python_dispatch(),
+      ".map2_ is not supported for tensor subclasses.");
+  return THPVariable_Wrap(torch::utils::map2_(self_, x, y, r.pyobject(2)));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_new(PyObject* self, PyObject* args, PyObject* kwargs) {  // binds Tensor.new
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "new", args, kwargs);
+  }
+  auto& base = THPVariable_Unpack(self);
+  OptionalDeviceGuard device_guard(device_of(base));  // stays in scope for the call below
+  const auto dispatch_key = legacyExtractDispatchKey(base);
+  return THPVariable_Wrap(torch::utils::legacy_tensor_new(dispatch_key, base.scalar_type(), args, kwargs));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_new_tensor(PyObject* self, PyObject* args, PyObject* kwargs) {  // binds Tensor.new_tensor
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "new_tensor", args, kwargs);
+  }
+  auto& source = THPVariable_Unpack(self);
+  OptionalDeviceGuard device_guard(device_of(source));  // stays in scope for the call below
+  const auto dtype = source.scalar_type();  // passed on together with self's dispatch key
+  return THPVariable_Wrap(torch::utils::new_tensor(legacyExtractDispatchKey(source), dtype, args, kwargs));
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_storage(PyObject* self, PyObject* arg) {
+  // Registered in variable_methods under the name "untyped_storage"; `arg` is not
+  // used. The tensor's storage() is handed to createPyObject for the return value.
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "untyped_storage");
+  }
+  return createPyObject(THPVariable_Unpack(self).storage());
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_to(PyObject* self, PyObject* args, PyObject* kwargs) {
+  // Python binding for Tensor.to(...). Depending on which of device / dtype were
+  // supplied, the matching dispatch_to overload is invoked; when nothing at all
+  // was requested, `self` itself is returned with an extra reference.
+  HANDLE_TH_ERRORS
+  static PythonArgParser arg_parser({
+    "to(Device device=None, ScalarType dtype=None, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(ScalarType dtype, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+    "to(Tensor tensor, bool non_blocking=False, bool copy=False, *, MemoryFormat? memory_format=None)",
+  });
+  ParsedArgs<5> arg_slots;
+  auto parsed = arg_parser.parse(self, args, kwargs, arg_slots);
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+  auto [device, scalarType, non_blocking, copy, opt_memory_format] =
+      parse_to_conversion(parsed, /*allow_copy*/ true);
+  torch::utils::maybe_initialize_device(device);
+  if (device && device->is_privateuseone()) {
+    at::globalContext().lazyInitPrivateUse1();
+  }
+  const bool has_device = device.has_value();
+  const bool has_dtype = scalarType.has_value();
+  if (!has_device && !has_dtype && !copy && !opt_memory_format.has_value()) {
+    Py_INCREF(self);
+    return self;
+  }
+  auto& self_ = THPVariable_Unpack(self);
+  if (has_device && has_dtype) {
+    return THPVariable_Wrap(dispatch_to(self_, *device, *scalarType, non_blocking, copy, opt_memory_format));
+  }
+  if (has_device) {
+    return THPVariable_Wrap(dispatch_to(self_, *device, non_blocking, copy, opt_memory_format));
+  }
+  if (has_dtype) {
+    return THPVariable_Wrap(dispatch_to(self_, *scalarType, non_blocking, copy, opt_memory_format));
+  }
+  return THPVariable_Wrap(dispatch_to(self_, non_blocking, copy, opt_memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+// Implemented on the Python object because an arbitrarily nested list is not declarable in native_functions.yaml.
+// See: ATen/native/README.md for more context
+static PyObject * THPVariable_tolist(PyObject* self, PyObject* args) {
+  HANDLE_TH_ERRORS
+  if (check_has_torch_function(self)) {
+    return handle_torch_function(self, "tolist", args);
+  }
+  jit::tracer::warn("Converting a tensor to a Python list", jit::tracer::WARN_PYTHON_DATAFLOW);
+  // Bind by reference like the sibling bindings; plain `auto` copied the Tensor handle.
+  const auto& self_ = THPVariable_Unpack(self);
+  return torch::utils::tensor_to_list(self_);
+  END_HANDLE_TH_ERRORS
+}
+
+static PyObject * THPVariable_type(PyObject* self, PyObject* args, PyObject* kwargs)
+{
+  // Python binding for Tensor.type(dtype=None, non_blocking=False, *, memory_format=None).
+  //   * dtype omitted/None: options_to_string(self.options()) is returned via THPUtils_packString.
+  //   * dtype is a dtype object: converts with dispatch_to(self, scalar_type, ...).
+  //   * dtype is a type or str: the name is resolved with options_from_string and
+  //     the tensor is converted to the resulting device and scalar type.
+  HANDLE_TH_ERRORS
+  static PythonArgParser arg_parser({
+    "type(PyObject* dtype=None, bool non_blocking=False, *, MemoryFormat? memory_format=None)",
+    "type(PyObject* dtype=None, bool async=False, *, MemoryFormat? memory_format=None)|deprecated"
+  });
+  ParsedArgs<3> arg_slots;
+  auto parsed = arg_parser.parse(self, args, kwargs, arg_slots);
+  if (parsed.has_torch_function()) {
+    return handle_torch_function(parsed, self, args, kwargs, THPVariableClass, "torch.Tensor");
+  }
+
+  auto& self_ = THPVariable_Unpack(self);
+  if (parsed.isNone(0)) {
+    return THPUtils_packString(torch::utils::options_to_string(self_.options()));
+  }
+
+  auto dtype_arg = parsed.pyobject(0);
+  auto opt_memory_format = parsed.memoryformatOptional(2);
+  std::string type_name;
+  if (PyType_Check(dtype_arg)) {
+    // The Tensor class itself is spelled "torch.Tensor"; any other type uses tp_name.
+    type_name = (dtype_arg == THPVariableClass) ? "torch.Tensor" : ((PyTypeObject*)dtype_arg)->tp_name;
+  } else if (THPUtils_checkString(dtype_arg)) {
+    type_name = THPUtils_unpackString(dtype_arg);
+  } else if (THPDtype_Check(dtype_arg)) {
+    // A dtype object selects the scalar-type-only dispatch_to overload; no name lookup.
+    const ScalarType requested = parsed.scalartype(0);
+    return THPVariable_Wrap(dispatch_to(self_, requested, /*non_blocking=*/ parsed.toBool(1), /*copy=*/ false, opt_memory_format));
+  } else {
+    throw TypeError("dtype must be a type, str, or dtype object");
+  }
+
+  const at::TensorOptions named_options = torch::utils::options_from_string(type_name);
+  const ScalarType target_dtype = at::typeMetaToScalarType(named_options.dtype());
+  // Keep the tensor's own device unless the named type has a different device type.
+  Device target_device = self_.device();
+  const auto named_device_type = named_options.device().type();
+  if (target_device.type() != named_device_type) {
+    target_device = at::Device(named_device_type);
+  }
+  torch::utils::maybe_initialize_device(target_device);
+  if (target_device.is_privateuseone()) {
+    at::globalContext().lazyInitPrivateUse1();
+  }
+  return THPVariable_Wrap(dispatch_to(self_, target_device, target_dtype, /*non_blocking=*/ parsed.toBool(1), /*copy=*/ false, opt_memory_format));
+  END_HANDLE_TH_ERRORS
+}
+
+// generated methods start here
+
+${py_methods}
+
+static PyObject * THPVariable_bool_scalar(PyObject* self, PyObject* args) {
+  if (!check_has_torch_function(self)) {  // no override: warn, then defer to is_nonzero
+    jit::tracer::warn("Converting a tensor to a Python boolean", jit::tracer::WARN_PYTHON_DATAFLOW);
+    return THPVariable_is_nonzero(self, args);
+  }
+  HANDLE_TH_ERRORS
+  return handle_torch_function(self, "__bool__", args);
+  END_HANDLE_TH_ERRORS
+}
+
+// Wrapper converts a raised TypeError into returning NotImplemented
+// Used to implement binary arithmetic operators
+template <PyObject* (*Func)(PyObject*, PyObject*, PyObject*)>
+static PyObject * TypeError_to_NotImplemented_(PyObject* self, PyObject* args, PyObject* kwargs) {
+  PyObject* result = Func(self, args, kwargs);
+  // Only a failed call whose pending exception matches TypeError is translated.
+  if (result != nullptr || !PyErr_ExceptionMatches(PyExc_TypeError)) {
+    return result;
+  }
+  PyErr_Clear();
+  Py_INCREF(Py_NotImplemented);
+  return Py_NotImplemented;
+}
+
+// set_ has to be defined in the template because the c10::Storage object
+// does not have a type, and we need to make sure the Python storage object's
+// type matches the tensor's type
+static PyObject* THPVariable_set_(
+    PyObject* self_,
+    PyObject* args,
+    PyObject* kwargs) {
+  // Python binding for Tensor.set_(...). The parsed overload index picks the matching
+  // Tensor::set_ / Tensor::set__symint call, which runs with the GIL released.
+  HANDLE_TH_ERRORS
+  static PythonArgParser arg_parser(
+      {
+          "set_()",
+          "set_(Storage source)",
+          "set_(Storage source, SymInt storage_offset, SymIntArrayRef size, SymIntArrayRef stride=None)",
+          "set_(Tensor source)",
+          "set_(Tensor source, SymInt storage_offset, SymIntArrayRef size, SymIntArrayRef stride=None)",
+      },
+      /*traceable=*/false);
+  const Tensor& self = THPVariable_Unpack(self_);
+  ParsedArgs<4> arg_slots;
+  auto _r = arg_parser.parse(args, kwargs, arg_slots);
+  // Shared by the two Storage overloads: reads argument 0 as a Storage and
+  // rejects a typed storage whose scalar type differs from the tensor's dtype.
+  auto checked_storage_arg = [&]() -> at::Storage {
+    at::ScalarType storage_scalar_type;
+    bool is_typed_storage = true;
+    at::Storage storage = _r.storage(0, storage_scalar_type, is_typed_storage);
+    TORCH_CHECK(storage_scalar_type == self.dtype() || !is_typed_storage,
+      "Expected a Storage of type ", self.dtype(),
+      " or an UntypedStorage, but got type ", storage_scalar_type,
+      " for argument 1 'storage'");
+    return storage;
+  };
+
+  switch (_r.idx) {
+    case 0: {
+      // aten::set_(Tensor(a!) self) -> Tensor(a!)
+      auto run = [](const Tensor& target) -> Tensor {
+        pybind11::gil_scoped_release no_gil;
+        return target.set_();
+      };
+      return wrap(run(self));
+    }
+    case 1: {
+      // aten::set_.source_Storage(Tensor(a!) self, Storage source) -> Tensor(a!)
+      at::Storage source_storage = checked_storage_arg();
+      auto run = [](const Tensor& target, Storage source) -> Tensor {
+        pybind11::gil_scoped_release no_gil;
+        return target.set_(source);
+      };
+      return wrap(run(self, source_storage));
+    }
+    case 2: {
+      // aten::set_.source_Storage_storage_offset(Tensor(a!) self, Storage
+      // source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
+      at::Storage source_storage = checked_storage_arg();
+      auto run = [](const Tensor& target,
+                    Storage source,
+                    c10::SymInt storage_offset,
+                    c10::SymIntArrayRef size,
+                    c10::SymIntArrayRef stride) -> Tensor {
+        pybind11::gil_scoped_release no_gil;
+        return target.set__symint(source, storage_offset, size, stride);
+      };
+      return wrap(run(
+          self, source_storage, _r.toSymInt(1), _r.symintlist(2), _r.symintlist(3)));
+    }
+    case 3: {
+      // aten::set_.source_Tensor(Tensor(a!) self, Tensor source) -> Tensor(a!)
+      auto run = [](const Tensor& target, const Tensor& source) -> Tensor {
+        TORCH_CHECK(source.dtype() == target.dtype(), "Could not set tensor of type ", source.dtype(), " to a tensor of type ", target.dtype());
+        pybind11::gil_scoped_release no_gil;
+        return target.set_(source);
+      };
+      return wrap(run(self, _r.tensor(0)));
+    }
+    case 4: {
+      // aten::set_.source_Tensor_storage_offset(Tensor(a!) self, Tensor
+      // source, int storage_offset, int[] size, int[] stride=[]) -> Tensor(a!)
+      at::Tensor source_tensor = _r.tensor(0);
+      auto run = [](const Tensor& target,
+                    const Tensor& source,
+                    c10::SymInt storage_offset,
+                    c10::SymIntArrayRef size,
+                    c10::SymIntArrayRef stride) -> Tensor {
+        pybind11::gil_scoped_release no_gil;
+        return target.set__symint(source, storage_offset, size, stride);
+      };
+      return wrap(run(
+          self, source_tensor, _r.toSymInt(1), _r.symintlist(2), _r.symintlist(3)));
+    }
+  }
+  Py_RETURN_NONE;
+  END_HANDLE_TH_ERRORS
+}
+
+// XXX: ops that are bound here are not exposed to the C++ api nor the JIT.
+// Any new ops added here should be accompanied with a comment why they are not
+// being registered through native_functions.yaml, and be tagged cpp / JIT
+PyMethodDef variable_methods[] = {
+  // These magic methods go through TypeError_to_NotImplemented_, which turns a raised TypeError into a returned NotImplemented
+  {"__add__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_add>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__radd__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_add>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__iadd__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_add_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__rmul__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_mul>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__mul__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_mul>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__imul__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_mul_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__sub__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_sub>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__isub__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_sub_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__div__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_div>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__truediv__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_div>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__floordiv__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_floor_divide>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__idiv__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_div_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__ifloordiv__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_floor_divide_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__mod__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_remainder>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__imod__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_remainder_>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__eq__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_eq>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__ne__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_ne>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__lt__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_lt>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__le__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_le>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__gt__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_gt>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__ge__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_ge>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__rand__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_bitwise_and>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__ror__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_bitwise_or>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__rxor__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_bitwise_xor>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"__bool__", THPVariable_bool_scalar, METH_NOARGS, NULL},
+  {"__float__", THPVariable_float_scalar, METH_NOARGS, NULL},
+  {"__complex__", THPVariable_complex_scalar, METH_NOARGS, NULL},
+  {"__int__", THPVariable_integral_scalar, METH_NOARGS, NULL},
+  {"__long__", THPVariable_integral_scalar, METH_NOARGS, NULL},  // same handler as __int__
+  {"__index__", THPVariable_index_scalar, METH_NOARGS, NULL},
+  {"__nonzero__", THPVariable_bool_scalar, METH_NOARGS, NULL},  // same handler as __bool__
+  {"__invert__", THPVariable_invert, METH_NOARGS, NULL},
+  {"__matmul__", castPyCFunctionWithKeywords(TypeError_to_NotImplemented_<THPVariable_matmul>), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"_is_view", THPVariable__is_view, METH_NOARGS, NULL},
+  {"apply_", THPVariable_apply_, METH_O, NULL},
+  {"bfloat16", castPyCFunctionWithKeywords(THPVariable_bfloat16), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"byte", castPyCFunctionWithKeywords(THPVariable_byte), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"char", castPyCFunctionWithKeywords(THPVariable_char), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"contiguous", castPyCFunctionWithKeywords(THPVariable_contiguous), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"copy_", castPyCFunctionWithKeywords(THPVariable_copy_), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"cpu", castPyCFunctionWithKeywords(THPVariable_cpu), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"cuda", castPyCFunctionWithKeywords(THPVariable_cuda), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"xpu", castPyCFunctionWithKeywords(THPVariable_xpu), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"ipu", castPyCFunctionWithKeywords(THPVariable_ipu), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"data_ptr", THPVariable_data_ptr, METH_NOARGS, NULL},
+  {"dim", THPVariable_dim, METH_NOARGS, NULL},
+  {"has_names", THPVariable_has_names, METH_NOARGS, NULL},
+  {"double", castPyCFunctionWithKeywords(THPVariable_double), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"cdouble", castPyCFunctionWithKeywords(THPVariable_cdouble), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"element_size", THPVariable_element_size, METH_NOARGS, NULL},
+  {"float", castPyCFunctionWithKeywords(THPVariable_float), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"cfloat", castPyCFunctionWithKeywords(THPVariable_cfloat), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"get_device", THPVariable_get_device, METH_NOARGS, NULL},
+  {"bool", castPyCFunctionWithKeywords(THPVariable_bool), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"half", castPyCFunctionWithKeywords(THPVariable_half), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"int", castPyCFunctionWithKeywords(THPVariable_int), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"is_contiguous", castPyCFunctionWithKeywords(THPVariable_is_contiguous), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"item", THPVariable_item, METH_NOARGS, NULL},
+  {"long", castPyCFunctionWithKeywords(THPVariable_long), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"map_", castPyCFunctionWithKeywords(THPVariable_map_), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"map2_", castPyCFunctionWithKeywords(THPVariable_map2_), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"ndimension", THPVariable_dim, METH_NOARGS, NULL},  // same handler as "dim"
+  {"nelement", THPVariable_numel, METH_NOARGS, NULL},  // same handler as "numel"
+  {"new", castPyCFunctionWithKeywords(THPVariable_new), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"new_tensor", castPyCFunctionWithKeywords(THPVariable_new_tensor), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"nonzero", castPyCFunctionWithKeywords(THPVariable_nonzero), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"numel", THPVariable_numel, METH_NOARGS, NULL},
+  {"numpy", castPyCFunctionWithKeywords(THPVariable_numpy), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"requires_grad_", castPyCFunctionWithKeywords(THPVariable_requires_grad_), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"set_", castPyCFunctionWithKeywords(THPVariable_set_), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"short", castPyCFunctionWithKeywords(THPVariable_short), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"size", castPyCFunctionWithKeywords(THPVariable_size), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"untyped_storage", THPVariable_storage, METH_NOARGS, NULL},  // handler is named THPVariable_storage
+  {"storage_offset", THPVariable_storage_offset, METH_NOARGS, NULL},
+  {"stride", castPyCFunctionWithKeywords(THPVariable_stride), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"to", castPyCFunctionWithKeywords(THPVariable_to), METH_VARARGS | METH_KEYWORDS, NULL},
+  {"tolist", THPVariable_tolist, METH_NOARGS, NULL},
+  {"type", castPyCFunctionWithKeywords(THPVariable_type), METH_VARARGS | METH_KEYWORDS, NULL},
+  ${py_method_defs}
+  {NULL}  // all-NULL entry that ends the PyMethodDef array
+};
+
+} // namespace torch::autograd
diff --git a/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/variable_factories.h b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/variable_factories.h
new file mode 100644
index 0000000000000000000000000000000000000000..1129d899c89752eb1e6af264fc8dfcef0c862bcd
--- /dev/null
+++ b/tuning-competition-baseline/.venv/lib/python3.11/site-packages/torchgen/packaged/autograd/templates/variable_factories.h
@@ -0,0 +1,135 @@
+#pragma once
+
+// ${generated_comment}
+
+#include <ATen/core/Tensor.h>
+#include <ATen/TracerMode.h>
+#include <ATen/core/grad_mode.h>
+#include <c10/util/ArrayRef.h>
+#include <c10/core/MemoryFormat.h>
+#include <torch/csrc/api/include/torch/detail/TensorDataContainer.h>
+#include <torch/csrc/autograd/variable.h>
+
+#ifndef AT_PER_OPERATOR_HEADERS
+#include <ATen/Functions.h>
+#else
+#include <ATen/ops/from_blob.h>
+$ops_headers
+#endif
+
+#include <functional>
+#include <initializer_list>
+#include <utility>
+
+namespace torch {
+
+/// NOTE: Currently `torch::tensor(...)` doesn't support mixed data types
+/// (i.e. `torch::tensor({{bool, 2.0}})` doesn't work). We might be able to
+/// support it in the future by iterating over all sub-lists to find
+/// the largest data type that can represent all of the elements, or by using
+/// variadic templates.
+///
+/// NOTE: C++ `torch::tensor` with a floating-point type or an `at::ArrayRef` / `std::vector` /
+/// (nested) braced-init-list of floating-point types always produces a tensor of dtype
+/// `torch::get_default_dtype()`, matching Python `torch.tensor` behavior.
+///
+/// NOTE: C++ `torch::tensor` with an integer type or an `at::ArrayRef` / `std::vector` /
+/// (nested) braced-init-list of integer types always produces a tensor of dtype `at::kLong`
+/// (aka. int64_t), matching Python `torch.tensor` behavior.
+///
+/// NOTE: The following dtypes are not supported by `torch::tensor` currently:
+/// - `unsigned int`
+/// - `unsigned long int`
+/// - `unsigned long long int`
+/// - `long long int`
+inline at::Tensor tensor(detail::TensorDataContainer tensor_data_container, const at::TensorOptions& options = {}) {
+  // requires_grad is stripped from the options handed to the kernel because it is
+  // ignored there anyway (and an assertion that it isn't set would otherwise
+  // fail); it is applied explicitly through make_variable instead.
+  const bool requires_grad = options.requires_grad();
+  const at::TensorOptions kernel_options = options.requires_grad(c10::nullopt);
+  return autograd::make_variable(
+      tensor_data_container.convert_to_tensor(kernel_options), requires_grad);
+}
+
+/// A generic deleter function.
+using Deleter = std::function<void(void*)>;
+using at::MemoryFormat;
+
+/// Exposes the given `data` as a `Tensor` without taking ownership of the
+/// original data. `sizes` should specify the shape of the tensor, `strides` the
+/// stride in each dimension. The `deleter` function (a
+/// `std::function<void(void*)>`) will be called on the `data` when the Tensor
+/// data would normally be deallocated. The `TensorOptions` specify additional
+/// configuration options for the returned tensor, such as what type to
+/// interpret the `data` as.
+inline at::Tensor from_blob(
+    void* data,
+    at::IntArrayRef sizes, at::IntArrayRef strides,
+    const Deleter& deleter,
+    const at::TensorOptions& options = at::TensorOptions()) {
+  at::Tensor blob;
+  {  // both guards end with this scope, before make_variable is called
+    at::AutoDispatchBelowAutograd guard;  // TODO: remove
+    at::tracer::impl::NoTracerDispatchMode tracer_guard;
+    blob = at::from_blob(data, sizes, strides, deleter, options.requires_grad(c10::nullopt));
+  }
+  return autograd::make_variable(blob, options.requires_grad());
+}
+
+/// Exposes the given `data` as a `Tensor` without taking ownership of the
+/// original data. `sizes` should specify the shape of the tensor, `strides` the
+/// stride in each dimension. The `TensorOptions`
+/// specify additional configuration options for the returned tensor, such as
+/// what type to interpret the `data` as.
+inline at::Tensor from_blob(
+    void* data, at::IntArrayRef sizes, at::IntArrayRef strides,
+    const at::TensorOptions& options = at::TensorOptions()) {
+  // requires_grad is cleared for the at::from_blob call and passed to make_variable.
+  at::Tensor blob;
+  {
+    at::AutoDispatchBelowAutograd guard;  // TODO: remove
+    at::tracer::impl::NoTracerDispatchMode tracer_guard;
+    blob = at::from_blob(data, sizes, strides, options.requires_grad(c10::nullopt));
+  }
+  return autograd::make_variable(blob, options.requires_grad());
+}
+
+/// Exposes the given `data` as a `Tensor` without taking ownership of the
+/// original data. `sizes` should specify the shape of the tensor. The `deleter`
+/// (a `std::function<void(void*)>`) function will be called on the `data` when
+/// the Tensor data would normally be deallocated. The `TensorOptions` specify
+/// additional configuration options for the returned tensor, such as what type
+/// to interpret the `data` as.
+inline at::Tensor from_blob(
+    void* data, at::IntArrayRef sizes,
+    const Deleter& deleter,
+    const at::TensorOptions& options = at::TensorOptions()) {
+  at::Tensor blob;
+  {  // guards cover only the at::from_blob call
+    at::AutoDispatchBelowAutograd guard;  // TODO: remove
+    at::tracer::impl::NoTracerDispatchMode tracer_guard;
+    blob = at::from_blob(data, sizes, deleter, options.requires_grad(c10::nullopt));
+  }
+  return autograd::make_variable(blob, options.requires_grad());
+}
+
+/// Exposes the given `data` as a `Tensor` without taking ownership of the
+/// original data. `sizes` should specify the shape of the tensor. The
+/// `TensorOptions` specify additional configuration options for the returned
+/// tensor, such as what type to interpret the `data` as.
+inline at::Tensor from_blob(
+    void* data, at::IntArrayRef sizes,
+    const at::TensorOptions& options = at::TensorOptions()) {
+  at::Tensor blob;
+  {  // guards cover only the at::from_blob call
+    at::AutoDispatchBelowAutograd guard;  // TODO: remove
+    at::tracer::impl::NoTracerDispatchMode tracer_guard;
+    blob = at::from_blob(data, sizes, options.requires_grad(c10::nullopt));
+  }
+  return autograd::make_variable(blob, options.requires_grad());
+}
+
+${function_definitions}
+
+} // namespace torch