Add files using upload-large-folder tool

6a22ec9 verified 6 months ago

44.7 kB

	# Copyright (c) Microsoft Corporation. All rights reserved.
	# Licensed under the MIT License.

	# NOTE: This will eventually replace the existing constant_folding.py and evaluator.py files.

	from __future__ import annotations

	import dataclasses
	import logging
	import math
	import typing
	from typing import Any, Callable, Collection, Iterable, Sequence, Union

	import numpy as np
	import onnx
	import onnx.reference.ops
	import onnx_ir as ir

	import onnxscript.utils.utils as utils
	from onnxscript.ir import _tape

	# Default size limit for input tensors considered for constant folding.
	# NOTE(review): presumably the default for FoldConstantsPass.input_size_limit — confirm at call sites.
	DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT = 512

	# Default size limit for output tensors produced by constant folding.
	# NOTE(review): presumably the default for FoldConstantsPass.output_size_limit — confirm at call sites.
	DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT = 512 * 512


	# Op types that `_is_non_deterministic_op` treats as non-deterministic
	# (when the node is in the ONNX domain); such nodes are never folded.
	_NON_DETERMINISTIC_OPS = frozenset(
	    {
	        "RandomUniform",
	        "RandomNormal",
	        "RandomUniformLike",
	        "RandomNormalLike",
	        "Multinomial",
	    }
	)


	# Module-level logger shared by all helpers and the pass in this file.
	logger = logging.getLogger(__name__)


	def _is_control_flow_op(node: ir.Node) -> bool:
	    """Return True if any attribute of ``node`` is a graph or a list of graphs."""
	    for attr in node.attributes.values():
	        if attr.type in (ir.AttributeType.GRAPH, ir.AttributeType.GRAPHS):
	            return True
	    return False


	def _is_non_deterministic_op(node: ir.Node) -> bool:
	    """Return True if ``node`` is an ONNX-domain op listed in ``_NON_DETERMINISTIC_OPS``."""
	    if node.op_type not in _NON_DETERMINISTIC_OPS:
	        return False
	    return utils.is_onnx_domain(node.domain)


	def _is_onnx_op(node: ir.Node, op_type: str) -> bool:
	    """Return True if ``node`` has the given op type and is in the ONNX domain."""
	    if node.op_type != op_type:
	        return False
	    return utils.is_onnx_domain(node.domain)


	# "Standard" evaluators are used to perform constant-folding.
	# The API below works only for non-control-flow ops (ops without any graph-attributes).
	# This currently uses ONNX's reference implementation, but we could also
	# use ORT's implementation if we want to.


	def _process_constant_node(node: ir.Node) -> None:
	    """Set ``const_value`` (plus shape and dtype) on the output of a Constant node.

	    The node is left untouched unless it is an ONNX-domain ``Constant`` op with
	    exactly one attribute and exactly one output, and that attribute is one of
	    the recognised ``value*`` attributes.
	    """
	    # Use the same domain test as the rest of this module (see `_is_onnx_op`),
	    # so every spelling of the ONNX domain accepted there is accepted here too.
	    if not _is_onnx_op(node, "Constant"):
	        return
	    if len(node.attributes) != 1 or len(node.outputs) != 1:
	        return
	    attr_name, attr_value = next(iter(node.attributes.items()))
	    ir_value = node.outputs[0]

	    if attr_value is None or not isinstance(attr_value, ir.Attr):
	        return

	    const_value: ir.TensorProtocol
	    if attr_name in {"value_float", "value_floats"}:
	        const_value = ir.Tensor(
	            np.array(attr_value.value, dtype=np.float32), name=ir_value.name
	        )
	    elif attr_name in {"value_int", "value_ints"}:
	        const_value = ir.Tensor(np.array(attr_value.value, dtype=np.int64), name=ir_value.name)
	    elif attr_name in {"value_string", "value_strings"}:
	        const_value = ir.StringTensor(
	            np.array(attr_value.value, dtype=np.bytes_), name=ir_value.name
	        )
	    elif attr_name == "value":
	        const_value = typing.cast(ir.TensorProtocol, attr_value.value)
	    else:
	        # Any other attribute name is not handled here.
	        return

	    ir_value.const_value = const_value
	    ir_value.shape = const_value.shape  # type: ignore
	    ir_value.dtype = const_value.dtype


	def basic_constant_propagation(nodes: Iterable[ir.Node]) -> None:
	    """Run the basic constant-propagation step over ``nodes``.

	    Each node is handed to ``_process_constant_node``, which records the
	    ``const_value`` on the output of Constant op nodes.
	    """
	    for candidate in nodes:
	        _process_constant_node(candidate)


	class ReferenceEvaluator:
	    """Evaluates single ops through ``onnx.reference.ops``."""

	    def get_evaluator(self, domain: str, op: str, version: int) -> Callable | None:
	        """Return the ``eval`` callable of the op implementation, or None.

	        None is returned when ``onnx.reference.ops.load_op`` (or the attribute
	        access) raises for the requested domain/op/version.
	        """
	        try:
	            op_impl_class = onnx.reference.ops.load_op(domain, op, version)
	            return op_impl_class.eval  # noqa: TRY300
	        except Exception:
	            return None

	    def evaluate(self, domain: str, op: str, version: int, *args, **kwargs) -> Any:
	        """Evaluate ``domain::op`` at ``version`` and return the result, or None.

	        Args:
	            domain: Op domain.
	            op: Op type.
	            version: Opset version used to look up the implementation.
	            *args: Positional arguments forwarded to the evaluator (op inputs).
	            **kwargs: Keyword arguments forwarded to the evaluator (op attributes).

	        Returns:
	            Whatever the evaluator returns, or None when no evaluator is
	            available or the evaluation raises (the failure is logged).
	        """
	        logger.debug("Evaluating %s::%s", domain, op)
	        evaluator = self.get_evaluator(domain, op, version)
	        if evaluator is None:
	            return None
	        try:
	            return evaluator(*args, **kwargs)
	        except Exception as e:
	            # Best effort: a failed evaluation just means "do not fold".
	            logger.warning("Evaluation failed: %s", e)
	            return None


	# Shared evaluator instance; FoldConstantsPass.process_node uses it to evaluate nodes.
	_reference_evaluator = ReferenceEvaluator()


	@dataclasses.dataclass
	class Replacement:
	    """A replacement for a node in the graph."""

	    # Values passed by FoldConstantsPass.replace_node as the substitutes for
	    # the replaced node's outputs.
	    new_outputs: Sequence[ir.Value]
	    # Nodes passed by FoldConstantsPass.replace_node as the substitutes for
	    # the replaced node itself.
	    new_nodes: Sequence[ir.Node]


	# The optimizer tracks an optional symbolic value for each value in the model.
	# The symbolic value attached to a value X can be:
	# - another IR value Y (indicating that X is equal to Y)
	# - a list of IR values [Y1, Y2, ...] (indicating that X is a sequence of values Y1, Y2, ...)
	# - a Shape object (indicating that X is a shape value)
	# A Shape object as a symbolic value indicates that the corresponding value is
	# a 1-D (or 0-D) tensor of INT64 values. The values in this object may be constants
	# or symbolic dimension values (like "batch_size", "sequence_length", etc.).
	# Currently, we assume that symbolic dimensions are also guaranteed to be non-negative.
	# TODO: Add support for negative symbolic dimensions.

	SymbolicValue = Union[ir.Value, list[ir.Value], ir.Shape]


	class OptimizerState:
	    """Per-run bookkeeping of the symbolic values attached to IR values."""

	    def __init__(self):
	        self._sym_value_map: dict[ir.Value, SymbolicValue] = {}
	        self._initializer_inputs: list[set[ir.Value]] = []

	    @property
	    def symbolic_value_map(self) -> dict[ir.Value, SymbolicValue]:
	        """The underlying value -> symbolic value mapping."""
	        return self._sym_value_map

	    def get_sym_value(self, value: ir.Value | None) -> SymbolicValue | None:
	        """Return the symbolic value recorded for ``value``, or None."""
	        return None if value is None else self._sym_value_map.get(value)

	    def set_sym_value(self, value: ir.Value, sym_value: SymbolicValue) -> None:
	        """Record ``sym_value`` as the symbolic value of ``value``."""
	        self._sym_value_map[value] = sym_value

	    def get_shape_value(self, value: ir.Value | None) -> ir.Shape | None:
	        """Return ``value`` interpreted as a shape, or None.

	        A small constant INT64 value takes precedence (it must be 1-D);
	        otherwise a recorded symbolic value of type ``ir.Shape`` is used.
	        """
	        constant = _get_numpy_value(value, ir.DataType.INT64, size_limit=10)
	        if constant is None:
	            recorded = self.get_sym_value(value)
	            # TODO use shape of value if available
	            return recorded if isinstance(recorded, ir.Shape) else None
	        if constant.ndim != 1:
	            return None
	        return ir.Shape(constant.tolist())


	# The "partial evaluators" below are non-standard evaluators. They are used to perform
	# partial evaluation and/or static program analysis (abstract interpretation).

	# A partial-evaluator function takes a node, a RewriterContext, OptimizerState and returns
	# a Replacement for the node or None (if no replacement is needed). It may also return just
	# the ir.Value or ir.Values to replace the output values of the node, when the new nodes
	# can be inferred from the RewriterContext used to build the new nodes.

	# Builder handed to each partial evaluator (the ``op`` parameter of the functions below).
	RewriterContext = _tape.Builder
	# Everything a partial evaluator may return; see the comment above.
	ReturnValue = Union[Replacement, Sequence[ir.Value], ir.Value, None]
	# Signature shared by all registered partial evaluators.
	PartialEvaluatorFunction = Callable[[ir.Node, RewriterContext, OptimizerState], ReturnValue]


	@dataclasses.dataclass
	class PartialEvaluator:
	    """A partial-evaluator function together with the op versions it supports.

	    The supported range is the closed interval (min_version, max_version).
	    Either bound may be None, meaning the range is unbounded on that side.
	    """

	    min_version: int | None
	    max_version: int | None
	    function: PartialEvaluatorFunction

	    def valid_for(self, version: int) -> bool:
	        """Returns True if this evaluator is applicable for the given version."""
	        if self.min_version is not None and version < self.min_version:
	            return False
	        if self.max_version is not None and version > self.max_version:
	            return False
	        return True


	class PartialEvaluatorRegistry:
	    """A class that maintains a registry of evaluators for ops."""

	    def __init__(self):
	        # Maps (domain, opname) to the evaluators registered for that op.
	        self.op_evaluators: dict[tuple[str, str], list[PartialEvaluator]] = {}

	    def lookup_evaluators(self, domain: str, opname: str, version: int):
	        """Return the functions registered for the op that are valid for ``version``."""
	        evaluator_list = self.op_evaluators.get((domain, opname), [])
	        return [
	            evaluator.function for evaluator in evaluator_list if evaluator.valid_for(version)
	        ]

	    def register(
	        self, opname: str, domain: str = "", version=None
	    ) -> Callable[[PartialEvaluatorFunction], PartialEvaluatorFunction]:
	        """Return a decorator that registers a partial evaluator for an op.

	        Args:
	            opname: Op type the evaluator handles.
	            domain: Op domain (default: "").
	            version: None (any version), an int (exactly that version), or a
	                ``(min_version, max_version)`` tuple whose entries may be None.

	        Raises:
	            TypeError: If ``version`` is none of the supported forms.
	        """
	        if version is None:
	            min_version = None
	            max_version = None
	        elif isinstance(version, int):
	            min_version = version
	            max_version = version
	        elif isinstance(version, tuple):
	            min_version, max_version = version
	        else:
	            # Fail here with a clear message instead of a NameError on the
	            # unbound min_version/max_version when the decorator is applied.
	            raise TypeError(
	                "version must be None, an int, or a (min_version, max_version) tuple, "
	                f"got {type(version).__name__}"
	            )

	        evaluator_list = self.op_evaluators.setdefault((domain, opname), [])

	        def decorator(function: PartialEvaluatorFunction) -> PartialEvaluatorFunction:
	            evaluator_list.append(PartialEvaluator(min_version, max_version, function))
	            return function

	        return decorator


	# Module-level registry consulted by FoldConstantsPass.process_node.
	registry: PartialEvaluatorRegistry = PartialEvaluatorRegistry()

	# Decorator factory used below as ``@register("OpName", ...)``.
	register = registry.register


	def _same_shape(shape1: ir.Shape, shape2: ir.Shape) -> bool:
	    """Return True if the two shapes are known to be identical.

	    Tuple comparison of the dims is sufficient unless a dimension is unknown
	    (a symbolic dim whose value is None): (Batch, 1024) equals (Batch, 1024),
	    but (None, 1024) is never considered equal to (None, 1024).
	    """
	    for dim in shape1:
	        if isinstance(dim, ir.SymbolicDim) and dim.value is None:
	            return False
	    return shape1.dims == shape2.dims


	def _get_numpy_value(
	    val: ir.Value | None, dtype: ir.DataType | None = None, size_limit: int | None = None
	) -> np.ndarray | None:
	    """Return the constant of ``val`` as a numpy array, when one is available.

	    None is returned when ``val`` is None or not a constant, when its element
	    type differs from ``dtype`` (if given), when its size exceeds ``size_limit``
	    (if given), or when its external data file cannot be found.
	    """
	    if val is None:
	        return None
	    tensor = val.const_value
	    if tensor is None:
	        return None
	    if dtype is not None and tensor.dtype != dtype:
	        return None
	    if size_limit is not None and tensor.size > size_limit:
	        return None
	    try:
	        # Reinterpret the array with `.view()` because some implementations of
	        # ir.TensorProtocol (e.g. PyTorch<=2.7) do not use ml_dtypes for bfloat16 etc.
	        array = tensor.numpy().view(tensor.dtype.numpy())
	    except FileNotFoundError:
	        # External data is not available.
	        logger.warning(
	            "External data for value '%s' is not available. "
	            "This may lead to incorrect constant folding.",
	            val.name,
	        )
	        return None
	    assert isinstance(array, np.ndarray)
	    return array


	def _get_bool_value(val: ir.Value | None) -> bool | None:
	    """Return the value of a single-element boolean constant, else None."""
	    array = None if val is None else _get_numpy_value(val)
	    if array is None:
	        return None
	    if array.size != 1 or array.dtype != bool:
	        return None
	    return array.item(0)


	def _get_input(node: ir.Node, index: int) -> ir.Value | None:
	    """Return input ``index`` of ``node``, or None when the node has fewer inputs."""
	    return node.inputs[index] if index < len(node.inputs) else None


	def _get_output(node: ir.Node, index: int) -> ir.Value | None:
	    """Return output ``index`` of ``node``, or None when the node has fewer outputs."""
	    return node.outputs[index] if index < len(node.outputs) else None


	def _update_type(value: ir.Value, type: ir.TypeProtocol | None) -> None:
	    """Overwrite the type of ``value`` with ``type`` unless ``type`` is None."""
	    if type is None:
	        return
	    # TODO: merge types
	    value.type = type


	def _get_input_element_type(node: ir.Node, index: int) -> int:
	    """Return the element-type enum value of input ``index``.

	    ``ir.DataType.UNDEFINED.value`` is returned when the input is missing or
	    has no type.
	    """
	    operand = _get_input(node, index)
	    if operand is None or operand.type is None:
	        return ir.DataType.UNDEFINED.value
	    return operand.type.dtype.value


	def _get_int_attribute(node: ir.Node, name: str, default: int | None = None) -> int | None:
	    """Return the int attribute ``name`` of ``node``.

	    ``default`` is returned when the attribute is absent. None is returned
	    when the attribute is present but is not an ``ir.Attr`` holding an int.
	    """
	    if name not in node.attributes:
	        return default
	    attr = node.attributes[name]
	    if not isinstance(attr, ir.Attr):
	        return None
	    candidate = attr.value
	    # A non-int value means an invalid model: attribute has invalid/unexpected
	    # type. For now, we just return None. We could raise an error too.
	    return candidate if isinstance(candidate, int) else None


	@register("Abs")
	def abs(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace an Abs node by Identity when applicable.

	    Currently this only covers Abs applied to a (symbolic) shape value, e.g.
	    [1, 1, SequenceLength]: symbolic dimensions are assumed non-negative, so
	    Abs is redundant as long as no known integer dimension is negative.
	    """
	    operand = _get_input(node, 0)
	    shape_value = state.get_shape_value(operand)
	    if shape_value is None:
	        return None
	    for dim in shape_value:
	        if isinstance(dim, int) and dim < 0:
	            return None
	    return op.Identity(operand)


	@register("Gather")
	def gather(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace a Gather node by a constant when applicable.

	    Currently, handles the case of Gathering from a shape tensor along axis 0
	    with constant 1-D indices. The gathered dimensions are always recorded as
	    the symbolic value of the output; a Constant replacement is returned only
	    when every gathered dimension is a known integer.
	    """
	    input = _get_input(node, 0)
	    indices = _get_input(node, 1)
	    if input is None or indices is None:
	        return None
	    input_sym_value = state.get_shape_value(input)
	    if input_sym_value is None:
	        return None
	    # The ONNX default for Gather's `axis` is 0, so an absent attribute must
	    # be read as 0 rather than as "unknown".
	    axis = _get_int_attribute(node, "axis", 0)
	    if axis != 0:
	        return None
	    indices_numpy_value = _get_numpy_value(indices)
	    if indices_numpy_value is None:
	        return None
	    if indices_numpy_value.ndim != 1:
	        return None
	    try:
	        gathered = [input_sym_value[i] for i in indices_numpy_value.tolist()]
	    except IndexError:
	        # Out-of-range index: invalid model, leave the node alone.
	        return None
	    output = _get_output(node, 0)
	    if output is not None:
	        state.set_sym_value(output, ir.Shape(gathered))
	    if all(isinstance(d, int) for d in gathered):
	        return op.Constant(value_ints=ir.AttrInt64s("value_ints", gathered))
	    return None


	@register("Reshape")
	def reshape(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace a Reshape node by Identity when the target shape equals the input shape."""
	    data = _get_input(node, 0)
	    target = _get_input(node, 1)
	    if data is None or target is None:
	        return None

	    current_shape = data.shape
	    target_shape = state.get_shape_value(target)
	    if target_shape is None or current_shape is None:
	        return None

	    # No need to check for special values like -1, 0, etc. here
	    if not _same_shape(current_shape, target_shape):
	        return None
	    return op.Identity(data)


	@register("Cast")
	def cast(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Propagate shape/type through Cast and drop it when it is a no-op.

	    The output shape is copied from the input. If the ``to`` attribute equals
	    the input element type the node is replaced by Identity; otherwise the
	    output type is set from ``to``.
	    """
	    source = _get_input(node, 0)
	    result = _get_output(node, 0)
	    if source is None or result is None:
	        return None

	    # TODO(rama): Parts of the following logic (implementing type/shape inference
	    # for Cast op) should be unnecessary. Generic incremental shape-inference
	    # should handle this. Only the optimization to eliminate redundant Cast ops
	    # should be needed here.

	    source_shape = source.shape
	    if source_shape is not None:
	        result.shape = source_shape.copy()

	    source_dtype = _get_input_element_type(node, 0)
	    target_dtype = _get_int_attribute(node, "to", None)
	    if target_dtype is None:
	        return None
	    if source_dtype == target_dtype:
	        return op.Identity(source)
	    result.type = ir.TensorType(ir.DataType(target_dtype))
	    return None


	@register("CastLike")
	def cast_like(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Rewrite CastLike into Identity or Cast once the target element type is known."""
	    data = node.inputs[0]
	    source_type = _get_input_element_type(node, 0)
	    target_type = _get_input_element_type(node, 1)

	    if target_type == ir.DataType.UNDEFINED:
	        return None
	    if source_type != target_type:
	        return op.Cast(data, to=target_type)
	    return op.Identity(data)


	@register("Shape")
	def shape(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Track the (sliced) input shape and fold Shape when it is fully static.

	    The ``start``/``end`` slice of the input shape is recorded as the symbolic
	    value of the output; a Constant is returned only if all dims are ints.
	    """
	    operand = node.inputs[0]
	    if operand is None:
	        return None
	    operand_shape = operand.shape
	    if operand_shape is None:
	        return None
	    start = _get_int_attribute(node, "start", 0)
	    end = _get_int_attribute(node, "end", None)
	    selected_dims = operand_shape[start:end]
	    result = _get_output(node, 0)
	    if result is not None:
	        state.set_sym_value(result, ir.Shape(selected_dims))
	    if not all(isinstance(d, int) for d in selected_dims):
	        return None
	    return op.Constant(value_ints=ir.AttrInt64s("value_ints", list(selected_dims)))


	@register("Size")
	def size(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Fold Size into a scalar Constant when every input dimension is a known int."""
	    operand = _get_input(node, 0)
	    if operand is None:
	        return None
	    operand_shape = operand.shape
	    if operand_shape is None:
	        return None
	    element_count = 1
	    for dim in operand_shape:
	        if isinstance(dim, int):
	            element_count *= dim
	        else:
	            # Symbolic/unknown dimension: the size is not a constant.
	            return None
	    return op.Constant(value_int=element_count)


	@register("If")
	def if_op(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Inline the taken branch of an If node whose condition is a constant.

	    The nodes of the selected branch graph are detached from it and returned
	    as the replacement; branch outputs are renamed to the If node's outputs.
	    """
	    cond = _get_bool_value(_get_input(node, 0))
	    if cond is None:
	        return None

	    # cond is a constant-value: inline the branch
	    branch_name = "then_branch" if cond else "else_branch"
	    branch_attr = node.attributes.get(branch_name)
	    if branch_attr is None:
	        return None
	    if branch_attr.type != ir.AttributeType.GRAPH:
	        return None
	    assert isinstance(branch_attr, ir.Attr)
	    branch_graph = branch_attr.as_graph()

	    # Copy the graph outputs and clear the graph outputs so that the values are free to move
	    formal_outs = list(branch_graph.outputs)
	    branch_graph.outputs.clear()
	    renamings = {
	        formal.name: actual.name
	        for formal, actual in zip(formal_outs, node.outputs)
	        if actual is not None
	    }
	    # TODO: Extend renaming to intermediate values.

	    inlined_nodes = list(branch_graph)
	    branch_graph.remove(inlined_nodes)
	    for inlined in inlined_nodes:
	        # TODO: handle renaming inside subgraphs in nodes
	        for produced in inlined.outputs:
	            produced.name = renamings.get(produced.name, produced.name)
	        # Avoid name collision.
	        inlined.name = f"{node.name}_{inlined.name}"

	    # TODO: we should handle initializers as well!
	    return Replacement(formal_outs, inlined_nodes)


	@register("Identity")
	def identity(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Record that the output of an Identity node equals its input; never replaces the node."""
	    del op
	    source, result = node.inputs[0], node.outputs[0]
	    if source is None or result is None:
	        return None
	    state.set_sym_value(result, source)
	    return None


	@register("SequenceConstruct")
	def sequence_construct(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Record the inputs of SequenceConstruct as the symbolic value of its output."""
	    del op
	    result = node.outputs[0]
	    if result is None:
	        return None
	    state.set_sym_value(result, list(node.inputs))
	    return None


	@register("Concat")
	def concat(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Simplify Concat nodes and track concatenated shape values.

	    - Concat(x) becomes Identity(x).
	    - Operands whose size along ``axis`` is a known 0 are dropped.
	    - For axis 0, if every operand carries a shape value, the concatenated
	      shape is recorded as the symbolic value of the output.
	    """
	    operands = node.inputs
	    # Replace Concat(x) by Identity(x)
	    if len(operands) == 1:
	        return op.Identity(operands[0])

	    axis = _get_int_attribute(node, "axis", None)
	    if axis is None:
	        return None

	    # Eliminate zero-length operands from Concat
	    def has_zero_size(operand: ir.Value | None) -> bool:
	        if operand is None:
	            return False  # Invalid model
	        operand_shape = operand.shape
	        if operand_shape is None:
	            return False
	        try:
	            # We have already checked that axis is an int value (!= None)
	            dim_size = operand_shape[axis]  # type: ignore[index]
	        except IndexError:
	            return False
	        return dim_size == 0  # return False if symbolic or None or non-zero int value

	    kept = [x for x in operands if not has_zero_size(x)]
	    if len(kept) != len(operands):
	        if kept:
	            # Remove zero-length operands from Concat
	            logger.debug("Concat: removing zero-length operand(s) %s => %s", operands, kept)
	            return op.Concat(*kept, axis=axis)
	        if operands:
	            # All operands are zero-length. Concat is a no-op, but we need to use one of the
	            # inputs to get the other dimensions correct:
	            logger.debug("Concat: removing all zero-length operands %s", operands)
	            return op.Identity(operands[0])
	        # No inputs: invalid model.
	        return None

	    # Track value of tensors that carry a shape value (only for axis 0):
	    if axis != 0:
	        return None
	    shape_values = [state.get_shape_value(operand) for operand in operands]
	    if any(shape_value is None for shape_value in shape_values):
	        return None
	    concatenated = ir.Shape(
	        dim
	        for shape_value in shape_values
	        for dim in shape_value.dims  # type: ignore[union-attr]
	    )
	    result = node.outputs[0]
	    if result is not None:
	        state.set_sym_value(result, concatenated)
	    return None


	@register("Dropout", version=(12, None))
	def dropout(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace a Dropout by Identity when applicable.

	    This applies when no training_mode input is given, when training_mode is
	    the constant False, or when the (scalar) ratio is the constant 0.
	    """

	    def optimized_dropout():
	        data = node.inputs[0]
	        passthrough = op.Identity(data)
	        if len(node.outputs) == 1:
	            return passthrough
	        # A mask output is requested as well: produce an all-True mask.
	        true_tensor = ir.tensor([True])
	        data_shape = op.Shape(data)
	        mask = op.ConstantOfShape(data_shape, value=true_tensor)
	        return passthrough, mask

	    operands = node.inputs
	    training_mode = operands[2] if len(operands) > 2 else None
	    if training_mode is None:
	        # No training_mode specified:
	        return optimized_dropout()
	    if _get_bool_value(training_mode) is False:
	        # training_mode is False: dropout is not applied.
	        return optimized_dropout()
	    ratio = _get_numpy_value(operands[1])
	    # Only a constant scalar dropout ratio is supported.
	    if ratio is None or ratio.size != 1:
	        return None
	    if ratio.item() == 0:
	        # dropout ratio is 0: dropout is not applied.
	        return optimized_dropout()
	    return None


	@register("Expand")
	def expand(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace an Expand node by Identity when the target shape equals the input shape."""
	    if len(node.inputs) != 2:
	        return None
	    data = node.inputs[0]
	    if data is None:
	        return None
	    data_shape = data.shape
	    if data_shape is None:
	        # Input shape is not known.
	        return None

	    target_array = _get_numpy_value(node.inputs[1])
	    if target_array is None:
	        # Target shape is not a constant: fall back to its symbolic shape value.
	        target_sym_shape = state.get_shape_value(node.inputs[1])
	        if target_sym_shape is not None and _same_shape(data_shape, target_sym_shape):
	            return op.Identity(data)
	        return None

	    if target_array.ndim != 1:
	        # Target shape must be a 1D tensor. Erroneous model.
	        return None
	    if data_shape.dims != tuple(target_array.tolist()):
	        return None
	    return op.Identity(data)


	@register("ConcatFromSequence")
	def concat_from_sequence(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Rewrite ConcatFromSequence into Concat when the sequence elements are known.

	    The rewrite applies only if the symbolic value of the input sequence is a
	    list of values. With ``new_axis == 1`` each element is unsqueezed along
	    ``axis`` before being concatenated.
	    """
	    sequence = _get_input(node, 0)
	    elements = state.get_sym_value(sequence)
	    # Check the kind of symbolic value *before* iterating over it: it may
	    # also be a single ir.Value or an ir.Shape.
	    if not isinstance(elements, list) or any(x is None for x in elements):
	        return None
	    new_axis = _get_int_attribute(node, "new_axis", 0)
	    axis = _get_int_attribute(node, "axis", None)
	    if axis is None:
	        return None
	    if new_axis == 0:
	        logger.debug("ConcatFromSequence => Concat: %s", [x.name for x in elements])
	        return op.Concat(*elements, axis=axis)
	    if new_axis == 1:
	        # Unsqueeze the inputs with concat axis if new_axis is 1.
	        # The axes operand is built as a 1-D tensor, matching how
	        # split_to_sequence builds the axes operand of Squeeze.
	        axis_value = op.Constant(value_ints=[axis])
	        unsqueezed_inputs = []
	        for element in elements:
	            unsqueezed_input = op.Unsqueeze(
	                element, axis_value, _outputs=[f"{element.name}_unsqueeze"]
	            )
	            unsqueezed_inputs.append(unsqueezed_input)
	        # Send unsqueezed outputs to Concat
	        logger.debug(
	            "ConcatFromSequence => Concat %s", [x.name for x in unsqueezed_inputs]
	        )
	        return op.Concat(*unsqueezed_inputs, axis=axis)
	    return None


	@register("SplitToSequence")
	def split_to_sequence(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Rewriting pattern.

	    From

	        splits = onnx::SplitToSequence(input, split, axis=axis)

	    to

	        split_0, split_1, ..., split_n = onnx::Split(input, split, axis=axis)
	        splits = onnx::SequenceConstruct(split_0, split_1, ..., split_n)

	    or

	        split_0, split_1, ..., split_n = onnx::Split(input, axis=axis, num_outputs=n+1)
	        splits = onnx::SequenceConstruct(split_0, split_1, ..., split_n)

	    where number of output tensors in `splits` is statically known.
	    onnx::SequenceConstruct will be further optimized away if possible, by its own designated evaluator.
	    This allows downstream `SequenceAt` users to be replaced by `split_x` accordingly.

	    The rewrite is skipped (None is returned) when the optional `split` input
	    is absent or not constant, when the size of the split dimension is not a
	    known int, or when the split would produce no outputs.
	    """
	    # `split` is optional, so it must not be read with a bare index.
	    input = _get_input(node, 0)
	    split = _get_input(node, 1)
	    output = _get_output(node, 0)

	    if input is None or split is None or output is None:
	        return None

	    axis = _get_int_attribute(node, "axis", 0)
	    if axis is None:
	        return None
	    shape = input.shape
	    if shape is None:
	        return None
	    rank = len(shape)
	    if axis < 0:
	        axis = axis + rank
	    if axis < 0 or axis >= rank:
	        return None
	    split_dimension_size = shape[axis]
	    if not isinstance(split_dimension_size, int):
	        return None

	    split_value = _get_numpy_value(split)
	    if split_value is None:
	        return None
	    assert isinstance(split_value, np.ndarray)

	    if split_value.ndim == 0:
	        # split into chunks all of size 'split'; the last chunk is smaller
	        # when the dimension is not divisible by 'split'.
	        chunk_size = split_value.item()
	        if not isinstance(chunk_size, int) or chunk_size <= 0:
	            return None
	        num_outputs, remainder = divmod(split_dimension_size, chunk_size)
	        if remainder:
	            num_outputs += 1
	        if num_outputs == 0:
	            return None
	        split_outputs = [f"{output.name}_split_{i}" for i in range(num_outputs)]
	        if remainder == 0:
	            split_values = op.Split(
	                input, axis=axis, num_outputs=num_outputs, _outputs=split_outputs
	            )
	        else:
	            # Split(num_outputs=n) sizes its chunks from n, not from 'split'
	            # (dim=10, split=6 must give 6,4 and not 5,5), so the chunk sizes
	            # are spelled out explicitly.
	            chunk_sizes = [chunk_size] * (num_outputs - 1) + [remainder]
	            split_sizes = op.Constant(
	                value_ints=chunk_sizes, _outputs=[f"{output.name}_split_sizes"]
	            )
	            split_values = op.Split(input, split_sizes, axis=axis, _outputs=split_outputs)
	    elif split_value.ndim == 1:
	        # split into 'size(split)' chunks
	        num_outputs = split_value.size
	        if num_outputs == 0:
	            return None
	        split_outputs = [f"{output.name}_split_{i}" for i in range(num_outputs)]
	        split_values = op.Split(input, split, axis=axis, _outputs=split_outputs)
	    else:
	        return None

	    # If Split returns a single value, we need to wrap it into a list.
	    if isinstance(split_values, ir.Value):
	        split_values = [split_values]

	    keepdims = _get_int_attribute(node, "keepdims", 1)
	    if keepdims is None:
	        return None
	    if keepdims == 0:
	        # squeeze the split dimension if keepdims is 0
	        axis_val = op.Constant(value_ints=[axis], _outputs=[f"{output.name}_axis"])
	        split_values = [
	            op.Squeeze(piece, axis_val, _outputs=[f"{piece_name}_squeeze"])
	            for piece, piece_name in zip(split_values, split_outputs)
	        ]

	    logger.debug("SplitToSequence => Split + SequenceConstruct")

	    return op.SequenceConstruct(*split_values)


	@register("SequenceAt")
	def sequence_at(node: ir.Node, op, state: OptimizerState) -> ReturnValue:
	    """Replace SequenceAt by Identity of the selected element when it is known.

	    Requires the symbolic value of the sequence to be a list of values and the
	    position to be a single-element constant.
	    """
	    sequence = node.inputs[0]
	    position = node.inputs[1]
	    result = node.outputs[0]
	    if sequence is None or position is None:
	        return None
	    elements = state.get_sym_value(sequence)
	    position_array = _get_numpy_value(position)
	    if not isinstance(elements, list) or position_array is None:
	        return None
	    if position_array.size != 1:
	        return None
	    try:
	        selected = elements[position_array.item()]  # type: ignore[index]
	    except IndexError:
	        return None
	    state.set_sym_value(result, selected)
	    logger.debug("SequenceAt %s => %s", sequence.name, selected.name)
	    return op.Identity(selected)


	def _merge_shapes(shape1: ir.Shape | None, shape2: ir.Shape | None) -> ir.Shape | None:
	    """Combine two shapes of equal rank, dimension by dimension.

	    If one shape is None the other is returned. For each pair of dimensions,
	    equal dims are kept, a non-symbolic dim wins over a symbolic one, and a
	    symbolic dim with a value wins over one without.

	    Raises:
	        ValueError: If both shapes are given and their ranks differ.
	    """

	    def merge_dims(dim1, dim2):
	        if dim1 == dim2:
	            return dim1
	        # Prefer int value over symbolic dim
	        if not isinstance(dim1, ir.SymbolicDim):
	            return dim1
	        if not isinstance(dim2, ir.SymbolicDim):
	            return dim2
	        return dim2 if dim1.value is None else dim1

	    if shape1 is None:
	        return shape2
	    if shape2 is None:
	        return shape1
	    if len(shape1) != len(shape2):
	        raise ValueError("Shapes must have the same rank.")
	    merged = []
	    for left, right in zip(shape1, shape2):
	        merged.append(merge_dims(left, right))
	    return ir.Shape(merged)


	class FoldConstantsPass(ir.passes.InPlacePass):
	"""A pass that folds constant expressions in the model.

	Attributes:
	shape_inference: Whether to perform shape inference.
	input_size_limit: Maximum size of input tensors to fold.
	output_size_limit: Maximum size of output tensors to fold.
	always_fold_ops: Collection of op types that should always be folded.
	For ops from the default opset, only op_type is needed (e.g. "Transpose"),
	otherwise specify the domain with ``{domain}::{op_type}``.
	"""

	def __init__(
	    self,
	    *,
	    shape_inference: bool,
	    input_size_limit: int,
	    output_size_limit: int,
	    always_fold_ops: Collection[str] = frozenset(["Transpose"]),
	) -> None:
	    """Store the configuration and initialise the per-run state.

	    Entries of ``always_fold_ops`` are parsed into ``(domain, op_type)``
	    pairs: ``"{domain}::{op_type}"`` or a bare op type (domain ""). The
	    domain "ai.onnx" is normalised to "".
	    """
	    self.shape_inference = shape_inference
	    self.input_size_limit = input_size_limit
	    self.output_size_limit = output_size_limit

	    parsed_ops = []
	    for qualified_name in always_fold_ops:
	        if "::" in qualified_name:
	            domain, op_type = qualified_name.split("::", 1)
	        else:
	            domain, op_type = "", qualified_name
	        parsed_ops.append(("" if domain == "ai.onnx" else domain, op_type))
	    self.always_fold_ops: frozenset[tuple[str, str]] = frozenset(parsed_ops)

	    self._opset_imports: dict[str, int] = {}
	    self._counts: dict[str, int] = {}
	    self._sizes: dict[str, int] = {}
	    self._modified: bool = False
	    self._state = OptimizerState()
	    self._reset()

	def _reset(self) -> None:
	    """Discard counters, the modified flag and the optimizer state before a new run."""
	    self._counts, self._sizes = {}, {}
	    self._modified = False
	    self._state = OptimizerState()

	def _do_inference(self, node: ir.Node) -> None:
	    """Run node-level ONNX shape/type inference and update the node's outputs.

	    Inference is skipped when any present input has no type. Any exception
	    raised while inferring is logged at debug level and otherwise ignored.
	    """

	    # TODO: handle optional inputs
	    def get_constant_value(x: ir.Value) -> onnx.TensorProto | None:
	        # Only small constants (size <= 20) are passed on as input data.
	        if _get_numpy_value(x, size_limit=20) is None:
	            return None
	        assert x.const_value is not None
	        return ir.serde.serialize_tensor(x.const_value)

	    def get_type(value: ir.Value) -> onnx.TypeProto | None:
	        if value.type is None:
	            return None
	        type_proto = ir.serde.serialize_type(value.type)
	        if value.shape is not None:
	            ir.serde.serialize_shape_into(type_proto, value.shape)
	        return type_proto

	    present_inputs = [x for x in node.inputs if x is not None]
	    input_types = {x.name: get_type(x) for x in present_inputs}
	    all_input_data = {x.name: get_constant_value(x) for x in present_inputs}
	    input_data = {
	        name: proto for name, proto in all_input_data.items() if proto is not None
	    }

	    if any(type_proto is None for type_proto in input_types.values()):
	        logger.debug(
	            "Skipping shape inference for node %s due to missing input type.",
	            node.name,
	        )
	        return

	    # TODO: pass in constant values, ir_version
	    try:
	        schema = onnx.defs.get_schema(
	            node.op_type, self._opset_imports[node.domain], node.domain
	        )
	        output_types = onnx.shape_inference.infer_node_outputs(
	            schema,
	            ir.serde.serialize_node(node),
	            input_types,  # type: ignore[arg-type]
	            input_data,  # type: ignore[arg-type]
	        )
	        for output in node.outputs:
	            if output.name not in output_types:
	                continue
	            inferred_type = output_types[output.name]
	            # TODO: merge types, check for conflicts
	            inferred_shape = ir.serde.deserialize_type_proto_for_shape(inferred_type)
	            output.shape = _merge_shapes(output.shape, inferred_shape)
	            output.type = ir.serde.deserialize_type_proto_for_type(inferred_type)
	    except Exception as e:
	        logger.debug(
	            "Skipping shape inference for node %s due to exception: %s",
	            node.name,
	            e,
	        )

	def new_constant(self, node: ir.Node, value) -> ir.Node | None:
	    """Build a Constant node holding ``value`` for the first output of ``node``.

	    The tensor is always attached to the output as ``const_value`` (when
	    ``value`` is a numpy array). None is returned instead of a node when
	    ``value`` is not a numpy array, or when it exceeds ``output_size_limit``
	    and folding would increase the total size.
	    """
	    irvalue = node.outputs[0]
	    if not isinstance(value, np.ndarray):
	        # ONNX does not have a way to represent non-tensor constants, eg. a sequence.
	        # So, a constant-value of type sequence is not folded, but it can be used
	        # to optimize subsequent operations when possible.
	        logger.info(
	            "Skip storing constant folded value %s due to unsupported type %s.",
	            irvalue.name,
	            type(value),
	        )
	        return None

	    tensor = ir.tensor(value)
	    tensor.name = irvalue.name
	    irvalue.const_value = tensor

	    if value.size > self.output_size_limit:
	        # Handle examples like Transpose(weight) to be folded even if the size is large,
	        # as long as weight has no other uses. This won't increase model size.
	        single_use_arrays = (
	            _get_numpy_value(operand)
	            for operand in node.inputs
	            if operand is not None and len(operand.uses()) == 1
	        )
	        removed_input_size = sum(
	            array.size for array in single_use_arrays if array is not None
	        )
	        if value.size - removed_input_size > 0:
	            logger.info(
	                "Skip storing constant folded nvalue %s due to large size %s.",
	                irvalue.name,
	                value.size,
	            )
	            return None

	    logger.debug(
	        "New constant for value %s dtype: %s shape: %s",
	        irvalue.name,
	        value.dtype,
	        value.shape,
	    )

	    constant_attributes = ir.convenience.convert_attributes({"value": tensor})
	    return ir.Node(
	        "", "Constant", inputs=[], attributes=constant_attributes, num_outputs=1
	    )

	def process_node(self, node: ir.Node) -> Replacement | None:
	    """Process a node and return a Replacement if the node can be replaced.

	    Steps, in order:
	      1. Inputs whose symbolic value is another ir.Value are rewired to it.
	      2. Incremental shape inference is run (if enabled, non-control-flow).
	      3. Registered partial evaluators are tried; the first non-None result wins.
	      4. Otherwise the node is evaluated with the reference evaluator, provided
	         it is deterministic, has no graph attributes, takes no graph inputs,
	         and all of its inputs are constants within the size limits.

	    Returns:
	        A Replacement, or None if the node is left as is.
	    """
	    for i, value in enumerate(node.inputs):
	        sym_value = self._state.get_sym_value(value)
	        if isinstance(sym_value, ir.Value):
	            logger.debug(
	                "Node [%s]: Replacing input %s with %s",
	                node.name,
	                value.name,  # type: ignore[union-attr]
	                sym_value.name,
	            )
	            node.replace_input_with(i, sym_value)
	            self._modified = True
	            # TODO(rama): consider merging type/other info from both values

	    # Do incremental shape inference
	    if self.shape_inference and not _is_control_flow_op(node):
	        self._do_inference(node)

	    if node.domain not in self._opset_imports:
	        return None
	    version = self._opset_imports[node.domain]
	    op_optimizers = registry.lookup_evaluators(node.domain, node.op_type, version)
	    for optimizer in op_optimizers:
	        assert optimizer
	        context = RewriterContext()
	        output = optimizer(node, context, self._state)
	        if output is not None:
	            if isinstance(output, Replacement):
	                return output
	            if isinstance(output, ir.Value):
	                output = [output]
	            return Replacement(output, context.nodes)

	    if _is_control_flow_op(node) or _is_non_deterministic_op(node):
	        return None

	    if _is_onnx_op(node, "Constant"):
	        _process_constant_node(node)
	        return None

	    if any(x.is_graph_input() for x in node.inputs if x is not None):
	        # Do not fold any graph inputs to preserve graph signature
	        return None

	    # Ensure all node inputs are constants
	    if any(x.const_value is None for x in node.inputs if x is not None):
	        if logger.isEnabledFor(logging.DEBUG):
	            # One placeholder per argument: the node and its input names.
	            logger.debug(
	                "Skipping constant folding for node %s because it has non-constant inputs: %s",
	                node,
	                [x.name for x in node.inputs if x is not None],
	            )
	        return None

	    input_tensors = [x.const_value if x is not None else None for x in node.inputs]
	    if any(
	        tensor.size > self.input_size_limit
	        for tensor in input_tensors
	        if tensor is not None
	    ):
	        if (node.domain, node.op_type) in self.always_fold_ops and all(
	            len(input.consumers()) == 1 for input in node.inputs if input is not None
	        ):
	            # If the op is in always_fold_ops and all inputs are used only by this node,
	            # we can still fold it even if the input size exceeds the limit.
	            logger.debug(
	                "Folding large constant for node %s because it is in the always_fold_ops list",
	                node,
	            )
	        else:
	            # Skip folding large tensors
	            if logger.isEnabledFor(logging.DEBUG):
	                input_sizes = [
	                    tensor.size for tensor in input_tensors if tensor is not None
	                ]
	                logger.debug(
	                    "Skipping constant folding for node %s due to large input size: %s",
	                    node,
	                    input_sizes,
	                )
	            return None

	    input_values = [_get_numpy_value(x) for x in node.inputs]

	    def convert(av):
	        if av.type == ir.AttributeType.TENSOR:
	            return ir.serde.serialize_tensor(av.value)
	        return av.value

	    attr_values = {name: convert(attr) for name, attr in node.attributes.items()}
	    # Inputs go in positionally and attributes by keyword (name -> value).
	    outputs = _reference_evaluator.evaluate(
	        node.domain, node.op_type, version, *input_values, **attr_values
	    )

	    if outputs is None:
	        return None
	    if len(node.outputs) == 1 and not isinstance(outputs, (tuple, list)):
	        replacement = self.new_constant(node, outputs)
	        if _is_onnx_op(node, "ConstantOfShape") or replacement is None:
	            return None
	        return Replacement(replacement.outputs, [replacement])
	    else:
	        logger.warning(
	            "Skipping constant folding for op %s with multiple outputs.", node.op_type
	        )
	    return None

	def replace_node(self, node: ir.Node, replacement, root: ir.Graph | ir.Function) -> None:
	    """Substitute ``node`` inside ``root`` with the contents of ``replacement``.

	    Args:
	        node: The node being removed; its outputs are the values replaced.
	        replacement: Object providing ``new_nodes`` and ``new_outputs``.
	        root: The graph or function that contains ``node``.
	    """
	    logger.debug("Replacing node: %s::%s %s", node.domain, node.op_type, node.name)

	    nodes_to_remove = [node]
	    ir.convenience.replace_nodes_and_values(
	        root,
	        node,
	        nodes_to_remove,
	        replacement.new_nodes,
	        node.outputs,
	        replacement.new_outputs,
	    )

	    # Record that this pass changed the model.
	    self._modified = True

	    # TODO: what about new opset_imports?
	    # TODO: track statistics about replaced nodes and sizes of new constants

	def visit_attribute(self, attr: ir.Attr) -> None:
	    """Visit every subgraph held by a graph-valued attribute.

	    Reference attributes and attributes that are not of type GRAPH or
	    GRAPHS are ignored.
	    """
	    if attr.is_ref():
	        return

	    kind = attr.type
	    if kind == ir.AttributeType.GRAPH:
	        subgraphs = (attr.as_graph(),)
	    elif kind == ir.AttributeType.GRAPHS:
	        subgraphs = attr.as_graphs()
	    else:
	        return

	    for subgraph in subgraphs:
	        self.visit_graph(subgraph)

	def visit_node(self, node: ir.Node, root: ir.Graph | ir.Function) -> None:
	    """Process one node, replacing it or descending into its attributes.

	    When ``process_node`` yields a replacement, the node is replaced in
	    ``root``. Otherwise the node is kept and each of its attributes is
	    visited.
	    """
	    result = self.process_node(node)
	    if result is not None:
	        self.replace_node(node, result, root)
	        return

	    # No change. Process attributes.
	    for attribute in node.attributes.values():
	        self.visit_attribute(attribute)

	def visit_graph(self, graph: ir.Graph) -> None:
	    """Visit every node of ``graph`` and then try to fold its outputs."""
	    for node in graph:
	        self.visit_node(node, graph)

	    # Replace outputs whose producing nodes can be folded away. These are
	    # typically outputs of Identity nodes.
	    for index, graph_output in enumerate(graph.outputs):
	        if graph_output is None:
	            continue
	        candidate = self._state.get_sym_value(graph_output)
	        # An output must be a Value, and it must be legal as an output of this graph.
	        if isinstance(candidate, ir.Value) and _sym_value_can_replace_graph_output(
	            graph, candidate, graph_output
	        ):
	            # Rename the candidate so the graph output keeps its name.
	            candidate.name = graph_output.name
	            graph.outputs[index] = candidate
	            self._modified = True

	def visit_function(self, function: ir.Function) -> None:
	    """Visit every node of ``function``, using the function as the root."""
	    for fn_node in function:
	        self.visit_node(fn_node, root=function)

	def call(self, model: ir.Model) -> FoldConstantsResult:
	    """Run constant folding over the main graph and all functions of ``model``.

	    Returns:
	        A FoldConstantsResult built from the model, the modified flag and
	        the symbolic value map of the pass state.
	    """
	    self._reset()
	    # Opset versions are taken from the model for every node visited.
	    self._opset_imports = model.opset_imports

	    self.visit_graph(model.graph)
	    # TODO(rama): Should we specialize functions?
	    for model_function in model.functions.values():
	        self.visit_function(model_function)

	    was_modified = self._modified
	    symbolic_values = self._state.symbolic_value_map
	    return FoldConstantsResult(model, was_modified, symbolic_values)


	def _sym_value_can_replace_graph_output(
	    graph: ir.Graph, sym_value: ir.Value, output: ir.Value
	) -> bool:
	    """Return True if ``sym_value`` may take the place of a ``graph`` output.

	    All three conditions must hold:

	    * ``sym_value`` has a producer. A value without one is some graph's
	      input, and ONNX does not allow a graph input to be a graph output.
	    * The producer belongs to ``graph``, since an output of this graph must
	      be produced by one of its nodes.
	    * ``sym_value`` is not already a graph output. Renaming it would make
	      the output it currently represents lose its name.
	    """
	    source_node = sym_value.producer()
	    return (
	        source_node is not None
	        and source_node.graph is graph
	        and not sym_value.is_graph_output()
	    )


	@dataclasses.dataclass
	class FoldConstantsResult(ir.passes.PassResult):
	    """Pass result that additionally carries the symbolic value map."""

	    # Filled from the pass state's ``symbolic_value_map`` when the pass finishes.
	    symbolic_value_map: dict[ir.Value, SymbolicValue]

	    def __bool__(self) -> bool:
	        """Return the ``modified`` flag.

	        Kept for backward compatibility: ``fold_constants`` previously
	        returned a plain boolean indicating whether the model was modified.
	        """
	        return self.modified


	def fold_constants(
	    model: ir.Model,
	    *,
	    onnx_shape_inference: bool = False,
	    input_size_limit: int = DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT,
	    output_size_limit: int = DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT,
	    always_fold_ops: Collection[str] = frozenset(["Transpose"]),
	) -> FoldConstantsResult:
	    """Apply the constant folding optimization to ``model``.

	    This is a convenience wrapper that builds a ``FoldConstantsPass`` from
	    the given options and runs it on the model.

	    Args:
	        model: The ONNX model to optimize.
	        onnx_shape_inference: Whether to enable ONNX shape inference during
	            constant folding. Defaults to False.
	        input_size_limit: The maximum size of input tensors that can be
	            considered for constant folding. Defaults to
	            `DEFAULT_CONSTANT_FOLD_INPUT_SIZE_LIMIT`.
	        output_size_limit: The maximum size of output tensors that can be
	            stored after constant folding. Defaults to
	            `DEFAULT_CONSTANT_FOLD_OUTPUT_SIZE_LIMIT`.
	        always_fold_ops: A collection of op types that should always be
	            folded, regardless of their input or output sizes. For ops from
	            the default opset, only the op_type is needed (e.g.
	            "Transpose"); otherwise specify the domain with
	            ``{domain}::{op_type}``.

	    Returns:
	        An instance of `FoldConstantsResult`.
	    """
	    pass_options = {
	        "shape_inference": onnx_shape_inference,
	        "input_size_limit": input_size_limit,
	        "output_size_limit": output_size_limit,
	        "always_fold_ops": always_fold_ops,
	    }
	    folder_pass = FoldConstantsPass(**pass_options)
	    return folder_pass(model)  # type: ignore[return-value]