# xiaoanyu123's picture
# Add files using upload-large-folder tool
# 6a22ec9 verified
# Copyright (c) Microsoft Corporation.
# Licensed under the MIT License.
"""
A one-layer SmolLM model test case, with inputs: input_ids, attention_mask, and position_ids.
This is an onnxscript version of the model.
"""
import numpy as np
import onnx_ir as ir
from onnxscript import script
from onnxscript.onnx_opset import opset18
from onnxscript.onnx_types import FLOAT, INT64
def make_model(
    input_layernorm_weight_0,
    post_attention_layernorm_weight0,
    norm_weight,
    head_weight,
    self_attn_q_proj_weight0,
    self_attn_k_proj_weight0,
    self_attn_v_proj_weight0,
    self_attn_o_proj_weight0,
    mlp_gate_proj_weight0,
    mlp_up_proj_weight0,
    mlp_down_proj_weight0,
):
    """Build the one-layer SmolLM test model with the given weights baked in.

    Every argument is passed as the ``value`` attribute of an ``opset18.Constant``
    node, so each must be a tensor object that ``Constant`` accepts (the caller
    in this file, ``make_model_with_random_weights``, passes ``ir.tensor(...)``
    wrappers around float32 NumPy arrays).

    The graph takes three inputs, all of shape ``[1, 10]``; per the module
    docstring they are, in order, input_ids (``input0``, INT64), attention_mask
    (``input1``, FLOAT) and position_ids (``input2``, INT64).  It returns three
    outputs: the final projection of shape ``[1, 10, 49152]``, the key tensor
    after rotary embedding (``add_3``) and the value tensor (``transpose_3``),
    both of shape ``[1, 32, 10, 64]``.

    Args:
        input_layernorm_weight_0: Scale applied after the first normalization
            (the one feeding the q/k/v projections).
        post_attention_layernorm_weight0: Scale applied after the normalization
            that feeds the MLP.
        norm_weight: Scale applied after the final normalization.
        head_weight: Used twice: as the table that ``input0`` is gathered from
            (axis 0), and, transposed, as the final output projection.
        self_attn_q_proj_weight0: Query projection matrix; transposed with
            ``perm=[1, 0]`` before the MatMul.
        self_attn_k_proj_weight0: Key projection matrix; used like the query one.
        self_attn_v_proj_weight0: Value projection matrix; used like the query one.
        self_attn_o_proj_weight0: Attention output projection matrix; transposed
            before the MatMul.
        mlp_gate_proj_weight0: MLP gate projection matrix; transposed before use.
        mlp_up_proj_weight0: MLP up projection matrix; transposed before use.
        mlp_down_proj_weight0: MLP down projection matrix; transposed before use.

    Returns:
        The result of ``main_graph.to_model_proto()``.
    """

    # NOTE(review): local names and statement order inside ``main_graph`` are
    # consumed by the onnxscript converter and presumably surface in the
    # generated graph; keep them stable unless the graph is meant to change.
    @script()
    def main_graph(
        input0: INT64[1, 10], input1: FLOAT[1, 10], input2: INT64[1, 10]
    ) -> (FLOAT[1, 10, 49152], FLOAT[1, 32, 10, 64], FLOAT[1, 32, 10, 64]):
        # --- Weights: each enclosing-scope tensor becomes a Constant node. ---
        model_layers_0_input_layernorm_weight = opset18.Constant(
            value=input_layernorm_weight_0
        )
        model_layers_0_post_attention_layernorm_weight = opset18.Constant(
            value=post_attention_layernorm_weight0
        )
        model_norm_weight = opset18.Constant(value=norm_weight)
        lm_head_weight = opset18.Constant(value=head_weight)
        model_layers_0_self_attn_q_proj_weight = opset18.Constant(
            value=self_attn_q_proj_weight0
        )
        model_layers_0_self_attn_k_proj_weight = opset18.Constant(
            value=self_attn_k_proj_weight0
        )
        model_layers_0_self_attn_v_proj_weight = opset18.Constant(
            value=self_attn_v_proj_weight0
        )
        model_layers_0_self_attn_o_proj_weight = opset18.Constant(
            value=self_attn_o_proj_weight0
        )
        model_layers_0_mlp_gate_proj_weight = opset18.Constant(value=mlp_gate_proj_weight0)
        model_layers_0_mlp_up_proj_weight = opset18.Constant(value=mlp_up_proj_weight0)
        model_layers_0_mlp_down_proj_weight = opset18.Constant(value=mlp_down_proj_weight0)

        # --- Token embedding: rows of lm_head_weight selected by input0. ---
        embedding = opset18.Gather(lm_head_weight, input0, axis=0)

        # --- Additive attention mask. ---
        # Start from a 10x10 tensor of the most negative finite float32 value,
        # apply Trilu with k=1, and reshape to [1, 1, 10, 10].
        minus_inf_10x10 = opset18.ConstantOfShape([10, 10], [-3.4028234663852886e38])
        mask_10x10 = opset18.Trilu(minus_inf_10x10, 1)
        slice_5 = opset18.Reshape(mask_10x10, [1, 1, 10, 10])
        # Combine with input1: wherever (mask + input1) is exactly 0 the entry is
        # replaced by the large negative value; everywhere else the Trilu mask
        # value is kept.
        unsqueeze_2 = opset18.Unsqueeze(input1, 1)
        unsqueeze_3 = opset18.Unsqueeze(unsqueeze_2, 2)
        add = slice_5 + unsqueeze_3
        eq = add == 0.0
        slice_10 = slice_5
        masked_fill = opset18.Where(eq, -3.4028235e38, slice_10)
        # Each permutation below is applied twice in a row, so every pair is an
        # identity on the layout.  NOTE(review): presumably exporter residue
        # kept on purpose for this test case; confirm before simplifying.
        val_179 = opset18.Transpose(masked_fill, perm=[2, 1, 0, 3])
        slice_scatter = opset18.Transpose(val_179, perm=[2, 1, 0, 3])
        val_191 = opset18.Transpose(slice_scatter, perm=[1, 0, 2, 3])
        slice_scatter_1 = opset18.Transpose(val_191, perm=[1, 0, 2, 3])

        # --- Rotary position embedding: cos/sin tables from input2. ---
        unsqueeze_6 = opset18.Unsqueeze(input2, 1)
        # to=1 is the ONNX FLOAT element type.
        to_copy_1 = opset18.Cast(unsqueeze_6, to=1)
        # 32 frequencies forming a geometric sequence from 1.0 down to ~1.33e-4
        # (numerically 10000 ** (-i / 32) for i = 0..31), shaped [1, 32, 1].
        view_1 = opset18.Constant(
            value=ir.tensor(
                np.array(
                    [
                        1.0,
                        0.7498942017555237,
                        0.5623413324356079,
                        0.4216965138912201,
                        0.3162277638912201,
                        0.23713736236095428,
                        0.17782793939113617,
                        0.1333521455526352,
                        0.10000000149011612,
                        0.07498941570520401,
                        0.05623412877321243,
                        0.04216964915394783,
                        0.03162277862429619,
                        0.0237137358635664,
                        0.017782794311642647,
                        0.01333521492779255,
                        0.009999999776482582,
                        0.007498942315578461,
                        0.005623413249850273,
                        0.0042169648222625256,
                        0.003162277862429619,
                        0.0023713738191872835,
                        0.0017782794311642647,
                        0.0013335214462131262,
                        0.0010000000474974513,
                        0.0007498941849917173,
                        0.000562341301701963,
                        0.00042169648804701865,
                        0.0003162277862429619,
                        0.0002371373848291114,
                        0.00017782794020604342,
                        0.0001333521504420787,
                    ],
                    dtype=np.float32,
                ).reshape([1, 32, 1])
            )
        )
        # [1, 32, 1] @ [1, 1, 10] -> [1, 32, 10]: frequency times position.
        view_2 = opset18.Reshape(to_copy_1, [1, 1, 10], allowzero=0)
        bmm = view_1 @ view_2
        view_3 = opset18.Reshape(bmm, [1, 32, 10], allowzero=0)
        transpose = opset18.Transpose(view_3, perm=[0, 2, 1])
        # Duplicate along the last axis so the angles span all 64 head channels.
        cat = opset18.Concat(transpose, transpose, axis=-1)
        cos = opset18.Cos(cat)
        sin = opset18.Sin(cat)

        # --- Input normalization: x / sqrt(mean(x^2) + 1e-5) * weight. ---
        pow_1 = embedding**2.0
        mean = opset18.ReduceMean(pow_1, [-1], keepdims=1, noop_with_empty_axes=0)
        add_1 = mean + 1e-05
        val_244 = opset18.Sqrt(add_1)
        rsqrt = opset18.Reciprocal(val_244)
        mul_3 = embedding * rsqrt
        mul_4 = model_layers_0_input_layernorm_weight * mul_3

        # --- Q/K/V projections (x @ W^T). ---
        t = opset18.Transpose(model_layers_0_self_attn_q_proj_weight, perm=[1, 0])
        view_5 = mul_4 @ t
        t_1 = opset18.Transpose(model_layers_0_self_attn_k_proj_weight, perm=[1, 0])
        view_7 = mul_4 @ t_1
        t_2 = opset18.Transpose(model_layers_0_self_attn_v_proj_weight, perm=[1, 0])
        view_9 = mul_4 @ t_2
        # Split into 32 heads of 64 channels: [1, 10, 32, 64] -> [1, 32, 10, 64].
        view_10 = opset18.Reshape(view_5, [1, 10, 32, 64], allowzero=0)
        transpose_1 = opset18.Transpose(view_10, perm=[0, 2, 1, 3])
        view_11 = opset18.Reshape(view_7, [1, 10, 32, 64], allowzero=0)
        transpose_2 = opset18.Transpose(view_11, perm=[0, 2, 1, 3])
        view_12 = opset18.Reshape(view_9, [1, 10, 32, 64], allowzero=0)
        transpose_3 = opset18.Transpose(view_12, perm=[0, 2, 1, 3])

        # --- Apply rotary embedding to the query: q*cos + rotate_half(q)*sin, ---
        # where rotate_half(q) = concat(-q[..., 32:], q[..., :32]).
        unsqueeze_7 = opset18.Unsqueeze(cos, 1)
        unsqueeze_8 = opset18.Unsqueeze(sin, 1)
        mul_5 = transpose_1 * unsqueeze_7
        val_267 = opset18.Constant(value_ints=[1])
        slice_19 = opset18.Slice(transpose_1, [0], [32], [3], val_267)
        val_277 = opset18.Constant(value_ints=[1])
        slice_20 = opset18.Slice(transpose_1, [32], [9223372036854775807], [3], val_277)
        neg = opset18.Neg(slice_20)
        cat_1 = opset18.Concat(neg, slice_19, axis=-1)
        mul_6 = cat_1 * unsqueeze_8
        add_2 = mul_5 + mul_6
        # Same rotation for the key.
        mul_7 = transpose_2 * unsqueeze_7
        val_287 = opset18.Constant(value_ints=[1])
        slice_21 = opset18.Slice(transpose_2, [0], [32], [3], val_287)
        val_297 = opset18.Constant(value_ints=[1])
        slice_22 = opset18.Slice(transpose_2, [32], [9223372036854775807], [3], val_297)
        neg_1 = opset18.Neg(slice_22)
        cat_2 = opset18.Concat(neg_1, slice_21, axis=-1)
        mul_8 = cat_2 * unsqueeze_8
        add_3 = mul_7 + mul_8

        # --- Scaled dot-product attention. ---
        # Transpose the key's last two axes via a 3-D round trip:
        # [1, 32, 10, 64] -> [32, 10, 64] -> [32, 64, 10] -> [1, 32, 64, 10].
        val_346 = opset18.Reshape(add_3, [-1, 10, 64], allowzero=0)
        val_347 = opset18.Transpose(val_346, perm=[0, 2, 1])
        val_349 = opset18.Reshape(val_347, [1, 32, 64, 10], allowzero=0)
        # 0.35355338 ~= 64 ** -0.25; scaling both operands scales the product by
        # ~0.125 = 1 / sqrt(64).
        val_351 = add_2 * [0.35355338]
        val_353 = val_349 * [0.35355338]
        val_354 = val_351 @ val_353
        val_355 = val_354 + slice_scatter_1
        val_356 = opset18.Softmax(val_355, axis=-1)
        getitem = val_356 @ transpose_3
        # Merge heads back: [1, 32, 10, 64] -> [1, 10, 32, 64] -> [1, 10, -1].
        transpose_4 = opset18.Transpose(getitem, perm=[0, 2, 1, 3])
        view_13 = opset18.Reshape(transpose_4, [1, 10, -1], allowzero=0)
        t_3 = opset18.Transpose(model_layers_0_self_attn_o_proj_weight, perm=[1, 0])
        view_15 = view_13 @ t_3
        # Residual connection around attention.
        add_4 = embedding + view_15

        # --- Post-attention normalization (same formula as above). ---
        pow_2 = add_4**2.0
        mean_1 = opset18.ReduceMean(pow_2, [-1], keepdims=1, noop_with_empty_axes=0)
        add_5 = mean_1 + 1e-05
        val_379 = opset18.Sqrt(add_5)
        rsqrt_1 = opset18.Reciprocal(val_379)
        mul_9 = add_4 * rsqrt_1
        mul_10 = model_layers_0_post_attention_layernorm_weight * mul_9

        # --- Gated MLP: down(silu(gate(x)) * up(x)), silu(v) = v * sigmoid(v). ---
        t_4 = opset18.Transpose(model_layers_0_mlp_gate_proj_weight, perm=[1, 0])
        view_17 = mul_10 @ t_4
        val_383 = opset18.Sigmoid(view_17)
        silu = view_17 * val_383
        t_5 = opset18.Transpose(model_layers_0_mlp_up_proj_weight, perm=[1, 0])
        view_19 = mul_10 @ t_5
        mul_11 = silu * view_19
        t_6 = opset18.Transpose(model_layers_0_mlp_down_proj_weight, perm=[1, 0])
        view_21 = mul_11 @ t_6
        # Residual connection around the MLP.
        add_6 = add_4 + view_21

        # --- Final normalization and output projection with lm_head_weight^T. ---
        pow_3 = add_6**2.0
        mean_2 = opset18.ReduceMean(pow_3, [-1], keepdims=1, noop_with_empty_axes=0)
        add_7 = mean_2 + 1e-05
        val_391 = opset18.Sqrt(add_7)
        rsqrt_2 = opset18.Reciprocal(val_391)
        mul_12 = add_6 * rsqrt_2
        mul_13 = model_norm_weight * mul_12
        t_7 = opset18.Transpose(lm_head_weight, perm=[1, 0])
        view_23 = mul_13 @ t_7
        to_copy_12 = opset18.Identity(view_23)
        # Outputs: final projection, rotary-embedded key, value.
        return to_copy_12, add_3, transpose_3

    model = main_graph.to_model_proto()
    return model
def make_model_with_random_weights():
    """Build the SmolLM test model using random float32 weights.

    Every weight is drawn with ``np.random.rand`` (global NumPy random state),
    cast to float32, wrapped with ``ir.tensor`` and handed to ``make_model``
    positionally.

    Returns:
        Whatever ``make_model`` returns for those weights.
    """
    # Shapes in the exact positional order of ``make_model``'s parameters; the
    # order also fixes the sequence in which random numbers are drawn.
    weight_shapes = (
        (2048,),  # input_layernorm_weight_0
        (2048,),  # post_attention_layernorm_weight0
        (2048,),  # norm_weight
        (49152, 2048),  # head_weight
        (2048, 2048),  # self_attn_q_proj_weight0
        (2048, 2048),  # self_attn_k_proj_weight0
        (2048, 2048),  # self_attn_v_proj_weight0
        (2048, 2048),  # self_attn_o_proj_weight0
        (8192, 2048),  # mlp_gate_proj_weight0
        (8192, 2048),  # mlp_up_proj_weight0
        (2048, 8192),  # mlp_down_proj_weight0
    )
    random_arrays = [
        np.random.rand(*shape).astype(np.float32) for shape in weight_shapes
    ]
    weight_tensors = [ir.tensor(array) for array in random_arrays]
    return make_model(*weight_tensors)
class _SmollmTest1:
    """Test case holder that creates its model and inputs on first request.

    Both accessors cache their result on the instance, so repeated calls
    return the same object.
    """

    def get_onnx_model(self):
        """Return the deserialized IR model, building it on the first call."""
        try:
            return self._onnx_model
        except AttributeError:
            pass  # not built yet; fall through and create it
        proto = make_model_with_random_weights()
        self._onnx_model = ir.serde.deserialize_model(proto)
        return self._onnx_model

    def get_ort_inputs(self):
        """Return the input dict for the model, creating it on the first call.

        Keys are ``input0`` (random int64 ids in ``[0, 49152)``), ``input1``
        (float32 ones) and ``input2`` (int64 ``0..9``); every array has shape
        ``(1, 10)``.
        """
        try:
            return self._ort_inputs
        except AttributeError:
            pass  # not created yet; fall through and build them
        token_ids = np.random.randint(0, 49152, (1, 10)).astype(np.int64)
        all_ones = np.ones((1, 10), dtype=np.float32)
        positions = np.arange(10, dtype=np.int64).reshape(1, 10)
        self._ort_inputs = {
            "input0": token_ids,
            "input1": all_ones,
            "input2": positions,
        }
        return self._ort_inputs
def smollm_test_1():
    """Create and return a new ``_SmollmTest1`` test case instance."""
    test_case = _SmollmTest1()
    return test_case