| program(1.0) |
| [buildInfo = dict<tensor<string, []>, tensor<string, []>>({{"coremlc-component-MIL", "3520.4.1"}, {"coremlc-version", "3520.5.1"}})] |
| { |
| func main<ios17>(tensor<fp32, [1, 256]> encoder_mask, tensor<fp32, [1, 256, 768]> encoder_output, tensor<int32, [1]> speaker_idx) { |
| tensor<int32, []> baked_flat_batch_dims_0 = const()[name = tensor<string, []>("baked_flat_batch_dims_0"), val = tensor<int32, []>(0)]; |
| tensor<bool, []> baked_flat_validate_indices_0 = const()[name = tensor<string, []>("baked_flat_validate_indices_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [5, 84480]> dec_baked_context_embedding_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_baked_context_embedding_weight_to_fp16_quantized"), quantized_data = tensor<int8, [5, 84480]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(64))), scale = tensor<fp16, [5]>([0x1.a8p-4, 0x1.adcp-4, 0x1.cap-4, 0x1.c38p-4, 0x1.bdp-4]), zero_point = tensor<int8, [5]>([0, 0, 0, 0, 0])]; |
| tensor<string, []> speaker_idx_to_int16_dtype_0 = const()[name = tensor<string, []>("speaker_idx_to_int16_dtype_0"), val = tensor<string, []>("int16")]; |
| tensor<string, []> cast_73_dtype_0 = const()[name = tensor<string, []>("cast_73_dtype_0"), val = tensor<string, []>("int32")]; |
| tensor<int32, []> greater_equal_0_y_0 = const()[name = tensor<string, []>("greater_equal_0_y_0"), val = tensor<int32, []>(0)]; |
| tensor<int16, [1]> speaker_idx_to_int16 = cast(dtype = speaker_idx_to_int16_dtype_0, x = speaker_idx)[name = tensor<string, []>("cast_55")]; |
| tensor<int32, [1]> cast_73 = cast(dtype = cast_73_dtype_0, x = speaker_idx_to_int16)[name = tensor<string, []>("cast_54")]; |
| tensor<bool, [1]> greater_equal_0 = greater_equal(x = cast_73, y = greater_equal_0_y_0)[name = tensor<string, []>("greater_equal_0")]; |
| tensor<int32, []> slice_by_index_0 = const()[name = tensor<string, []>("slice_by_index_0"), val = tensor<int32, []>(5)]; |
| tensor<int32, [1]> add_0 = add(x = cast_73, y = slice_by_index_0)[name = tensor<string, []>("add_0")]; |
| tensor<int32, [1]> select_0 = select(a = cast_73, b = add_0, cond = greater_equal_0)[name = tensor<string, []>("select_0")]; |
| tensor<string, []> select_0_to_int16_dtype_0 = const()[name = tensor<string, []>("select_0_to_int16_dtype_0"), val = tensor<string, []>("int16")]; |
| tensor<string, []> cast_0_dtype_0 = const()[name = tensor<string, []>("cast_0_dtype_0"), val = tensor<string, []>("int32")]; |
| tensor<int32, []> greater_equal_0_y_0_1 = const()[name = tensor<string, []>("greater_equal_0_y_0_1"), val = tensor<int32, []>(0)]; |
| tensor<int16, [1]> select_0_to_int16 = cast(dtype = select_0_to_int16_dtype_0, x = select_0)[name = tensor<string, []>("cast_53")]; |
| tensor<int32, [1]> cast_0 = cast(dtype = cast_0_dtype_0, x = select_0_to_int16)[name = tensor<string, []>("cast_52")]; |
| tensor<bool, [1]> greater_equal_0_1 = greater_equal(x = cast_0, y = greater_equal_0_y_0_1)[name = tensor<string, []>("greater_equal_0_1")]; |
| tensor<int32, []> slice_by_index_0_1 = const()[name = tensor<string, []>("slice_by_index_0_1"), val = tensor<int32, []>(5)]; |
| tensor<int32, [1]> add_0_1 = add(x = cast_0, y = slice_by_index_0_1)[name = tensor<string, []>("add_0_1")]; |
| tensor<int32, [1]> select_0_1 = select(a = cast_0, b = add_0_1, cond = greater_equal_0_1)[name = tensor<string, []>("select_0_1")]; |
| tensor<int32, []> baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0 = const()[name = tensor<string, []>("baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0"), val = tensor<int32, []>(0)]; |
| tensor<fp16, [1, 84480]> baked_flat_cast_fp16_cast_uint16_cast_uint16 = gather(axis = baked_flat_cast_fp16_cast_uint16_cast_uint16_axis_0, batch_dims = baked_flat_batch_dims_0, indices = select_0_1, validate_indices = baked_flat_validate_indices_0, x = dec_baked_context_embedding_weight_to_fp16_quantized)[name = tensor<string, []>("baked_flat_cast_fp16_cast_uint16_cast_uint16")]; |
| tensor<int32, [3]> var_77 = const()[name = tensor<string, []>("op_77"), val = tensor<int32, [3]>([1, 110, 768])]; |
| tensor<fp16, [1, 110, 768]> baked_cast_fp16 = reshape(shape = var_77, x = baked_flat_cast_fp16_cast_uint16_cast_uint16)[name = tensor<string, []>("baked_cast_fp16")]; |
| tensor<int32, []> var_162 = const()[name = tensor<string, []>("op_162"), val = tensor<int32, []>(1)]; |
| tensor<bool, []> x_1_interleave_0 = const()[name = tensor<string, []>("x_1_interleave_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 768]> bos_emb_to_fp16 = const()[name = tensor<string, []>("bos_emb_to_fp16"), val = tensor<fp16, [1, 1, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(422528)))]; |
| tensor<fp16, [1, 111, 768]> x_1_cast_fp16 = concat(axis = var_162, interleave = x_1_interleave_0, values = (baked_cast_fp16, bos_emb_to_fp16))[name = tensor<string, []>("x_1_cast_fp16")]; |
| tensor<fp16, [111, 768]> op_173_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("op_173_to_fp16_quantized"), quantized_data = tensor<int8, [111, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(424128))), scale = tensor<fp16, [111]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(509632))), zero_point = tensor<int8, [111]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(509440)))]; |
| tensor<fp16, [1, 111, 768]> x_3_cast_fp16 = add(x = x_1_cast_fp16, y = op_173_to_fp16_quantized)[name = tensor<string, []>("x_3_cast_fp16")]; |
| tensor<int32, []> var_201 = const()[name = tensor<string, []>("op_201"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_21_axes_0 = const()[name = tensor<string, []>("input_21_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_0_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(509952)))]; |
| tensor<fp16, []> var_199_to_fp16 = const()[name = tensor<string, []>("op_199_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_21_cast_fp16 = layer_norm(axes = input_21_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_self_weight_to_fp16, x = x_3_cast_fp16)[name = tensor<string, []>("input_21_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(511552))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2283456))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [2304]> linear_0_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_0_bias_0_to_fp16"), val = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2288128)))]; |
| tensor<fp16, [1, 111, 2304]> linear_0_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_0_self_attention_qkv_net_weight_to_fp16_quantized, x = input_21_cast_fp16)[name = tensor<string, []>("linear_0_cast_fp16")]; |
| tensor<int32, [5]> var_220 = const()[name = tensor<string, []>("op_220"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_3_cast_fp16 = reshape(shape = var_220, x = linear_0_cast_fp16)[name = tensor<string, []>("qkv_3_cast_fp16")]; |
| tensor<int32, [5]> q_1_begin_0 = const()[name = tensor<string, []>("q_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_1_end_0 = const()[name = tensor<string, []>("q_1_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_1_end_mask_0 = const()[name = tensor<string, []>("q_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_1_squeeze_mask_0 = const()[name = tensor<string, []>("q_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_1_cast_fp16 = slice_by_index(begin = q_1_begin_0, end = q_1_end_0, end_mask = q_1_end_mask_0, squeeze_mask = q_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("q_1_cast_fp16")]; |
| tensor<int32, [5]> new_k_1_begin_0 = const()[name = tensor<string, []>("new_k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_1_end_0 = const()[name = tensor<string, []>("new_k_1_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_1_end_mask_0 = const()[name = tensor<string, []>("new_k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_1_cast_fp16 = slice_by_index(begin = new_k_1_begin_0, end = new_k_1_end_0, end_mask = new_k_1_end_mask_0, squeeze_mask = new_k_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_k_1_cast_fp16")]; |
| tensor<string, []> new_k_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_1_begin_0 = const()[name = tensor<string, []>("new_v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_1_end_0 = const()[name = tensor<string, []>("new_v_1_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_1_end_mask_0 = const()[name = tensor<string, []>("new_v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_1_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_1_cast_fp16 = slice_by_index(begin = new_v_1_begin_0, end = new_v_1_end_0, end_mask = new_v_1_end_mask_0, squeeze_mask = new_v_1_squeeze_mask_0, x = qkv_3_cast_fp16)[name = tensor<string, []>("new_v_1_cast_fp16")]; |
| tensor<string, []> new_v_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_235 = const()[name = tensor<string, []>("op_235"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_237_transpose_x_0 = const()[name = tensor<string, []>("op_237_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_237_transpose_y_0 = const()[name = tensor<string, []>("op_237_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_96_perm_0 = const()[name = tensor<string, []>("transpose_96_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_97_perm_0 = const()[name = tensor<string, []>("transpose_97_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_97 = transpose(perm = transpose_97_perm_0, x = new_k_1_cast_fp16)[name = tensor<string, []>("transpose_262")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_96 = transpose(perm = transpose_96_perm_0, x = q_1_cast_fp16)[name = tensor<string, []>("transpose_263")]; |
| tensor<fp16, [1, 12, 111, 111]> var_237_cast_fp16 = matmul(transpose_x = var_237_transpose_x_0, transpose_y = var_237_transpose_y_0, x = transpose_96, y = transpose_97)[name = tensor<string, []>("op_237_cast_fp16")]; |
| tensor<fp16, []> var_238_to_fp16 = const()[name = tensor<string, []>("op_238_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_1_cast_fp16 = mul(x = var_237_cast_fp16, y = var_238_to_fp16)[name = tensor<string, []>("scores_1_cast_fp16")]; |
| tensor<fp16, []> var_196_to_fp16 = const()[name = tensor<string, []>("op_196_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [111, 111]> scores_3_cast_fp16_x_0 = const()[name = tensor<string, []>("scores_3_cast_fp16_x_0"), val = tensor<fp16, [111, 111]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2292800)))]; |
| tensor<fp16, [1, 12, 111, 111]> scores_3_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_1_cast_fp16)[name = tensor<string, []>("scores_3_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_1_cast_fp16 = softmax(axis = var_201, x = scores_3_cast_fp16)[name = tensor<string, []>("probs_1_cast_fp16")]; |
| tensor<bool, []> var_258_transpose_x_0 = const()[name = tensor<string, []>("op_258_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_258_transpose_y_0 = const()[name = tensor<string, []>("op_258_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_1_cast_fp16 = transpose(perm = var_235, x = new_v_1_cast_fp16)[name = tensor<string, []>("transpose_261")]; |
| tensor<fp16, [1, 12, 111, 64]> var_258_cast_fp16 = matmul(transpose_x = var_258_transpose_x_0, transpose_y = var_258_transpose_y_0, x = probs_1_cast_fp16, y = vT_1_cast_fp16)[name = tensor<string, []>("op_258_cast_fp16")]; |
| tensor<int32, [4]> var_259 = const()[name = tensor<string, []>("op_259"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_263 = const()[name = tensor<string, []>("op_263"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_1_cast_fp16 = transpose(perm = var_259, x = var_258_cast_fp16)[name = tensor<string, []>("transpose_260")]; |
| tensor<fp16, [1, 111, 768]> input_23_cast_fp16 = reshape(shape = var_263, x = y_1_cast_fp16)[name = tensor<string, []>("input_23_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_0_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2317568))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2908288))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [768]> linear_1_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_1_bias_0_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2909888)))]; |
| tensor<fp16, [1, 111, 768]> linear_1_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_self_attention_o_net_weight_to_fp16_quantized, x = input_23_cast_fp16)[name = tensor<string, []>("linear_1_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_25_cast_fp16 = add(x = x_3_cast_fp16, y = linear_1_cast_fp16)[name = tensor<string, []>("input_25_cast_fp16")]; |
| tensor<int32, [1]> x_5_axes_0 = const()[name = tensor<string, []>("x_5_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_0_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2911488)))]; |
| tensor<fp16, [1, 111, 768]> x_5_cast_fp16 = layer_norm(axes = x_5_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_xattn_query_weight_to_fp16, x = input_25_cast_fp16)[name = tensor<string, []>("x_5_cast_fp16")]; |
| tensor<int32, [1]> memory_1_axes_0 = const()[name = tensor<string, []>("memory_1_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<string, []> encoder_output_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_output_to_fp16_dtype_0"), val = tensor<string, []>("fp16")]; |
| tensor<fp16, [768]> dec_layers_0_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2913088)))]; |
| tensor<fp16, [1, 256, 768]> encoder_output_to_fp16 = cast(dtype = encoder_output_to_fp16_dtype_0, x = encoder_output)[name = tensor<string, []>("cast_51")]; |
| tensor<fp16, [1, 256, 768]> memory_1_cast_fp16 = layer_norm(axes = memory_1_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_1_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2914688))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013248))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [128]> linear_2_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_2_bias_0_to_fp16"), val = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013568)))]; |
| tensor<fp16, [1, 111, 128]> linear_2_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_0_cross_attention_q_net_weight_to_fp16_quantized, x = x_5_cast_fp16)[name = tensor<string, []>("linear_2_cast_fp16")]; |
| tensor<int32, [4]> var_285 = const()[name = tensor<string, []>("op_285"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_5_cast_fp16 = reshape(shape = var_285, x = linear_2_cast_fp16)[name = tensor<string, []>("q_5_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013888))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210880))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [256]> linear_3_bias_0_to_fp16 = const()[name = tensor<string, []>("linear_3_bias_0_to_fp16"), val = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3211456)))]; |
| tensor<fp16, [1, 256, 256]> linear_3_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_0_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_1_cast_fp16)[name = tensor<string, []>("linear_3_cast_fp16")]; |
| tensor<int32, [5]> var_291 = const()[name = tensor<string, []>("op_291"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_1_cast_fp16 = reshape(shape = var_291, x = linear_3_cast_fp16)[name = tensor<string, []>("kv_1_cast_fp16")]; |
| tensor<int32, [5]> k_1_begin_0 = const()[name = tensor<string, []>("k_1_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_1_end_0 = const()[name = tensor<string, []>("k_1_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_1_end_mask_0 = const()[name = tensor<string, []>("k_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_1_squeeze_mask_0 = const()[name = tensor<string, []>("k_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_1_cast_fp16 = slice_by_index(begin = k_1_begin_0, end = k_1_end_0, end_mask = k_1_end_mask_0, squeeze_mask = k_1_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor<string, []>("k_1_cast_fp16")]; |
| tensor<string, []> k_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_1_begin_0 = const()[name = tensor<string, []>("v_1_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_1_end_0 = const()[name = tensor<string, []>("v_1_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_1_end_mask_0 = const()[name = tensor<string, []>("v_1_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_1_squeeze_mask_0 = const()[name = tensor<string, []>("v_1_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_1_cast_fp16 = slice_by_index(begin = v_1_begin_0, end = v_1_end_0, end_mask = v_1_end_mask_0, squeeze_mask = v_1_squeeze_mask_0, x = kv_1_cast_fp16)[name = tensor<string, []>("v_1_cast_fp16")]; |
| tensor<string, []> v_1_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_1_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_304 = const()[name = tensor<string, []>("op_304"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_306_transpose_x_0 = const()[name = tensor<string, []>("op_306_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_306_transpose_y_0 = const()[name = tensor<string, []>("op_306_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_98_perm_0 = const()[name = tensor<string, []>("transpose_98_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_99_perm_0 = const()[name = tensor<string, []>("transpose_99_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_99 = transpose(perm = transpose_99_perm_0, x = k_1_cast_fp16)[name = tensor<string, []>("transpose_258")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_98 = transpose(perm = transpose_98_perm_0, x = q_5_cast_fp16)[name = tensor<string, []>("transpose_259")]; |
| tensor<fp16, [1, 1, 111, 256]> var_306_cast_fp16 = matmul(transpose_x = var_306_transpose_x_0, transpose_y = var_306_transpose_y_0, x = transpose_98, y = transpose_99)[name = tensor<string, []>("op_306_cast_fp16")]; |
| tensor<fp16, []> var_307_to_fp16 = const()[name = tensor<string, []>("op_307_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_7_cast_fp16 = mul(x = var_306_cast_fp16, y = var_307_to_fp16)[name = tensor<string, []>("scores_7_cast_fp16")]; |
| tensor<int32, [1]> var_310_axes_0 = const()[name = tensor<string, []>("op_310_axes_0"), val = tensor<int32, [1]>([1])]; |
| tensor<string, []> encoder_mask_to_fp16_dtype_0 = const()[name = tensor<string, []>("encoder_mask_to_fp16_dtype_0"), val = tensor<string, []>("fp16")]; |
| tensor<fp16, [1, 256]> encoder_mask_to_fp16 = cast(dtype = encoder_mask_to_fp16_dtype_0, x = encoder_mask)[name = tensor<string, []>("cast_50")]; |
| tensor<fp16, [1, 1, 256]> var_310_cast_fp16 = expand_dims(axes = var_310_axes_0, x = encoder_mask_to_fp16)[name = tensor<string, []>("op_310_cast_fp16")]; |
| tensor<int32, [1]> var_311_axes_0 = const()[name = tensor<string, []>("op_311_axes_0"), val = tensor<int32, [1]>([2])]; |
| tensor<fp16, [1, 1, 1, 256]> var_311_cast_fp16 = expand_dims(axes = var_311_axes_0, x = var_310_cast_fp16)[name = tensor<string, []>("op_311_cast_fp16")]; |
| tensor<fp16, []> var_186_promoted_1_to_fp16 = const()[name = tensor<string, []>("op_186_promoted_1_to_fp16"), val = tensor<fp16, []>(0x0p+0)]; |
| tensor<bool, [1, 1, 1, 256]> var_313_cast_fp16 = equal(x = var_311_cast_fp16, y = var_186_promoted_1_to_fp16)[name = tensor<string, []>("op_313_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_9_cast_fp16 = select(a = var_196_to_fp16, b = scores_7_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_9_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_3_cast_fp16 = softmax(axis = var_201, x = scores_9_cast_fp16)[name = tensor<string, []>("probs_3_cast_fp16")]; |
| tensor<bool, []> var_316_transpose_x_0 = const()[name = tensor<string, []>("op_316_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_316_transpose_y_0 = const()[name = tensor<string, []>("op_316_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_3_cast_fp16 = transpose(perm = var_304, x = v_1_cast_fp16)[name = tensor<string, []>("transpose_257")]; |
| tensor<fp16, [1, 1, 111, 128]> var_316_cast_fp16 = matmul(transpose_x = var_316_transpose_x_0, transpose_y = var_316_transpose_y_0, x = probs_3_cast_fp16, y = vT_3_cast_fp16)[name = tensor<string, []>("op_316_cast_fp16")]; |
| tensor<int32, [4]> var_317 = const()[name = tensor<string, []>("op_317"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_319 = const()[name = tensor<string, []>("op_319"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_318_cast_fp16 = transpose(perm = var_317, x = var_316_cast_fp16)[name = tensor<string, []>("transpose_256")]; |
| tensor<fp16, [1, 111, 128]> input_27_cast_fp16 = reshape(shape = var_319, x = var_318_cast_fp16)[name = tensor<string, []>("input_27_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3212032))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3310400))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_4_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_0_cross_attention_o_net_weight_to_fp16_quantized, x = input_27_cast_fp16)[name = tensor<string, []>("linear_4_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_29_cast_fp16 = add(x = input_25_cast_fp16, y = linear_4_cast_fp16)[name = tensor<string, []>("input_29_cast_fp16")]; |
| tensor<int32, [1]> x_7_axes_0 = const()[name = tensor<string, []>("x_7_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_0_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_0_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3312000)))]; |
| tensor<fp16, [1, 111, 768]> x_7_cast_fp16 = layer_norm(axes = x_7_axes_0, epsilon = var_199_to_fp16, gamma = dec_layers_0_norm_pos_ff_weight_to_fp16, x = input_29_cast_fp16)[name = tensor<string, []>("x_7_cast_fp16")]; |
| tensor<int32, [3]> var_336 = const()[name = tensor<string, []>("op_336"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_3_pad_type_0 = const()[name = tensor<string, []>("y_3_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_3_strides_0 = const()[name = tensor<string, []>("y_3_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_3_pad_0 = const()[name = tensor<string, []>("y_3_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_3_dilations_0 = const()[name = tensor<string, []>("y_3_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_3_groups_0 = const()[name = tensor<string, []>("y_3_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_0_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3313600))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5676096))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_11_cast_fp16 = transpose(perm = var_336, x = x_7_cast_fp16)[name = tensor<string, []>("transpose_255")]; |
| tensor<fp16, [1, 3072, 111]> y_3_cast_fp16 = conv(dilations = y_3_dilations_0, groups = y_3_groups_0, pad = y_3_pad_0, pad_type = y_3_pad_type_0, strides = y_3_strides_0, weight = dec_layers_0_pos_ff_proj_weight_to_fp16_quantized, x = x_11_cast_fp16)[name = tensor<string, []>("y_3_cast_fp16")]; |
| tensor<string, []> x_13_mode_0 = const()[name = tensor<string, []>("x_13_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_13_cast_fp16 = gelu(mode = x_13_mode_0, x = y_3_cast_fp16)[name = tensor<string, []>("x_13_cast_fp16")]; |
| tensor<string, []> y_5_pad_type_0 = const()[name = tensor<string, []>("y_5_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_5_strides_0 = const()[name = tensor<string, []>("y_5_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_5_pad_0 = const()[name = tensor<string, []>("y_5_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_5_dilations_0 = const()[name = tensor<string, []>("y_5_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_5_groups_0 = const()[name = tensor<string, []>("y_5_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5682304))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8041664))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_5_cast_fp16 = conv(dilations = y_5_dilations_0, groups = y_5_groups_0, pad = y_5_pad_0, pad_type = y_5_pad_type_0, strides = y_5_strides_0, weight = dec_layers_0_pos_ff_o_net_weight_to_fp16_quantized, x = x_13_cast_fp16)[name = tensor<string, []>("y_5_cast_fp16")]; |
| tensor<int32, [3]> var_356 = const()[name = tensor<string, []>("op_356"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_9_cast_fp16 = transpose(perm = var_356, x = y_5_cast_fp16)[name = tensor<string, []>("transpose_254")]; |
| tensor<fp16, [1, 111, 768]> x_17_cast_fp16 = add(x = input_29_cast_fp16, y = h_9_cast_fp16)[name = tensor<string, []>("x_17_cast_fp16")]; |
| tensor<int32, []> var_386 = const()[name = tensor<string, []>("op_386"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_33_axes_0 = const()[name = tensor<string, []>("input_33_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_1_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8043264)))]; |
| tensor<fp16, []> var_384_to_fp16 = const()[name = tensor<string, []>("op_384_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_33_cast_fp16 = layer_norm(axes = input_33_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_self_weight_to_fp16, x = x_17_cast_fp16)[name = tensor<string, []>("input_33_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(8044864))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9814400))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_5_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_1_self_attention_qkv_net_weight_to_fp16_quantized, x = input_33_cast_fp16)[name = tensor<string, []>("linear_5_cast_fp16")]; |
| tensor<int32, [5]> var_405 = const()[name = tensor<string, []>("op_405"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_7_cast_fp16 = reshape(shape = var_405, x = linear_5_cast_fp16)[name = tensor<string, []>("qkv_7_cast_fp16")]; |
| tensor<int32, [5]> q_9_begin_0 = const()[name = tensor<string, []>("q_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_9_end_0 = const()[name = tensor<string, []>("q_9_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_9_end_mask_0 = const()[name = tensor<string, []>("q_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_9_squeeze_mask_0 = const()[name = tensor<string, []>("q_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_9_cast_fp16 = slice_by_index(begin = q_9_begin_0, end = q_9_end_0, end_mask = q_9_end_mask_0, squeeze_mask = q_9_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("q_9_cast_fp16")]; |
| tensor<int32, [5]> new_k_3_begin_0 = const()[name = tensor<string, []>("new_k_3_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_3_end_0 = const()[name = tensor<string, []>("new_k_3_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_3_end_mask_0 = const()[name = tensor<string, []>("new_k_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_3_cast_fp16 = slice_by_index(begin = new_k_3_begin_0, end = new_k_3_end_0, end_mask = new_k_3_end_mask_0, squeeze_mask = new_k_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_k_3_cast_fp16")]; |
| tensor<string, []> new_k_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_3_begin_0 = const()[name = tensor<string, []>("new_v_3_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_3_end_0 = const()[name = tensor<string, []>("new_v_3_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_3_end_mask_0 = const()[name = tensor<string, []>("new_v_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_3_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_3_cast_fp16 = slice_by_index(begin = new_v_3_begin_0, end = new_v_3_end_0, end_mask = new_v_3_end_mask_0, squeeze_mask = new_v_3_squeeze_mask_0, x = qkv_7_cast_fp16)[name = tensor<string, []>("new_v_3_cast_fp16")]; |
| tensor<string, []> new_v_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_420 = const()[name = tensor<string, []>("op_420"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_422_transpose_x_0 = const()[name = tensor<string, []>("op_422_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_422_transpose_y_0 = const()[name = tensor<string, []>("op_422_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_100_perm_0 = const()[name = tensor<string, []>("transpose_100_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_101_perm_0 = const()[name = tensor<string, []>("transpose_101_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_101 = transpose(perm = transpose_101_perm_0, x = new_k_3_cast_fp16)[name = tensor<string, []>("transpose_252")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_100 = transpose(perm = transpose_100_perm_0, x = q_9_cast_fp16)[name = tensor<string, []>("transpose_253")]; |
| tensor<fp16, [1, 12, 111, 111]> var_422_cast_fp16 = matmul(transpose_x = var_422_transpose_x_0, transpose_y = var_422_transpose_y_0, x = transpose_100, y = transpose_101)[name = tensor<string, []>("op_422_cast_fp16")]; |
| tensor<fp16, []> var_423_to_fp16 = const()[name = tensor<string, []>("op_423_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_11_cast_fp16 = mul(x = var_422_cast_fp16, y = var_423_to_fp16)[name = tensor<string, []>("scores_11_cast_fp16")]; |
| tensor<fp16, []> var_381_to_fp16 = const()[name = tensor<string, []>("op_381_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_13_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_11_cast_fp16)[name = tensor<string, []>("scores_13_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_5_cast_fp16 = softmax(axis = var_386, x = scores_13_cast_fp16)[name = tensor<string, []>("probs_5_cast_fp16")]; |
| tensor<bool, []> var_443_transpose_x_0 = const()[name = tensor<string, []>("op_443_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_443_transpose_y_0 = const()[name = tensor<string, []>("op_443_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_5_cast_fp16 = transpose(perm = var_420, x = new_v_3_cast_fp16)[name = tensor<string, []>("transpose_251")]; |
| tensor<fp16, [1, 12, 111, 64]> var_443_cast_fp16 = matmul(transpose_x = var_443_transpose_x_0, transpose_y = var_443_transpose_y_0, x = probs_5_cast_fp16, y = vT_5_cast_fp16)[name = tensor<string, []>("op_443_cast_fp16")]; |
| tensor<int32, [4]> var_444 = const()[name = tensor<string, []>("op_444"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_448 = const()[name = tensor<string, []>("op_448"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_7_cast_fp16 = transpose(perm = var_444, x = var_443_cast_fp16)[name = tensor<string, []>("transpose_250")]; |
| tensor<fp16, [1, 111, 768]> input_35_cast_fp16 = reshape(shape = var_448, x = y_7_cast_fp16)[name = tensor<string, []>("input_35_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_1_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(9819072))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10408960))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_6_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_self_attention_o_net_weight_to_fp16_quantized, x = input_35_cast_fp16)[name = tensor<string, []>("linear_6_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_37_cast_fp16 = add(x = x_17_cast_fp16, y = linear_6_cast_fp16)[name = tensor<string, []>("input_37_cast_fp16")]; |
| tensor<int32, [1]> x_21_axes_0 = const()[name = tensor<string, []>("x_21_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_1_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10410560)))]; |
| tensor<fp16, [1, 111, 768]> x_21_cast_fp16 = layer_norm(axes = x_21_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_xattn_query_weight_to_fp16, x = input_37_cast_fp16)[name = tensor<string, []>("x_21_cast_fp16")]; |
| tensor<int32, [1]> memory_3_axes_0 = const()[name = tensor<string, []>("memory_3_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_1_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10412160)))]; |
| tensor<fp16, [1, 256, 768]> memory_3_cast_fp16 = layer_norm(axes = memory_3_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_3_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10413760))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10512128))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_7_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_1_cross_attention_q_net_weight_to_fp16_quantized, x = x_21_cast_fp16)[name = tensor<string, []>("linear_7_cast_fp16")]; |
| tensor<int32, [4]> var_470 = const()[name = tensor<string, []>("op_470"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_13_cast_fp16 = reshape(shape = var_470, x = linear_7_cast_fp16)[name = tensor<string, []>("q_13_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10512448))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10709120))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_8_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_1_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_3_cast_fp16)[name = tensor<string, []>("linear_8_cast_fp16")]; |
| tensor<int32, [5]> var_476 = const()[name = tensor<string, []>("op_476"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_3_cast_fp16 = reshape(shape = var_476, x = linear_8_cast_fp16)[name = tensor<string, []>("kv_3_cast_fp16")]; |
| tensor<int32, [5]> k_3_begin_0 = const()[name = tensor<string, []>("k_3_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_3_end_0 = const()[name = tensor<string, []>("k_3_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_3_end_mask_0 = const()[name = tensor<string, []>("k_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_3_squeeze_mask_0 = const()[name = tensor<string, []>("k_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_3_cast_fp16 = slice_by_index(begin = k_3_begin_0, end = k_3_end_0, end_mask = k_3_end_mask_0, squeeze_mask = k_3_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor<string, []>("k_3_cast_fp16")]; |
| tensor<string, []> k_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_3_begin_0 = const()[name = tensor<string, []>("v_3_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_3_end_0 = const()[name = tensor<string, []>("v_3_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_3_end_mask_0 = const()[name = tensor<string, []>("v_3_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_3_squeeze_mask_0 = const()[name = tensor<string, []>("v_3_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_3_cast_fp16 = slice_by_index(begin = v_3_begin_0, end = v_3_end_0, end_mask = v_3_end_mask_0, squeeze_mask = v_3_squeeze_mask_0, x = kv_3_cast_fp16)[name = tensor<string, []>("v_3_cast_fp16")]; |
| tensor<string, []> v_3_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_3_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_489 = const()[name = tensor<string, []>("op_489"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_491_transpose_x_0 = const()[name = tensor<string, []>("op_491_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_491_transpose_y_0 = const()[name = tensor<string, []>("op_491_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_102_perm_0 = const()[name = tensor<string, []>("transpose_102_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_103_perm_0 = const()[name = tensor<string, []>("transpose_103_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_103 = transpose(perm = transpose_103_perm_0, x = k_3_cast_fp16)[name = tensor<string, []>("transpose_248")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_102 = transpose(perm = transpose_102_perm_0, x = q_13_cast_fp16)[name = tensor<string, []>("transpose_249")]; |
| tensor<fp16, [1, 1, 111, 256]> var_491_cast_fp16 = matmul(transpose_x = var_491_transpose_x_0, transpose_y = var_491_transpose_y_0, x = transpose_102, y = transpose_103)[name = tensor<string, []>("op_491_cast_fp16")]; |
| tensor<fp16, []> var_492_to_fp16 = const()[name = tensor<string, []>("op_492_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_17_cast_fp16 = mul(x = var_491_cast_fp16, y = var_492_to_fp16)[name = tensor<string, []>("scores_17_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_19_cast_fp16 = select(a = var_381_to_fp16, b = scores_17_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_19_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_7_cast_fp16 = softmax(axis = var_386, x = scores_19_cast_fp16)[name = tensor<string, []>("probs_7_cast_fp16")]; |
| tensor<bool, []> var_501_transpose_x_0 = const()[name = tensor<string, []>("op_501_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_501_transpose_y_0 = const()[name = tensor<string, []>("op_501_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_7_cast_fp16 = transpose(perm = var_489, x = v_3_cast_fp16)[name = tensor<string, []>("transpose_247")]; |
| tensor<fp16, [1, 1, 111, 128]> var_501_cast_fp16 = matmul(transpose_x = var_501_transpose_x_0, transpose_y = var_501_transpose_y_0, x = probs_7_cast_fp16, y = vT_7_cast_fp16)[name = tensor<string, []>("op_501_cast_fp16")]; |
| tensor<int32, [4]> var_502 = const()[name = tensor<string, []>("op_502"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_504 = const()[name = tensor<string, []>("op_504"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_503_cast_fp16 = transpose(perm = var_502, x = var_501_cast_fp16)[name = tensor<string, []>("transpose_246")]; |
| tensor<fp16, [1, 111, 128]> input_39_cast_fp16 = reshape(shape = var_504, x = var_503_cast_fp16)[name = tensor<string, []>("input_39_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10709696))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10808064))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_9_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_1_cross_attention_o_net_weight_to_fp16_quantized, x = input_39_cast_fp16)[name = tensor<string, []>("linear_9_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_41_cast_fp16 = add(x = input_37_cast_fp16, y = linear_9_cast_fp16)[name = tensor<string, []>("input_41_cast_fp16")]; |
| tensor<int32, [1]> x_23_axes_0 = const()[name = tensor<string, []>("x_23_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_1_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_1_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10809664)))]; |
| tensor<fp16, [1, 111, 768]> x_23_cast_fp16 = layer_norm(axes = x_23_axes_0, epsilon = var_384_to_fp16, gamma = dec_layers_1_norm_pos_ff_weight_to_fp16, x = input_41_cast_fp16)[name = tensor<string, []>("x_23_cast_fp16")]; |
| tensor<int32, [3]> var_521 = const()[name = tensor<string, []>("op_521"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_9_pad_type_0 = const()[name = tensor<string, []>("y_9_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_9_strides_0 = const()[name = tensor<string, []>("y_9_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_9_pad_0 = const()[name = tensor<string, []>("y_9_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_9_dilations_0 = const()[name = tensor<string, []>("y_9_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_9_groups_0 = const()[name = tensor<string, []>("y_9_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_1_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(10811264))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13170624))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_27_cast_fp16 = transpose(perm = var_521, x = x_23_cast_fp16)[name = tensor<string, []>("transpose_245")]; |
| tensor<fp16, [1, 3072, 111]> y_9_cast_fp16 = conv(dilations = y_9_dilations_0, groups = y_9_groups_0, pad = y_9_pad_0, pad_type = y_9_pad_type_0, strides = y_9_strides_0, weight = dec_layers_1_pos_ff_proj_weight_to_fp16_quantized, x = x_27_cast_fp16)[name = tensor<string, []>("y_9_cast_fp16")]; |
| tensor<string, []> x_29_mode_0 = const()[name = tensor<string, []>("x_29_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_29_cast_fp16 = gelu(mode = x_29_mode_0, x = y_9_cast_fp16)[name = tensor<string, []>("x_29_cast_fp16")]; |
| tensor<string, []> y_11_pad_type_0 = const()[name = tensor<string, []>("y_11_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_11_strides_0 = const()[name = tensor<string, []>("y_11_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_11_pad_0 = const()[name = tensor<string, []>("y_11_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_11_dilations_0 = const()[name = tensor<string, []>("y_11_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_11_groups_0 = const()[name = tensor<string, []>("y_11_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(13176832))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15536192))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_11_cast_fp16 = conv(dilations = y_11_dilations_0, groups = y_11_groups_0, pad = y_11_pad_0, pad_type = y_11_pad_type_0, strides = y_11_strides_0, weight = dec_layers_1_pos_ff_o_net_weight_to_fp16_quantized, x = x_29_cast_fp16)[name = tensor<string, []>("y_11_cast_fp16")]; |
| tensor<int32, [3]> var_541 = const()[name = tensor<string, []>("op_541"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_19_cast_fp16 = transpose(perm = var_541, x = y_11_cast_fp16)[name = tensor<string, []>("transpose_244")]; |
| tensor<fp16, [1, 111, 768]> x_33_cast_fp16 = add(x = input_41_cast_fp16, y = h_19_cast_fp16)[name = tensor<string, []>("x_33_cast_fp16")]; |
| tensor<int32, []> var_571 = const()[name = tensor<string, []>("op_571"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_45_axes_0 = const()[name = tensor<string, []>("input_45_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_2_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15537792)))]; |
| tensor<fp16, []> var_569_to_fp16 = const()[name = tensor<string, []>("op_569_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_45_cast_fp16 = layer_norm(axes = input_45_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_self_weight_to_fp16, x = x_33_cast_fp16)[name = tensor<string, []>("input_45_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(15539392))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17308928))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_10_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_2_self_attention_qkv_net_weight_to_fp16_quantized, x = input_45_cast_fp16)[name = tensor<string, []>("linear_10_cast_fp16")]; |
| tensor<int32, [5]> var_590 = const()[name = tensor<string, []>("op_590"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_11_cast_fp16 = reshape(shape = var_590, x = linear_10_cast_fp16)[name = tensor<string, []>("qkv_11_cast_fp16")]; |
| tensor<int32, [5]> q_17_begin_0 = const()[name = tensor<string, []>("q_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_17_end_0 = const()[name = tensor<string, []>("q_17_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_17_end_mask_0 = const()[name = tensor<string, []>("q_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_17_squeeze_mask_0 = const()[name = tensor<string, []>("q_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_17_cast_fp16 = slice_by_index(begin = q_17_begin_0, end = q_17_end_0, end_mask = q_17_end_mask_0, squeeze_mask = q_17_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("q_17_cast_fp16")]; |
| tensor<int32, [5]> new_k_5_begin_0 = const()[name = tensor<string, []>("new_k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_5_end_0 = const()[name = tensor<string, []>("new_k_5_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_5_end_mask_0 = const()[name = tensor<string, []>("new_k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_5_cast_fp16 = slice_by_index(begin = new_k_5_begin_0, end = new_k_5_end_0, end_mask = new_k_5_end_mask_0, squeeze_mask = new_k_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_k_5_cast_fp16")]; |
| tensor<string, []> new_k_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_5_begin_0 = const()[name = tensor<string, []>("new_v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_5_end_0 = const()[name = tensor<string, []>("new_v_5_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_5_end_mask_0 = const()[name = tensor<string, []>("new_v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_5_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_5_cast_fp16 = slice_by_index(begin = new_v_5_begin_0, end = new_v_5_end_0, end_mask = new_v_5_end_mask_0, squeeze_mask = new_v_5_squeeze_mask_0, x = qkv_11_cast_fp16)[name = tensor<string, []>("new_v_5_cast_fp16")]; |
| tensor<string, []> new_v_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_605 = const()[name = tensor<string, []>("op_605"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_607_transpose_x_0 = const()[name = tensor<string, []>("op_607_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_607_transpose_y_0 = const()[name = tensor<string, []>("op_607_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_104_perm_0 = const()[name = tensor<string, []>("transpose_104_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_105_perm_0 = const()[name = tensor<string, []>("transpose_105_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_105 = transpose(perm = transpose_105_perm_0, x = new_k_5_cast_fp16)[name = tensor<string, []>("transpose_242")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_104 = transpose(perm = transpose_104_perm_0, x = q_17_cast_fp16)[name = tensor<string, []>("transpose_243")]; |
| tensor<fp16, [1, 12, 111, 111]> var_607_cast_fp16 = matmul(transpose_x = var_607_transpose_x_0, transpose_y = var_607_transpose_y_0, x = transpose_104, y = transpose_105)[name = tensor<string, []>("op_607_cast_fp16")]; |
| tensor<fp16, []> var_608_to_fp16 = const()[name = tensor<string, []>("op_608_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_21_cast_fp16 = mul(x = var_607_cast_fp16, y = var_608_to_fp16)[name = tensor<string, []>("scores_21_cast_fp16")]; |
| tensor<fp16, []> var_566_to_fp16 = const()[name = tensor<string, []>("op_566_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_23_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_21_cast_fp16)[name = tensor<string, []>("scores_23_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_9_cast_fp16 = softmax(axis = var_571, x = scores_23_cast_fp16)[name = tensor<string, []>("probs_9_cast_fp16")]; |
| tensor<bool, []> var_628_transpose_x_0 = const()[name = tensor<string, []>("op_628_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_628_transpose_y_0 = const()[name = tensor<string, []>("op_628_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_9_cast_fp16 = transpose(perm = var_605, x = new_v_5_cast_fp16)[name = tensor<string, []>("transpose_241")]; |
| tensor<fp16, [1, 12, 111, 64]> var_628_cast_fp16 = matmul(transpose_x = var_628_transpose_x_0, transpose_y = var_628_transpose_y_0, x = probs_9_cast_fp16, y = vT_9_cast_fp16)[name = tensor<string, []>("op_628_cast_fp16")]; |
| tensor<int32, [4]> var_629 = const()[name = tensor<string, []>("op_629"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_633 = const()[name = tensor<string, []>("op_633"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_13_cast_fp16 = transpose(perm = var_629, x = var_628_cast_fp16)[name = tensor<string, []>("transpose_240")]; |
| tensor<fp16, [1, 111, 768]> input_47_cast_fp16 = reshape(shape = var_633, x = y_13_cast_fp16)[name = tensor<string, []>("input_47_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_2_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17313600))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17903488))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_11_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_self_attention_o_net_weight_to_fp16_quantized, x = input_47_cast_fp16)[name = tensor<string, []>("linear_11_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_49_cast_fp16 = add(x = x_33_cast_fp16, y = linear_11_cast_fp16)[name = tensor<string, []>("input_49_cast_fp16")]; |
| tensor<int32, [1]> x_37_axes_0 = const()[name = tensor<string, []>("x_37_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_2_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17905088)))]; |
| tensor<fp16, [1, 111, 768]> x_37_cast_fp16 = layer_norm(axes = x_37_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_xattn_query_weight_to_fp16, x = input_49_cast_fp16)[name = tensor<string, []>("x_37_cast_fp16")]; |
| tensor<int32, [1]> memory_5_axes_0 = const()[name = tensor<string, []>("memory_5_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_2_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17906688)))]; |
| tensor<fp16, [1, 256, 768]> memory_5_cast_fp16 = layer_norm(axes = memory_5_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_5_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(17908288))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18006656))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_12_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_2_cross_attention_q_net_weight_to_fp16_quantized, x = x_37_cast_fp16)[name = tensor<string, []>("linear_12_cast_fp16")]; |
| tensor<int32, [4]> var_655 = const()[name = tensor<string, []>("op_655"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_21_cast_fp16 = reshape(shape = var_655, x = linear_12_cast_fp16)[name = tensor<string, []>("q_21_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18006976))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18203648))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_13_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_2_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_5_cast_fp16)[name = tensor<string, []>("linear_13_cast_fp16")]; |
| tensor<int32, [5]> var_661 = const()[name = tensor<string, []>("op_661"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_5_cast_fp16 = reshape(shape = var_661, x = linear_13_cast_fp16)[name = tensor<string, []>("kv_5_cast_fp16")]; |
| tensor<int32, [5]> k_5_begin_0 = const()[name = tensor<string, []>("k_5_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_5_end_0 = const()[name = tensor<string, []>("k_5_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_5_end_mask_0 = const()[name = tensor<string, []>("k_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_5_squeeze_mask_0 = const()[name = tensor<string, []>("k_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_5_cast_fp16 = slice_by_index(begin = k_5_begin_0, end = k_5_end_0, end_mask = k_5_end_mask_0, squeeze_mask = k_5_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor<string, []>("k_5_cast_fp16")]; |
| tensor<string, []> k_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_5_begin_0 = const()[name = tensor<string, []>("v_5_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_5_end_0 = const()[name = tensor<string, []>("v_5_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_5_end_mask_0 = const()[name = tensor<string, []>("v_5_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_5_squeeze_mask_0 = const()[name = tensor<string, []>("v_5_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_5_cast_fp16 = slice_by_index(begin = v_5_begin_0, end = v_5_end_0, end_mask = v_5_end_mask_0, squeeze_mask = v_5_squeeze_mask_0, x = kv_5_cast_fp16)[name = tensor<string, []>("v_5_cast_fp16")]; |
| tensor<string, []> v_5_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_5_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_674 = const()[name = tensor<string, []>("op_674"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_676_transpose_x_0 = const()[name = tensor<string, []>("op_676_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_676_transpose_y_0 = const()[name = tensor<string, []>("op_676_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_106_perm_0 = const()[name = tensor<string, []>("transpose_106_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_107_perm_0 = const()[name = tensor<string, []>("transpose_107_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_107 = transpose(perm = transpose_107_perm_0, x = k_5_cast_fp16)[name = tensor<string, []>("transpose_238")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_106 = transpose(perm = transpose_106_perm_0, x = q_21_cast_fp16)[name = tensor<string, []>("transpose_239")]; |
| tensor<fp16, [1, 1, 111, 256]> var_676_cast_fp16 = matmul(transpose_x = var_676_transpose_x_0, transpose_y = var_676_transpose_y_0, x = transpose_106, y = transpose_107)[name = tensor<string, []>("op_676_cast_fp16")]; |
| tensor<fp16, []> var_677_to_fp16 = const()[name = tensor<string, []>("op_677_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_27_cast_fp16 = mul(x = var_676_cast_fp16, y = var_677_to_fp16)[name = tensor<string, []>("scores_27_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_29_cast_fp16 = select(a = var_566_to_fp16, b = scores_27_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_29_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_11_cast_fp16 = softmax(axis = var_571, x = scores_29_cast_fp16)[name = tensor<string, []>("probs_11_cast_fp16")]; |
| tensor<bool, []> var_686_transpose_x_0 = const()[name = tensor<string, []>("op_686_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_686_transpose_y_0 = const()[name = tensor<string, []>("op_686_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_11_cast_fp16 = transpose(perm = var_674, x = v_5_cast_fp16)[name = tensor<string, []>("transpose_237")]; |
| tensor<fp16, [1, 1, 111, 128]> var_686_cast_fp16 = matmul(transpose_x = var_686_transpose_x_0, transpose_y = var_686_transpose_y_0, x = probs_11_cast_fp16, y = vT_11_cast_fp16)[name = tensor<string, []>("op_686_cast_fp16")]; |
| tensor<int32, [4]> var_687 = const()[name = tensor<string, []>("op_687"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_689 = const()[name = tensor<string, []>("op_689"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_688_cast_fp16 = transpose(perm = var_687, x = var_686_cast_fp16)[name = tensor<string, []>("transpose_236")]; |
| tensor<fp16, [1, 111, 128]> input_51_cast_fp16 = reshape(shape = var_689, x = var_688_cast_fp16)[name = tensor<string, []>("input_51_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18204224))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18302592))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_14_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_2_cross_attention_o_net_weight_to_fp16_quantized, x = input_51_cast_fp16)[name = tensor<string, []>("linear_14_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_53_cast_fp16 = add(x = input_49_cast_fp16, y = linear_14_cast_fp16)[name = tensor<string, []>("input_53_cast_fp16")]; |
| tensor<int32, [1]> x_39_axes_0 = const()[name = tensor<string, []>("x_39_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_2_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_2_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18304192)))]; |
| tensor<fp16, [1, 111, 768]> x_39_cast_fp16 = layer_norm(axes = x_39_axes_0, epsilon = var_569_to_fp16, gamma = dec_layers_2_norm_pos_ff_weight_to_fp16, x = input_53_cast_fp16)[name = tensor<string, []>("x_39_cast_fp16")]; |
| tensor<int32, [3]> var_706 = const()[name = tensor<string, []>("op_706"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_15_pad_type_0 = const()[name = tensor<string, []>("y_15_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_15_strides_0 = const()[name = tensor<string, []>("y_15_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_15_pad_0 = const()[name = tensor<string, []>("y_15_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_15_dilations_0 = const()[name = tensor<string, []>("y_15_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_15_groups_0 = const()[name = tensor<string, []>("y_15_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_2_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(18305792))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20665152))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_43_cast_fp16 = transpose(perm = var_706, x = x_39_cast_fp16)[name = tensor<string, []>("transpose_235")]; |
| tensor<fp16, [1, 3072, 111]> y_15_cast_fp16 = conv(dilations = y_15_dilations_0, groups = y_15_groups_0, pad = y_15_pad_0, pad_type = y_15_pad_type_0, strides = y_15_strides_0, weight = dec_layers_2_pos_ff_proj_weight_to_fp16_quantized, x = x_43_cast_fp16)[name = tensor<string, []>("y_15_cast_fp16")]; |
| tensor<string, []> x_45_mode_0 = const()[name = tensor<string, []>("x_45_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_45_cast_fp16 = gelu(mode = x_45_mode_0, x = y_15_cast_fp16)[name = tensor<string, []>("x_45_cast_fp16")]; |
| tensor<string, []> y_17_pad_type_0 = const()[name = tensor<string, []>("y_17_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_17_strides_0 = const()[name = tensor<string, []>("y_17_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_17_pad_0 = const()[name = tensor<string, []>("y_17_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_17_dilations_0 = const()[name = tensor<string, []>("y_17_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_17_groups_0 = const()[name = tensor<string, []>("y_17_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(20671360))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23030720))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_17_cast_fp16 = conv(dilations = y_17_dilations_0, groups = y_17_groups_0, pad = y_17_pad_0, pad_type = y_17_pad_type_0, strides = y_17_strides_0, weight = dec_layers_2_pos_ff_o_net_weight_to_fp16_quantized, x = x_45_cast_fp16)[name = tensor<string, []>("y_17_cast_fp16")]; |
| tensor<int32, [3]> var_726 = const()[name = tensor<string, []>("op_726"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_29_cast_fp16 = transpose(perm = var_726, x = y_17_cast_fp16)[name = tensor<string, []>("transpose_234")]; |
| tensor<fp16, [1, 111, 768]> x_49_cast_fp16 = add(x = input_53_cast_fp16, y = h_29_cast_fp16)[name = tensor<string, []>("x_49_cast_fp16")]; |
| tensor<int32, []> var_756 = const()[name = tensor<string, []>("op_756"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_57_axes_0 = const()[name = tensor<string, []>("input_57_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_3_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23032320)))]; |
| tensor<fp16, []> var_754_to_fp16 = const()[name = tensor<string, []>("op_754_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_57_cast_fp16 = layer_norm(axes = input_57_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_self_weight_to_fp16, x = x_49_cast_fp16)[name = tensor<string, []>("input_57_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(23033920))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24803456))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_15_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_3_self_attention_qkv_net_weight_to_fp16_quantized, x = input_57_cast_fp16)[name = tensor<string, []>("linear_15_cast_fp16")]; |
| tensor<int32, [5]> var_775 = const()[name = tensor<string, []>("op_775"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_15_cast_fp16 = reshape(shape = var_775, x = linear_15_cast_fp16)[name = tensor<string, []>("qkv_15_cast_fp16")]; |
| tensor<int32, [5]> q_25_begin_0 = const()[name = tensor<string, []>("q_25_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_25_end_0 = const()[name = tensor<string, []>("q_25_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_25_end_mask_0 = const()[name = tensor<string, []>("q_25_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_25_squeeze_mask_0 = const()[name = tensor<string, []>("q_25_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_25_cast_fp16 = slice_by_index(begin = q_25_begin_0, end = q_25_end_0, end_mask = q_25_end_mask_0, squeeze_mask = q_25_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("q_25_cast_fp16")]; |
| tensor<int32, [5]> new_k_7_begin_0 = const()[name = tensor<string, []>("new_k_7_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_7_end_0 = const()[name = tensor<string, []>("new_k_7_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_7_end_mask_0 = const()[name = tensor<string, []>("new_k_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_7_cast_fp16 = slice_by_index(begin = new_k_7_begin_0, end = new_k_7_end_0, end_mask = new_k_7_end_mask_0, squeeze_mask = new_k_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_k_7_cast_fp16")]; |
| tensor<string, []> new_k_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_7_begin_0 = const()[name = tensor<string, []>("new_v_7_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_7_end_0 = const()[name = tensor<string, []>("new_v_7_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_7_end_mask_0 = const()[name = tensor<string, []>("new_v_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_7_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_7_cast_fp16 = slice_by_index(begin = new_v_7_begin_0, end = new_v_7_end_0, end_mask = new_v_7_end_mask_0, squeeze_mask = new_v_7_squeeze_mask_0, x = qkv_15_cast_fp16)[name = tensor<string, []>("new_v_7_cast_fp16")]; |
| tensor<string, []> new_v_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_790 = const()[name = tensor<string, []>("op_790"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_792_transpose_x_0 = const()[name = tensor<string, []>("op_792_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_792_transpose_y_0 = const()[name = tensor<string, []>("op_792_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_108_perm_0 = const()[name = tensor<string, []>("transpose_108_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_109_perm_0 = const()[name = tensor<string, []>("transpose_109_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_109 = transpose(perm = transpose_109_perm_0, x = new_k_7_cast_fp16)[name = tensor<string, []>("transpose_232")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_108 = transpose(perm = transpose_108_perm_0, x = q_25_cast_fp16)[name = tensor<string, []>("transpose_233")]; |
| tensor<fp16, [1, 12, 111, 111]> var_792_cast_fp16 = matmul(transpose_x = var_792_transpose_x_0, transpose_y = var_792_transpose_y_0, x = transpose_108, y = transpose_109)[name = tensor<string, []>("op_792_cast_fp16")]; |
| tensor<fp16, []> var_793_to_fp16 = const()[name = tensor<string, []>("op_793_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_31_cast_fp16 = mul(x = var_792_cast_fp16, y = var_793_to_fp16)[name = tensor<string, []>("scores_31_cast_fp16")]; |
| tensor<fp16, []> var_751_to_fp16 = const()[name = tensor<string, []>("op_751_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_33_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_31_cast_fp16)[name = tensor<string, []>("scores_33_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_13_cast_fp16 = softmax(axis = var_756, x = scores_33_cast_fp16)[name = tensor<string, []>("probs_13_cast_fp16")]; |
| tensor<bool, []> var_813_transpose_x_0 = const()[name = tensor<string, []>("op_813_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_813_transpose_y_0 = const()[name = tensor<string, []>("op_813_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_13_cast_fp16 = transpose(perm = var_790, x = new_v_7_cast_fp16)[name = tensor<string, []>("transpose_231")]; |
| tensor<fp16, [1, 12, 111, 64]> var_813_cast_fp16 = matmul(transpose_x = var_813_transpose_x_0, transpose_y = var_813_transpose_y_0, x = probs_13_cast_fp16, y = vT_13_cast_fp16)[name = tensor<string, []>("op_813_cast_fp16")]; |
| tensor<int32, [4]> var_814 = const()[name = tensor<string, []>("op_814"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_818 = const()[name = tensor<string, []>("op_818"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_19_cast_fp16 = transpose(perm = var_814, x = var_813_cast_fp16)[name = tensor<string, []>("transpose_230")]; |
| tensor<fp16, [1, 111, 768]> input_59_cast_fp16 = reshape(shape = var_818, x = y_19_cast_fp16)[name = tensor<string, []>("input_59_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_3_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(24808128))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25398016))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_16_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_self_attention_o_net_weight_to_fp16_quantized, x = input_59_cast_fp16)[name = tensor<string, []>("linear_16_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_61_cast_fp16 = add(x = x_49_cast_fp16, y = linear_16_cast_fp16)[name = tensor<string, []>("input_61_cast_fp16")]; |
| tensor<int32, [1]> x_53_axes_0 = const()[name = tensor<string, []>("x_53_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_3_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25399616)))]; |
| tensor<fp16, [1, 111, 768]> x_53_cast_fp16 = layer_norm(axes = x_53_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_xattn_query_weight_to_fp16, x = input_61_cast_fp16)[name = tensor<string, []>("x_53_cast_fp16")]; |
| tensor<int32, [1]> memory_7_axes_0 = const()[name = tensor<string, []>("memory_7_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_3_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25401216)))]; |
| tensor<fp16, [1, 256, 768]> memory_7_cast_fp16 = layer_norm(axes = memory_7_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_7_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25402816))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25501184))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_17_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_3_cross_attention_q_net_weight_to_fp16_quantized, x = x_53_cast_fp16)[name = tensor<string, []>("linear_17_cast_fp16")]; |
| tensor<int32, [4]> var_840 = const()[name = tensor<string, []>("op_840"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_29_cast_fp16 = reshape(shape = var_840, x = linear_17_cast_fp16)[name = tensor<string, []>("q_29_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25501504))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25698176))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_18_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_3_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_7_cast_fp16)[name = tensor<string, []>("linear_18_cast_fp16")]; |
| tensor<int32, [5]> var_846 = const()[name = tensor<string, []>("op_846"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_7_cast_fp16 = reshape(shape = var_846, x = linear_18_cast_fp16)[name = tensor<string, []>("kv_7_cast_fp16")]; |
| tensor<int32, [5]> k_7_begin_0 = const()[name = tensor<string, []>("k_7_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_7_end_0 = const()[name = tensor<string, []>("k_7_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_7_end_mask_0 = const()[name = tensor<string, []>("k_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_7_squeeze_mask_0 = const()[name = tensor<string, []>("k_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_7_cast_fp16 = slice_by_index(begin = k_7_begin_0, end = k_7_end_0, end_mask = k_7_end_mask_0, squeeze_mask = k_7_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor<string, []>("k_7_cast_fp16")]; |
| tensor<string, []> k_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_7_begin_0 = const()[name = tensor<string, []>("v_7_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_7_end_0 = const()[name = tensor<string, []>("v_7_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_7_end_mask_0 = const()[name = tensor<string, []>("v_7_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_7_squeeze_mask_0 = const()[name = tensor<string, []>("v_7_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_7_cast_fp16 = slice_by_index(begin = v_7_begin_0, end = v_7_end_0, end_mask = v_7_end_mask_0, squeeze_mask = v_7_squeeze_mask_0, x = kv_7_cast_fp16)[name = tensor<string, []>("v_7_cast_fp16")]; |
| tensor<string, []> v_7_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_7_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_859 = const()[name = tensor<string, []>("op_859"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_861_transpose_x_0 = const()[name = tensor<string, []>("op_861_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_861_transpose_y_0 = const()[name = tensor<string, []>("op_861_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_110_perm_0 = const()[name = tensor<string, []>("transpose_110_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_111_perm_0 = const()[name = tensor<string, []>("transpose_111_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_111 = transpose(perm = transpose_111_perm_0, x = k_7_cast_fp16)[name = tensor<string, []>("transpose_228")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_110 = transpose(perm = transpose_110_perm_0, x = q_29_cast_fp16)[name = tensor<string, []>("transpose_229")]; |
| tensor<fp16, [1, 1, 111, 256]> var_861_cast_fp16 = matmul(transpose_x = var_861_transpose_x_0, transpose_y = var_861_transpose_y_0, x = transpose_110, y = transpose_111)[name = tensor<string, []>("op_861_cast_fp16")]; |
| tensor<fp16, []> var_862_to_fp16 = const()[name = tensor<string, []>("op_862_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_37_cast_fp16 = mul(x = var_861_cast_fp16, y = var_862_to_fp16)[name = tensor<string, []>("scores_37_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_39_cast_fp16 = select(a = var_751_to_fp16, b = scores_37_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_39_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_15_cast_fp16 = softmax(axis = var_756, x = scores_39_cast_fp16)[name = tensor<string, []>("probs_15_cast_fp16")]; |
| tensor<bool, []> var_871_transpose_x_0 = const()[name = tensor<string, []>("op_871_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_871_transpose_y_0 = const()[name = tensor<string, []>("op_871_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_15_cast_fp16 = transpose(perm = var_859, x = v_7_cast_fp16)[name = tensor<string, []>("transpose_227")]; |
| tensor<fp16, [1, 1, 111, 128]> var_871_cast_fp16 = matmul(transpose_x = var_871_transpose_x_0, transpose_y = var_871_transpose_y_0, x = probs_15_cast_fp16, y = vT_15_cast_fp16)[name = tensor<string, []>("op_871_cast_fp16")]; |
| tensor<int32, [4]> var_872 = const()[name = tensor<string, []>("op_872"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_874 = const()[name = tensor<string, []>("op_874"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_873_cast_fp16 = transpose(perm = var_872, x = var_871_cast_fp16)[name = tensor<string, []>("transpose_226")]; |
| tensor<fp16, [1, 111, 128]> input_63_cast_fp16 = reshape(shape = var_874, x = var_873_cast_fp16)[name = tensor<string, []>("input_63_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25698752))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25797120))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_19_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_3_cross_attention_o_net_weight_to_fp16_quantized, x = input_63_cast_fp16)[name = tensor<string, []>("linear_19_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_65_cast_fp16 = add(x = input_61_cast_fp16, y = linear_19_cast_fp16)[name = tensor<string, []>("input_65_cast_fp16")]; |
| tensor<int32, [1]> x_55_axes_0 = const()[name = tensor<string, []>("x_55_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_3_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_3_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25798720)))]; |
| tensor<fp16, [1, 111, 768]> x_55_cast_fp16 = layer_norm(axes = x_55_axes_0, epsilon = var_754_to_fp16, gamma = dec_layers_3_norm_pos_ff_weight_to_fp16, x = input_65_cast_fp16)[name = tensor<string, []>("x_55_cast_fp16")]; |
| tensor<int32, [3]> var_891 = const()[name = tensor<string, []>("op_891"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_21_pad_type_0 = const()[name = tensor<string, []>("y_21_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_21_strides_0 = const()[name = tensor<string, []>("y_21_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_21_pad_0 = const()[name = tensor<string, []>("y_21_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_21_dilations_0 = const()[name = tensor<string, []>("y_21_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_21_groups_0 = const()[name = tensor<string, []>("y_21_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_3_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(25800320))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28159680))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_59_cast_fp16 = transpose(perm = var_891, x = x_55_cast_fp16)[name = tensor<string, []>("transpose_225")]; |
| tensor<fp16, [1, 3072, 111]> y_21_cast_fp16 = conv(dilations = y_21_dilations_0, groups = y_21_groups_0, pad = y_21_pad_0, pad_type = y_21_pad_type_0, strides = y_21_strides_0, weight = dec_layers_3_pos_ff_proj_weight_to_fp16_quantized, x = x_59_cast_fp16)[name = tensor<string, []>("y_21_cast_fp16")]; |
| tensor<string, []> x_61_mode_0 = const()[name = tensor<string, []>("x_61_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_61_cast_fp16 = gelu(mode = x_61_mode_0, x = y_21_cast_fp16)[name = tensor<string, []>("x_61_cast_fp16")]; |
| tensor<string, []> y_23_pad_type_0 = const()[name = tensor<string, []>("y_23_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_23_strides_0 = const()[name = tensor<string, []>("y_23_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_23_pad_0 = const()[name = tensor<string, []>("y_23_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_23_dilations_0 = const()[name = tensor<string, []>("y_23_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_23_groups_0 = const()[name = tensor<string, []>("y_23_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(28165888))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30525248))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_23_cast_fp16 = conv(dilations = y_23_dilations_0, groups = y_23_groups_0, pad = y_23_pad_0, pad_type = y_23_pad_type_0, strides = y_23_strides_0, weight = dec_layers_3_pos_ff_o_net_weight_to_fp16_quantized, x = x_61_cast_fp16)[name = tensor<string, []>("y_23_cast_fp16")]; |
| tensor<int32, [3]> var_911 = const()[name = tensor<string, []>("op_911"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_39_cast_fp16 = transpose(perm = var_911, x = y_23_cast_fp16)[name = tensor<string, []>("transpose_224")]; |
| tensor<fp16, [1, 111, 768]> x_65_cast_fp16 = add(x = input_65_cast_fp16, y = h_39_cast_fp16)[name = tensor<string, []>("x_65_cast_fp16")]; |
| tensor<int32, []> var_941 = const()[name = tensor<string, []>("op_941"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_69_axes_0 = const()[name = tensor<string, []>("input_69_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_4_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30526848)))]; |
| tensor<fp16, []> var_939_to_fp16 = const()[name = tensor<string, []>("op_939_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_69_cast_fp16 = layer_norm(axes = input_69_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_self_weight_to_fp16, x = x_65_cast_fp16)[name = tensor<string, []>("input_69_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(30528448))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32297984))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_20_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_4_self_attention_qkv_net_weight_to_fp16_quantized, x = input_69_cast_fp16)[name = tensor<string, []>("linear_20_cast_fp16")]; |
| tensor<int32, [5]> var_960 = const()[name = tensor<string, []>("op_960"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_19_cast_fp16 = reshape(shape = var_960, x = linear_20_cast_fp16)[name = tensor<string, []>("qkv_19_cast_fp16")]; |
| tensor<int32, [5]> q_33_begin_0 = const()[name = tensor<string, []>("q_33_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_33_end_0 = const()[name = tensor<string, []>("q_33_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_33_end_mask_0 = const()[name = tensor<string, []>("q_33_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_33_squeeze_mask_0 = const()[name = tensor<string, []>("q_33_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_33_cast_fp16 = slice_by_index(begin = q_33_begin_0, end = q_33_end_0, end_mask = q_33_end_mask_0, squeeze_mask = q_33_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("q_33_cast_fp16")]; |
| tensor<int32, [5]> new_k_9_begin_0 = const()[name = tensor<string, []>("new_k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_9_end_0 = const()[name = tensor<string, []>("new_k_9_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_9_end_mask_0 = const()[name = tensor<string, []>("new_k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_9_cast_fp16 = slice_by_index(begin = new_k_9_begin_0, end = new_k_9_end_0, end_mask = new_k_9_end_mask_0, squeeze_mask = new_k_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_k_9_cast_fp16")]; |
| tensor<string, []> new_k_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_9_begin_0 = const()[name = tensor<string, []>("new_v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_9_end_0 = const()[name = tensor<string, []>("new_v_9_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_9_end_mask_0 = const()[name = tensor<string, []>("new_v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_9_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_9_cast_fp16 = slice_by_index(begin = new_v_9_begin_0, end = new_v_9_end_0, end_mask = new_v_9_end_mask_0, squeeze_mask = new_v_9_squeeze_mask_0, x = qkv_19_cast_fp16)[name = tensor<string, []>("new_v_9_cast_fp16")]; |
| tensor<string, []> new_v_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_975 = const()[name = tensor<string, []>("op_975"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_977_transpose_x_0 = const()[name = tensor<string, []>("op_977_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_977_transpose_y_0 = const()[name = tensor<string, []>("op_977_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_112_perm_0 = const()[name = tensor<string, []>("transpose_112_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_113_perm_0 = const()[name = tensor<string, []>("transpose_113_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_113 = transpose(perm = transpose_113_perm_0, x = new_k_9_cast_fp16)[name = tensor<string, []>("transpose_222")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_112 = transpose(perm = transpose_112_perm_0, x = q_33_cast_fp16)[name = tensor<string, []>("transpose_223")]; |
| tensor<fp16, [1, 12, 111, 111]> var_977_cast_fp16 = matmul(transpose_x = var_977_transpose_x_0, transpose_y = var_977_transpose_y_0, x = transpose_112, y = transpose_113)[name = tensor<string, []>("op_977_cast_fp16")]; |
| tensor<fp16, []> var_978_to_fp16 = const()[name = tensor<string, []>("op_978_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_41_cast_fp16 = mul(x = var_977_cast_fp16, y = var_978_to_fp16)[name = tensor<string, []>("scores_41_cast_fp16")]; |
| tensor<fp16, []> var_936_to_fp16 = const()[name = tensor<string, []>("op_936_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_43_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_41_cast_fp16)[name = tensor<string, []>("scores_43_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_17_cast_fp16 = softmax(axis = var_941, x = scores_43_cast_fp16)[name = tensor<string, []>("probs_17_cast_fp16")]; |
| tensor<bool, []> var_998_transpose_x_0 = const()[name = tensor<string, []>("op_998_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_998_transpose_y_0 = const()[name = tensor<string, []>("op_998_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_17_cast_fp16 = transpose(perm = var_975, x = new_v_9_cast_fp16)[name = tensor<string, []>("transpose_221")]; |
| tensor<fp16, [1, 12, 111, 64]> var_998_cast_fp16 = matmul(transpose_x = var_998_transpose_x_0, transpose_y = var_998_transpose_y_0, x = probs_17_cast_fp16, y = vT_17_cast_fp16)[name = tensor<string, []>("op_998_cast_fp16")]; |
| tensor<int32, [4]> var_999 = const()[name = tensor<string, []>("op_999"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1003 = const()[name = tensor<string, []>("op_1003"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_25_cast_fp16 = transpose(perm = var_999, x = var_998_cast_fp16)[name = tensor<string, []>("transpose_220")]; |
| tensor<fp16, [1, 111, 768]> input_71_cast_fp16 = reshape(shape = var_1003, x = y_25_cast_fp16)[name = tensor<string, []>("input_71_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_4_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32302656))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32892544))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_21_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_self_attention_o_net_weight_to_fp16_quantized, x = input_71_cast_fp16)[name = tensor<string, []>("linear_21_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_73_cast_fp16 = add(x = x_65_cast_fp16, y = linear_21_cast_fp16)[name = tensor<string, []>("input_73_cast_fp16")]; |
| tensor<int32, [1]> x_69_axes_0 = const()[name = tensor<string, []>("x_69_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_4_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32894144)))]; |
| tensor<fp16, [1, 111, 768]> x_69_cast_fp16 = layer_norm(axes = x_69_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_xattn_query_weight_to_fp16, x = input_73_cast_fp16)[name = tensor<string, []>("x_69_cast_fp16")]; |
| tensor<int32, [1]> memory_9_axes_0 = const()[name = tensor<string, []>("memory_9_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_4_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32895744)))]; |
| tensor<fp16, [1, 256, 768]> memory_9_cast_fp16 = layer_norm(axes = memory_9_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_9_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32897344))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32995712))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_22_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_4_cross_attention_q_net_weight_to_fp16_quantized, x = x_69_cast_fp16)[name = tensor<string, []>("linear_22_cast_fp16")]; |
| tensor<int32, [4]> var_1025 = const()[name = tensor<string, []>("op_1025"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_37_cast_fp16 = reshape(shape = var_1025, x = linear_22_cast_fp16)[name = tensor<string, []>("q_37_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(32996032))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33192704))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_23_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_4_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_9_cast_fp16)[name = tensor<string, []>("linear_23_cast_fp16")]; |
| tensor<int32, [5]> var_1031 = const()[name = tensor<string, []>("op_1031"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_9_cast_fp16 = reshape(shape = var_1031, x = linear_23_cast_fp16)[name = tensor<string, []>("kv_9_cast_fp16")]; |
| tensor<int32, [5]> k_9_begin_0 = const()[name = tensor<string, []>("k_9_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_9_end_0 = const()[name = tensor<string, []>("k_9_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_9_end_mask_0 = const()[name = tensor<string, []>("k_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_9_squeeze_mask_0 = const()[name = tensor<string, []>("k_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_9_cast_fp16 = slice_by_index(begin = k_9_begin_0, end = k_9_end_0, end_mask = k_9_end_mask_0, squeeze_mask = k_9_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor<string, []>("k_9_cast_fp16")]; |
| tensor<string, []> k_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_9_begin_0 = const()[name = tensor<string, []>("v_9_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_9_end_0 = const()[name = tensor<string, []>("v_9_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_9_end_mask_0 = const()[name = tensor<string, []>("v_9_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_9_squeeze_mask_0 = const()[name = tensor<string, []>("v_9_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_9_cast_fp16 = slice_by_index(begin = v_9_begin_0, end = v_9_end_0, end_mask = v_9_end_mask_0, squeeze_mask = v_9_squeeze_mask_0, x = kv_9_cast_fp16)[name = tensor<string, []>("v_9_cast_fp16")]; |
| tensor<string, []> v_9_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_9_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1044 = const()[name = tensor<string, []>("op_1044"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1046_transpose_x_0 = const()[name = tensor<string, []>("op_1046_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1046_transpose_y_0 = const()[name = tensor<string, []>("op_1046_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_114_perm_0 = const()[name = tensor<string, []>("transpose_114_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_115_perm_0 = const()[name = tensor<string, []>("transpose_115_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_115 = transpose(perm = transpose_115_perm_0, x = k_9_cast_fp16)[name = tensor<string, []>("transpose_218")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_114 = transpose(perm = transpose_114_perm_0, x = q_37_cast_fp16)[name = tensor<string, []>("transpose_219")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1046_cast_fp16 = matmul(transpose_x = var_1046_transpose_x_0, transpose_y = var_1046_transpose_y_0, x = transpose_114, y = transpose_115)[name = tensor<string, []>("op_1046_cast_fp16")]; |
| tensor<fp16, []> var_1047_to_fp16 = const()[name = tensor<string, []>("op_1047_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_47_cast_fp16 = mul(x = var_1046_cast_fp16, y = var_1047_to_fp16)[name = tensor<string, []>("scores_47_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_49_cast_fp16 = select(a = var_936_to_fp16, b = scores_47_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_49_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_19_cast_fp16 = softmax(axis = var_941, x = scores_49_cast_fp16)[name = tensor<string, []>("probs_19_cast_fp16")]; |
| tensor<bool, []> var_1056_transpose_x_0 = const()[name = tensor<string, []>("op_1056_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1056_transpose_y_0 = const()[name = tensor<string, []>("op_1056_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_19_cast_fp16 = transpose(perm = var_1044, x = v_9_cast_fp16)[name = tensor<string, []>("transpose_217")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1056_cast_fp16 = matmul(transpose_x = var_1056_transpose_x_0, transpose_y = var_1056_transpose_y_0, x = probs_19_cast_fp16, y = vT_19_cast_fp16)[name = tensor<string, []>("op_1056_cast_fp16")]; |
| tensor<int32, [4]> var_1057 = const()[name = tensor<string, []>("op_1057"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1059 = const()[name = tensor<string, []>("op_1059"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1058_cast_fp16 = transpose(perm = var_1057, x = var_1056_cast_fp16)[name = tensor<string, []>("transpose_216")]; |
| tensor<fp16, [1, 111, 128]> input_75_cast_fp16 = reshape(shape = var_1059, x = var_1058_cast_fp16)[name = tensor<string, []>("input_75_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33193280))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33291648))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_24_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_4_cross_attention_o_net_weight_to_fp16_quantized, x = input_75_cast_fp16)[name = tensor<string, []>("linear_24_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_77_cast_fp16 = add(x = input_73_cast_fp16, y = linear_24_cast_fp16)[name = tensor<string, []>("input_77_cast_fp16")]; |
| tensor<int32, [1]> x_71_axes_0 = const()[name = tensor<string, []>("x_71_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_4_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_4_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33293248)))]; |
| tensor<fp16, [1, 111, 768]> x_71_cast_fp16 = layer_norm(axes = x_71_axes_0, epsilon = var_939_to_fp16, gamma = dec_layers_4_norm_pos_ff_weight_to_fp16, x = input_77_cast_fp16)[name = tensor<string, []>("x_71_cast_fp16")]; |
| tensor<int32, [3]> var_1076 = const()[name = tensor<string, []>("op_1076"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_27_pad_type_0 = const()[name = tensor<string, []>("y_27_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_27_strides_0 = const()[name = tensor<string, []>("y_27_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_27_pad_0 = const()[name = tensor<string, []>("y_27_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_27_dilations_0 = const()[name = tensor<string, []>("y_27_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_27_groups_0 = const()[name = tensor<string, []>("y_27_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_4_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(33294848))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35654208))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_75_cast_fp16 = transpose(perm = var_1076, x = x_71_cast_fp16)[name = tensor<string, []>("transpose_215")]; |
| tensor<fp16, [1, 3072, 111]> y_27_cast_fp16 = conv(dilations = y_27_dilations_0, groups = y_27_groups_0, pad = y_27_pad_0, pad_type = y_27_pad_type_0, strides = y_27_strides_0, weight = dec_layers_4_pos_ff_proj_weight_to_fp16_quantized, x = x_75_cast_fp16)[name = tensor<string, []>("y_27_cast_fp16")]; |
| tensor<string, []> x_77_mode_0 = const()[name = tensor<string, []>("x_77_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_77_cast_fp16 = gelu(mode = x_77_mode_0, x = y_27_cast_fp16)[name = tensor<string, []>("x_77_cast_fp16")]; |
| tensor<string, []> y_29_pad_type_0 = const()[name = tensor<string, []>("y_29_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_29_strides_0 = const()[name = tensor<string, []>("y_29_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_29_pad_0 = const()[name = tensor<string, []>("y_29_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_29_dilations_0 = const()[name = tensor<string, []>("y_29_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_29_groups_0 = const()[name = tensor<string, []>("y_29_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(35660416))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38019776))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_29_cast_fp16 = conv(dilations = y_29_dilations_0, groups = y_29_groups_0, pad = y_29_pad_0, pad_type = y_29_pad_type_0, strides = y_29_strides_0, weight = dec_layers_4_pos_ff_o_net_weight_to_fp16_quantized, x = x_77_cast_fp16)[name = tensor<string, []>("y_29_cast_fp16")]; |
| tensor<int32, [3]> var_1096 = const()[name = tensor<string, []>("op_1096"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_49_cast_fp16 = transpose(perm = var_1096, x = y_29_cast_fp16)[name = tensor<string, []>("transpose_214")]; |
| tensor<fp16, [1, 111, 768]> x_81_cast_fp16 = add(x = input_77_cast_fp16, y = h_49_cast_fp16)[name = tensor<string, []>("x_81_cast_fp16")]; |
| tensor<int32, []> var_1126 = const()[name = tensor<string, []>("op_1126"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_81_axes_0 = const()[name = tensor<string, []>("input_81_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_5_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38021376)))]; |
| tensor<fp16, []> var_1124_to_fp16 = const()[name = tensor<string, []>("op_1124_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_81_cast_fp16 = layer_norm(axes = input_81_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_self_weight_to_fp16, x = x_81_cast_fp16)[name = tensor<string, []>("input_81_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(38022976))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39792512))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_25_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_5_self_attention_qkv_net_weight_to_fp16_quantized, x = input_81_cast_fp16)[name = tensor<string, []>("linear_25_cast_fp16")]; |
| tensor<int32, [5]> var_1145 = const()[name = tensor<string, []>("op_1145"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_23_cast_fp16 = reshape(shape = var_1145, x = linear_25_cast_fp16)[name = tensor<string, []>("qkv_23_cast_fp16")]; |
| tensor<int32, [5]> q_41_begin_0 = const()[name = tensor<string, []>("q_41_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_41_end_0 = const()[name = tensor<string, []>("q_41_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_41_end_mask_0 = const()[name = tensor<string, []>("q_41_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_41_squeeze_mask_0 = const()[name = tensor<string, []>("q_41_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_41_cast_fp16 = slice_by_index(begin = q_41_begin_0, end = q_41_end_0, end_mask = q_41_end_mask_0, squeeze_mask = q_41_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("q_41_cast_fp16")]; |
| tensor<int32, [5]> new_k_11_begin_0 = const()[name = tensor<string, []>("new_k_11_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_11_end_0 = const()[name = tensor<string, []>("new_k_11_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_11_end_mask_0 = const()[name = tensor<string, []>("new_k_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_11_cast_fp16 = slice_by_index(begin = new_k_11_begin_0, end = new_k_11_end_0, end_mask = new_k_11_end_mask_0, squeeze_mask = new_k_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_k_11_cast_fp16")]; |
| tensor<string, []> new_k_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_11_begin_0 = const()[name = tensor<string, []>("new_v_11_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_11_end_0 = const()[name = tensor<string, []>("new_v_11_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_11_end_mask_0 = const()[name = tensor<string, []>("new_v_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_11_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_11_cast_fp16 = slice_by_index(begin = new_v_11_begin_0, end = new_v_11_end_0, end_mask = new_v_11_end_mask_0, squeeze_mask = new_v_11_squeeze_mask_0, x = qkv_23_cast_fp16)[name = tensor<string, []>("new_v_11_cast_fp16")]; |
| tensor<string, []> new_v_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1160 = const()[name = tensor<string, []>("op_1160"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1162_transpose_x_0 = const()[name = tensor<string, []>("op_1162_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1162_transpose_y_0 = const()[name = tensor<string, []>("op_1162_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_116_perm_0 = const()[name = tensor<string, []>("transpose_116_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_117_perm_0 = const()[name = tensor<string, []>("transpose_117_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_117 = transpose(perm = transpose_117_perm_0, x = new_k_11_cast_fp16)[name = tensor<string, []>("transpose_212")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_116 = transpose(perm = transpose_116_perm_0, x = q_41_cast_fp16)[name = tensor<string, []>("transpose_213")]; |
| tensor<fp16, [1, 12, 111, 111]> var_1162_cast_fp16 = matmul(transpose_x = var_1162_transpose_x_0, transpose_y = var_1162_transpose_y_0, x = transpose_116, y = transpose_117)[name = tensor<string, []>("op_1162_cast_fp16")]; |
| tensor<fp16, []> var_1163_to_fp16 = const()[name = tensor<string, []>("op_1163_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_51_cast_fp16 = mul(x = var_1162_cast_fp16, y = var_1163_to_fp16)[name = tensor<string, []>("scores_51_cast_fp16")]; |
| tensor<fp16, []> var_1121_to_fp16 = const()[name = tensor<string, []>("op_1121_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_53_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_51_cast_fp16)[name = tensor<string, []>("scores_53_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_21_cast_fp16 = softmax(axis = var_1126, x = scores_53_cast_fp16)[name = tensor<string, []>("probs_21_cast_fp16")]; |
| tensor<bool, []> var_1183_transpose_x_0 = const()[name = tensor<string, []>("op_1183_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1183_transpose_y_0 = const()[name = tensor<string, []>("op_1183_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_21_cast_fp16 = transpose(perm = var_1160, x = new_v_11_cast_fp16)[name = tensor<string, []>("transpose_211")]; |
| tensor<fp16, [1, 12, 111, 64]> var_1183_cast_fp16 = matmul(transpose_x = var_1183_transpose_x_0, transpose_y = var_1183_transpose_y_0, x = probs_21_cast_fp16, y = vT_21_cast_fp16)[name = tensor<string, []>("op_1183_cast_fp16")]; |
| tensor<int32, [4]> var_1184 = const()[name = tensor<string, []>("op_1184"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1188 = const()[name = tensor<string, []>("op_1188"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_31_cast_fp16 = transpose(perm = var_1184, x = var_1183_cast_fp16)[name = tensor<string, []>("transpose_210")]; |
| tensor<fp16, [1, 111, 768]> input_83_cast_fp16 = reshape(shape = var_1188, x = y_31_cast_fp16)[name = tensor<string, []>("input_83_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_5_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(39797184))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40387072))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_26_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_self_attention_o_net_weight_to_fp16_quantized, x = input_83_cast_fp16)[name = tensor<string, []>("linear_26_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_85_cast_fp16 = add(x = x_81_cast_fp16, y = linear_26_cast_fp16)[name = tensor<string, []>("input_85_cast_fp16")]; |
| tensor<int32, [1]> x_85_axes_0 = const()[name = tensor<string, []>("x_85_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_5_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40388672)))]; |
| tensor<fp16, [1, 111, 768]> x_85_cast_fp16 = layer_norm(axes = x_85_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_xattn_query_weight_to_fp16, x = input_85_cast_fp16)[name = tensor<string, []>("x_85_cast_fp16")]; |
| tensor<int32, [1]> memory_11_axes_0 = const()[name = tensor<string, []>("memory_11_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_5_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40390272)))]; |
| tensor<fp16, [1, 256, 768]> memory_11_cast_fp16 = layer_norm(axes = memory_11_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_11_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40391872))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40490240))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_27_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_5_cross_attention_q_net_weight_to_fp16_quantized, x = x_85_cast_fp16)[name = tensor<string, []>("linear_27_cast_fp16")]; |
| tensor<int32, [4]> var_1210 = const()[name = tensor<string, []>("op_1210"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_45_cast_fp16 = reshape(shape = var_1210, x = linear_27_cast_fp16)[name = tensor<string, []>("q_45_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40490560))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40687232))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_28_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_5_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_11_cast_fp16)[name = tensor<string, []>("linear_28_cast_fp16")]; |
| tensor<int32, [5]> var_1216 = const()[name = tensor<string, []>("op_1216"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_11_cast_fp16 = reshape(shape = var_1216, x = linear_28_cast_fp16)[name = tensor<string, []>("kv_11_cast_fp16")]; |
| tensor<int32, [5]> k_11_begin_0 = const()[name = tensor<string, []>("k_11_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_11_end_0 = const()[name = tensor<string, []>("k_11_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_11_end_mask_0 = const()[name = tensor<string, []>("k_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_11_squeeze_mask_0 = const()[name = tensor<string, []>("k_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_11_cast_fp16 = slice_by_index(begin = k_11_begin_0, end = k_11_end_0, end_mask = k_11_end_mask_0, squeeze_mask = k_11_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor<string, []>("k_11_cast_fp16")]; |
| tensor<string, []> k_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_11_begin_0 = const()[name = tensor<string, []>("v_11_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_11_end_0 = const()[name = tensor<string, []>("v_11_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_11_end_mask_0 = const()[name = tensor<string, []>("v_11_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_11_squeeze_mask_0 = const()[name = tensor<string, []>("v_11_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_11_cast_fp16 = slice_by_index(begin = v_11_begin_0, end = v_11_end_0, end_mask = v_11_end_mask_0, squeeze_mask = v_11_squeeze_mask_0, x = kv_11_cast_fp16)[name = tensor<string, []>("v_11_cast_fp16")]; |
| tensor<string, []> v_11_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_11_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1229 = const()[name = tensor<string, []>("op_1229"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1231_transpose_x_0 = const()[name = tensor<string, []>("op_1231_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1231_transpose_y_0 = const()[name = tensor<string, []>("op_1231_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_118_perm_0 = const()[name = tensor<string, []>("transpose_118_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_119_perm_0 = const()[name = tensor<string, []>("transpose_119_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_119 = transpose(perm = transpose_119_perm_0, x = k_11_cast_fp16)[name = tensor<string, []>("transpose_208")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_118 = transpose(perm = transpose_118_perm_0, x = q_45_cast_fp16)[name = tensor<string, []>("transpose_209")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1231_cast_fp16 = matmul(transpose_x = var_1231_transpose_x_0, transpose_y = var_1231_transpose_y_0, x = transpose_118, y = transpose_119)[name = tensor<string, []>("op_1231_cast_fp16")]; |
| tensor<fp16, []> var_1232_to_fp16 = const()[name = tensor<string, []>("op_1232_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_57_cast_fp16 = mul(x = var_1231_cast_fp16, y = var_1232_to_fp16)[name = tensor<string, []>("scores_57_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_59_cast_fp16 = select(a = var_1121_to_fp16, b = scores_57_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_59_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_23_cast_fp16 = softmax(axis = var_1126, x = scores_59_cast_fp16)[name = tensor<string, []>("probs_23_cast_fp16")]; |
| tensor<bool, []> var_1241_transpose_x_0 = const()[name = tensor<string, []>("op_1241_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1241_transpose_y_0 = const()[name = tensor<string, []>("op_1241_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_23_cast_fp16 = transpose(perm = var_1229, x = v_11_cast_fp16)[name = tensor<string, []>("transpose_207")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1241_cast_fp16 = matmul(transpose_x = var_1241_transpose_x_0, transpose_y = var_1241_transpose_y_0, x = probs_23_cast_fp16, y = vT_23_cast_fp16)[name = tensor<string, []>("op_1241_cast_fp16")]; |
| tensor<int32, [4]> var_1242 = const()[name = tensor<string, []>("op_1242"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1244 = const()[name = tensor<string, []>("op_1244"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1243_cast_fp16 = transpose(perm = var_1242, x = var_1241_cast_fp16)[name = tensor<string, []>("transpose_206")]; |
| tensor<fp16, [1, 111, 128]> input_87_cast_fp16 = reshape(shape = var_1244, x = var_1243_cast_fp16)[name = tensor<string, []>("input_87_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40687808))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40786176))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_29_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_5_cross_attention_o_net_weight_to_fp16_quantized, x = input_87_cast_fp16)[name = tensor<string, []>("linear_29_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_89_cast_fp16 = add(x = input_85_cast_fp16, y = linear_29_cast_fp16)[name = tensor<string, []>("input_89_cast_fp16")]; |
| tensor<int32, [1]> x_87_axes_0 = const()[name = tensor<string, []>("x_87_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_5_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_5_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40787776)))]; |
| tensor<fp16, [1, 111, 768]> x_87_cast_fp16 = layer_norm(axes = x_87_axes_0, epsilon = var_1124_to_fp16, gamma = dec_layers_5_norm_pos_ff_weight_to_fp16, x = input_89_cast_fp16)[name = tensor<string, []>("x_87_cast_fp16")]; |
| tensor<int32, [3]> var_1261 = const()[name = tensor<string, []>("op_1261"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_33_pad_type_0 = const()[name = tensor<string, []>("y_33_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_33_strides_0 = const()[name = tensor<string, []>("y_33_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_33_pad_0 = const()[name = tensor<string, []>("y_33_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_33_dilations_0 = const()[name = tensor<string, []>("y_33_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_33_groups_0 = const()[name = tensor<string, []>("y_33_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_5_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(40789376))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43148736))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_91_cast_fp16 = transpose(perm = var_1261, x = x_87_cast_fp16)[name = tensor<string, []>("transpose_205")]; |
| tensor<fp16, [1, 3072, 111]> y_33_cast_fp16 = conv(dilations = y_33_dilations_0, groups = y_33_groups_0, pad = y_33_pad_0, pad_type = y_33_pad_type_0, strides = y_33_strides_0, weight = dec_layers_5_pos_ff_proj_weight_to_fp16_quantized, x = x_91_cast_fp16)[name = tensor<string, []>("y_33_cast_fp16")]; |
| tensor<string, []> x_93_mode_0 = const()[name = tensor<string, []>("x_93_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_93_cast_fp16 = gelu(mode = x_93_mode_0, x = y_33_cast_fp16)[name = tensor<string, []>("x_93_cast_fp16")]; |
| tensor<string, []> y_35_pad_type_0 = const()[name = tensor<string, []>("y_35_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_35_strides_0 = const()[name = tensor<string, []>("y_35_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_35_pad_0 = const()[name = tensor<string, []>("y_35_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_35_dilations_0 = const()[name = tensor<string, []>("y_35_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_35_groups_0 = const()[name = tensor<string, []>("y_35_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(43154944))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45514304))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_35_cast_fp16 = conv(dilations = y_35_dilations_0, groups = y_35_groups_0, pad = y_35_pad_0, pad_type = y_35_pad_type_0, strides = y_35_strides_0, weight = dec_layers_5_pos_ff_o_net_weight_to_fp16_quantized, x = x_93_cast_fp16)[name = tensor<string, []>("y_35_cast_fp16")]; |
| tensor<int32, [3]> var_1281 = const()[name = tensor<string, []>("op_1281"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_59_cast_fp16 = transpose(perm = var_1281, x = y_35_cast_fp16)[name = tensor<string, []>("transpose_204")]; |
| tensor<fp16, [1, 111, 768]> x_97_cast_fp16 = add(x = input_89_cast_fp16, y = h_59_cast_fp16)[name = tensor<string, []>("x_97_cast_fp16")]; |
| tensor<int32, []> var_1311 = const()[name = tensor<string, []>("op_1311"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_93_axes_0 = const()[name = tensor<string, []>("input_93_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_6_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45515904)))]; |
| tensor<fp16, []> var_1309_to_fp16 = const()[name = tensor<string, []>("op_1309_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_93_cast_fp16 = layer_norm(axes = input_93_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_self_weight_to_fp16, x = x_97_cast_fp16)[name = tensor<string, []>("input_93_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(45517504))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47287040))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_30_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_6_self_attention_qkv_net_weight_to_fp16_quantized, x = input_93_cast_fp16)[name = tensor<string, []>("linear_30_cast_fp16")]; |
| tensor<int32, [5]> var_1330 = const()[name = tensor<string, []>("op_1330"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_27_cast_fp16 = reshape(shape = var_1330, x = linear_30_cast_fp16)[name = tensor<string, []>("qkv_27_cast_fp16")]; |
| tensor<int32, [5]> q_49_begin_0 = const()[name = tensor<string, []>("q_49_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_49_end_0 = const()[name = tensor<string, []>("q_49_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_49_end_mask_0 = const()[name = tensor<string, []>("q_49_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_49_squeeze_mask_0 = const()[name = tensor<string, []>("q_49_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_49_cast_fp16 = slice_by_index(begin = q_49_begin_0, end = q_49_end_0, end_mask = q_49_end_mask_0, squeeze_mask = q_49_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("q_49_cast_fp16")]; |
| tensor<int32, [5]> new_k_13_begin_0 = const()[name = tensor<string, []>("new_k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_13_end_0 = const()[name = tensor<string, []>("new_k_13_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_13_end_mask_0 = const()[name = tensor<string, []>("new_k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_13_cast_fp16 = slice_by_index(begin = new_k_13_begin_0, end = new_k_13_end_0, end_mask = new_k_13_end_mask_0, squeeze_mask = new_k_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_k_13_cast_fp16")]; |
| tensor<string, []> new_k_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_13_begin_0 = const()[name = tensor<string, []>("new_v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_13_end_0 = const()[name = tensor<string, []>("new_v_13_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_13_end_mask_0 = const()[name = tensor<string, []>("new_v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_13_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_13_cast_fp16 = slice_by_index(begin = new_v_13_begin_0, end = new_v_13_end_0, end_mask = new_v_13_end_mask_0, squeeze_mask = new_v_13_squeeze_mask_0, x = qkv_27_cast_fp16)[name = tensor<string, []>("new_v_13_cast_fp16")]; |
| tensor<string, []> new_v_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1345 = const()[name = tensor<string, []>("op_1345"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1347_transpose_x_0 = const()[name = tensor<string, []>("op_1347_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1347_transpose_y_0 = const()[name = tensor<string, []>("op_1347_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_120_perm_0 = const()[name = tensor<string, []>("transpose_120_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_121_perm_0 = const()[name = tensor<string, []>("transpose_121_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_121 = transpose(perm = transpose_121_perm_0, x = new_k_13_cast_fp16)[name = tensor<string, []>("transpose_202")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_120 = transpose(perm = transpose_120_perm_0, x = q_49_cast_fp16)[name = tensor<string, []>("transpose_203")]; |
| tensor<fp16, [1, 12, 111, 111]> var_1347_cast_fp16 = matmul(transpose_x = var_1347_transpose_x_0, transpose_y = var_1347_transpose_y_0, x = transpose_120, y = transpose_121)[name = tensor<string, []>("op_1347_cast_fp16")]; |
| tensor<fp16, []> var_1348_to_fp16 = const()[name = tensor<string, []>("op_1348_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_61_cast_fp16 = mul(x = var_1347_cast_fp16, y = var_1348_to_fp16)[name = tensor<string, []>("scores_61_cast_fp16")]; |
| tensor<fp16, []> var_1306_to_fp16 = const()[name = tensor<string, []>("op_1306_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_63_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_61_cast_fp16)[name = tensor<string, []>("scores_63_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_25_cast_fp16 = softmax(axis = var_1311, x = scores_63_cast_fp16)[name = tensor<string, []>("probs_25_cast_fp16")]; |
| tensor<bool, []> var_1368_transpose_x_0 = const()[name = tensor<string, []>("op_1368_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1368_transpose_y_0 = const()[name = tensor<string, []>("op_1368_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_25_cast_fp16 = transpose(perm = var_1345, x = new_v_13_cast_fp16)[name = tensor<string, []>("transpose_201")]; |
| tensor<fp16, [1, 12, 111, 64]> var_1368_cast_fp16 = matmul(transpose_x = var_1368_transpose_x_0, transpose_y = var_1368_transpose_y_0, x = probs_25_cast_fp16, y = vT_25_cast_fp16)[name = tensor<string, []>("op_1368_cast_fp16")]; |
| tensor<int32, [4]> var_1369 = const()[name = tensor<string, []>("op_1369"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1373 = const()[name = tensor<string, []>("op_1373"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_37_cast_fp16 = transpose(perm = var_1369, x = var_1368_cast_fp16)[name = tensor<string, []>("transpose_200")]; |
| tensor<fp16, [1, 111, 768]> input_95_cast_fp16 = reshape(shape = var_1373, x = y_37_cast_fp16)[name = tensor<string, []>("input_95_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_6_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47291712))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47881600))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_31_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_self_attention_o_net_weight_to_fp16_quantized, x = input_95_cast_fp16)[name = tensor<string, []>("linear_31_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_97_cast_fp16 = add(x = x_97_cast_fp16, y = linear_31_cast_fp16)[name = tensor<string, []>("input_97_cast_fp16")]; |
| tensor<int32, [1]> x_101_axes_0 = const()[name = tensor<string, []>("x_101_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_6_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47883200)))]; |
| tensor<fp16, [1, 111, 768]> x_101_cast_fp16 = layer_norm(axes = x_101_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_xattn_query_weight_to_fp16, x = input_97_cast_fp16)[name = tensor<string, []>("x_101_cast_fp16")]; |
| tensor<int32, [1]> memory_13_axes_0 = const()[name = tensor<string, []>("memory_13_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_6_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47884800)))]; |
| tensor<fp16, [1, 256, 768]> memory_13_cast_fp16 = layer_norm(axes = memory_13_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_13_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47886400))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47984768))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_32_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_6_cross_attention_q_net_weight_to_fp16_quantized, x = x_101_cast_fp16)[name = tensor<string, []>("linear_32_cast_fp16")]; |
| tensor<int32, [4]> var_1395 = const()[name = tensor<string, []>("op_1395"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_53_cast_fp16 = reshape(shape = var_1395, x = linear_32_cast_fp16)[name = tensor<string, []>("q_53_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(47985088))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48181760))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_33_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_6_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_13_cast_fp16)[name = tensor<string, []>("linear_33_cast_fp16")]; |
| tensor<int32, [5]> var_1401 = const()[name = tensor<string, []>("op_1401"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_13_cast_fp16 = reshape(shape = var_1401, x = linear_33_cast_fp16)[name = tensor<string, []>("kv_13_cast_fp16")]; |
| tensor<int32, [5]> k_13_begin_0 = const()[name = tensor<string, []>("k_13_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_13_end_0 = const()[name = tensor<string, []>("k_13_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_13_end_mask_0 = const()[name = tensor<string, []>("k_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_13_squeeze_mask_0 = const()[name = tensor<string, []>("k_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_13_cast_fp16 = slice_by_index(begin = k_13_begin_0, end = k_13_end_0, end_mask = k_13_end_mask_0, squeeze_mask = k_13_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor<string, []>("k_13_cast_fp16")]; |
| tensor<string, []> k_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_13_begin_0 = const()[name = tensor<string, []>("v_13_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_13_end_0 = const()[name = tensor<string, []>("v_13_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_13_end_mask_0 = const()[name = tensor<string, []>("v_13_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_13_squeeze_mask_0 = const()[name = tensor<string, []>("v_13_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_13_cast_fp16 = slice_by_index(begin = v_13_begin_0, end = v_13_end_0, end_mask = v_13_end_mask_0, squeeze_mask = v_13_squeeze_mask_0, x = kv_13_cast_fp16)[name = tensor<string, []>("v_13_cast_fp16")]; |
| tensor<string, []> v_13_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_13_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1414 = const()[name = tensor<string, []>("op_1414"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1416_transpose_x_0 = const()[name = tensor<string, []>("op_1416_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1416_transpose_y_0 = const()[name = tensor<string, []>("op_1416_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_122_perm_0 = const()[name = tensor<string, []>("transpose_122_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_123_perm_0 = const()[name = tensor<string, []>("transpose_123_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_123 = transpose(perm = transpose_123_perm_0, x = k_13_cast_fp16)[name = tensor<string, []>("transpose_198")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_122 = transpose(perm = transpose_122_perm_0, x = q_53_cast_fp16)[name = tensor<string, []>("transpose_199")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1416_cast_fp16 = matmul(transpose_x = var_1416_transpose_x_0, transpose_y = var_1416_transpose_y_0, x = transpose_122, y = transpose_123)[name = tensor<string, []>("op_1416_cast_fp16")]; |
| tensor<fp16, []> var_1417_to_fp16 = const()[name = tensor<string, []>("op_1417_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_67_cast_fp16 = mul(x = var_1416_cast_fp16, y = var_1417_to_fp16)[name = tensor<string, []>("scores_67_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_69_cast_fp16 = select(a = var_1306_to_fp16, b = scores_67_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_69_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_27_cast_fp16 = softmax(axis = var_1311, x = scores_69_cast_fp16)[name = tensor<string, []>("probs_27_cast_fp16")]; |
| tensor<bool, []> var_1426_transpose_x_0 = const()[name = tensor<string, []>("op_1426_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1426_transpose_y_0 = const()[name = tensor<string, []>("op_1426_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_27_cast_fp16 = transpose(perm = var_1414, x = v_13_cast_fp16)[name = tensor<string, []>("transpose_197")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1426_cast_fp16 = matmul(transpose_x = var_1426_transpose_x_0, transpose_y = var_1426_transpose_y_0, x = probs_27_cast_fp16, y = vT_27_cast_fp16)[name = tensor<string, []>("op_1426_cast_fp16")]; |
| tensor<int32, [4]> var_1427 = const()[name = tensor<string, []>("op_1427"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1429 = const()[name = tensor<string, []>("op_1429"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1428_cast_fp16 = transpose(perm = var_1427, x = var_1426_cast_fp16)[name = tensor<string, []>("transpose_196")]; |
| tensor<fp16, [1, 111, 128]> input_99_cast_fp16 = reshape(shape = var_1429, x = var_1428_cast_fp16)[name = tensor<string, []>("input_99_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48182336))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48280704))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_34_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_6_cross_attention_o_net_weight_to_fp16_quantized, x = input_99_cast_fp16)[name = tensor<string, []>("linear_34_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_101_cast_fp16 = add(x = input_97_cast_fp16, y = linear_34_cast_fp16)[name = tensor<string, []>("input_101_cast_fp16")]; |
| tensor<int32, [1]> x_103_axes_0 = const()[name = tensor<string, []>("x_103_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_6_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_6_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48282304)))]; |
| tensor<fp16, [1, 111, 768]> x_103_cast_fp16 = layer_norm(axes = x_103_axes_0, epsilon = var_1309_to_fp16, gamma = dec_layers_6_norm_pos_ff_weight_to_fp16, x = input_101_cast_fp16)[name = tensor<string, []>("x_103_cast_fp16")]; |
| tensor<int32, [3]> var_1446 = const()[name = tensor<string, []>("op_1446"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_39_pad_type_0 = const()[name = tensor<string, []>("y_39_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_39_strides_0 = const()[name = tensor<string, []>("y_39_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_39_pad_0 = const()[name = tensor<string, []>("y_39_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_39_dilations_0 = const()[name = tensor<string, []>("y_39_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_39_groups_0 = const()[name = tensor<string, []>("y_39_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_6_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(48283904))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50643264))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_107_cast_fp16 = transpose(perm = var_1446, x = x_103_cast_fp16)[name = tensor<string, []>("transpose_195")]; |
| tensor<fp16, [1, 3072, 111]> y_39_cast_fp16 = conv(dilations = y_39_dilations_0, groups = y_39_groups_0, pad = y_39_pad_0, pad_type = y_39_pad_type_0, strides = y_39_strides_0, weight = dec_layers_6_pos_ff_proj_weight_to_fp16_quantized, x = x_107_cast_fp16)[name = tensor<string, []>("y_39_cast_fp16")]; |
| tensor<string, []> x_109_mode_0 = const()[name = tensor<string, []>("x_109_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_109_cast_fp16 = gelu(mode = x_109_mode_0, x = y_39_cast_fp16)[name = tensor<string, []>("x_109_cast_fp16")]; |
| tensor<string, []> y_41_pad_type_0 = const()[name = tensor<string, []>("y_41_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_41_strides_0 = const()[name = tensor<string, []>("y_41_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_41_pad_0 = const()[name = tensor<string, []>("y_41_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_41_dilations_0 = const()[name = tensor<string, []>("y_41_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_41_groups_0 = const()[name = tensor<string, []>("y_41_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(50649472))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53008832))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_41_cast_fp16 = conv(dilations = y_41_dilations_0, groups = y_41_groups_0, pad = y_41_pad_0, pad_type = y_41_pad_type_0, strides = y_41_strides_0, weight = dec_layers_6_pos_ff_o_net_weight_to_fp16_quantized, x = x_109_cast_fp16)[name = tensor<string, []>("y_41_cast_fp16")]; |
| tensor<int32, [3]> var_1466 = const()[name = tensor<string, []>("op_1466"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_69_cast_fp16 = transpose(perm = var_1466, x = y_41_cast_fp16)[name = tensor<string, []>("transpose_194")]; |
| tensor<fp16, [1, 111, 768]> x_113_cast_fp16 = add(x = input_101_cast_fp16, y = h_69_cast_fp16)[name = tensor<string, []>("x_113_cast_fp16")]; |
| tensor<int32, []> var_1496 = const()[name = tensor<string, []>("op_1496"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_105_axes_0 = const()[name = tensor<string, []>("input_105_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_7_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53010432)))]; |
| tensor<fp16, []> var_1494_to_fp16 = const()[name = tensor<string, []>("op_1494_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_105_cast_fp16 = layer_norm(axes = input_105_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_self_weight_to_fp16, x = x_113_cast_fp16)[name = tensor<string, []>("input_105_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(53012032))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54781568))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_35_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_7_self_attention_qkv_net_weight_to_fp16_quantized, x = input_105_cast_fp16)[name = tensor<string, []>("linear_35_cast_fp16")]; |
| tensor<int32, [5]> var_1515 = const()[name = tensor<string, []>("op_1515"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_31_cast_fp16 = reshape(shape = var_1515, x = linear_35_cast_fp16)[name = tensor<string, []>("qkv_31_cast_fp16")]; |
| tensor<int32, [5]> q_57_begin_0 = const()[name = tensor<string, []>("q_57_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_57_end_0 = const()[name = tensor<string, []>("q_57_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_57_end_mask_0 = const()[name = tensor<string, []>("q_57_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_57_squeeze_mask_0 = const()[name = tensor<string, []>("q_57_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_57_cast_fp16 = slice_by_index(begin = q_57_begin_0, end = q_57_end_0, end_mask = q_57_end_mask_0, squeeze_mask = q_57_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("q_57_cast_fp16")]; |
| tensor<int32, [5]> new_k_15_begin_0 = const()[name = tensor<string, []>("new_k_15_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_15_end_0 = const()[name = tensor<string, []>("new_k_15_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_15_end_mask_0 = const()[name = tensor<string, []>("new_k_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_15_cast_fp16 = slice_by_index(begin = new_k_15_begin_0, end = new_k_15_end_0, end_mask = new_k_15_end_mask_0, squeeze_mask = new_k_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_k_15_cast_fp16")]; |
| tensor<string, []> new_k_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_15_begin_0 = const()[name = tensor<string, []>("new_v_15_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_15_end_0 = const()[name = tensor<string, []>("new_v_15_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_15_end_mask_0 = const()[name = tensor<string, []>("new_v_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_15_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_15_cast_fp16 = slice_by_index(begin = new_v_15_begin_0, end = new_v_15_end_0, end_mask = new_v_15_end_mask_0, squeeze_mask = new_v_15_squeeze_mask_0, x = qkv_31_cast_fp16)[name = tensor<string, []>("new_v_15_cast_fp16")]; |
| tensor<string, []> new_v_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1530 = const()[name = tensor<string, []>("op_1530"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1532_transpose_x_0 = const()[name = tensor<string, []>("op_1532_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1532_transpose_y_0 = const()[name = tensor<string, []>("op_1532_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_124_perm_0 = const()[name = tensor<string, []>("transpose_124_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_125_perm_0 = const()[name = tensor<string, []>("transpose_125_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_125 = transpose(perm = transpose_125_perm_0, x = new_k_15_cast_fp16)[name = tensor<string, []>("transpose_192")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_124 = transpose(perm = transpose_124_perm_0, x = q_57_cast_fp16)[name = tensor<string, []>("transpose_193")]; |
| tensor<fp16, [1, 12, 111, 111]> var_1532_cast_fp16 = matmul(transpose_x = var_1532_transpose_x_0, transpose_y = var_1532_transpose_y_0, x = transpose_124, y = transpose_125)[name = tensor<string, []>("op_1532_cast_fp16")]; |
| tensor<fp16, []> var_1533_to_fp16 = const()[name = tensor<string, []>("op_1533_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_71_cast_fp16 = mul(x = var_1532_cast_fp16, y = var_1533_to_fp16)[name = tensor<string, []>("scores_71_cast_fp16")]; |
| tensor<fp16, []> var_1491_to_fp16 = const()[name = tensor<string, []>("op_1491_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_73_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_71_cast_fp16)[name = tensor<string, []>("scores_73_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_29_cast_fp16 = softmax(axis = var_1496, x = scores_73_cast_fp16)[name = tensor<string, []>("probs_29_cast_fp16")]; |
| tensor<bool, []> var_1553_transpose_x_0 = const()[name = tensor<string, []>("op_1553_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1553_transpose_y_0 = const()[name = tensor<string, []>("op_1553_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_29_cast_fp16 = transpose(perm = var_1530, x = new_v_15_cast_fp16)[name = tensor<string, []>("transpose_191")]; |
| tensor<fp16, [1, 12, 111, 64]> var_1553_cast_fp16 = matmul(transpose_x = var_1553_transpose_x_0, transpose_y = var_1553_transpose_y_0, x = probs_29_cast_fp16, y = vT_29_cast_fp16)[name = tensor<string, []>("op_1553_cast_fp16")]; |
| tensor<int32, [4]> var_1554 = const()[name = tensor<string, []>("op_1554"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1558 = const()[name = tensor<string, []>("op_1558"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_43_cast_fp16 = transpose(perm = var_1554, x = var_1553_cast_fp16)[name = tensor<string, []>("transpose_190")]; |
| tensor<fp16, [1, 111, 768]> input_107_cast_fp16 = reshape(shape = var_1558, x = y_43_cast_fp16)[name = tensor<string, []>("input_107_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_7_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(54786240))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55376128))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_36_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_self_attention_o_net_weight_to_fp16_quantized, x = input_107_cast_fp16)[name = tensor<string, []>("linear_36_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_109_cast_fp16 = add(x = x_113_cast_fp16, y = linear_36_cast_fp16)[name = tensor<string, []>("input_109_cast_fp16")]; |
| tensor<int32, [1]> x_117_axes_0 = const()[name = tensor<string, []>("x_117_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_7_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55377728)))]; |
| tensor<fp16, [1, 111, 768]> x_117_cast_fp16 = layer_norm(axes = x_117_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_xattn_query_weight_to_fp16, x = input_109_cast_fp16)[name = tensor<string, []>("x_117_cast_fp16")]; |
| tensor<int32, [1]> memory_15_axes_0 = const()[name = tensor<string, []>("memory_15_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_7_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55379328)))]; |
| tensor<fp16, [1, 256, 768]> memory_15_cast_fp16 = layer_norm(axes = memory_15_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_15_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55380928))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55479296))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_37_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_7_cross_attention_q_net_weight_to_fp16_quantized, x = x_117_cast_fp16)[name = tensor<string, []>("linear_37_cast_fp16")]; |
| tensor<int32, [4]> var_1580 = const()[name = tensor<string, []>("op_1580"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_61_cast_fp16 = reshape(shape = var_1580, x = linear_37_cast_fp16)[name = tensor<string, []>("q_61_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55479616))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55676288))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_38_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_7_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_15_cast_fp16)[name = tensor<string, []>("linear_38_cast_fp16")]; |
| tensor<int32, [5]> var_1586 = const()[name = tensor<string, []>("op_1586"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_15_cast_fp16 = reshape(shape = var_1586, x = linear_38_cast_fp16)[name = tensor<string, []>("kv_15_cast_fp16")]; |
| tensor<int32, [5]> k_15_begin_0 = const()[name = tensor<string, []>("k_15_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_15_end_0 = const()[name = tensor<string, []>("k_15_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_15_end_mask_0 = const()[name = tensor<string, []>("k_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_15_squeeze_mask_0 = const()[name = tensor<string, []>("k_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_15_cast_fp16 = slice_by_index(begin = k_15_begin_0, end = k_15_end_0, end_mask = k_15_end_mask_0, squeeze_mask = k_15_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor<string, []>("k_15_cast_fp16")]; |
| tensor<string, []> k_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_15_begin_0 = const()[name = tensor<string, []>("v_15_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_15_end_0 = const()[name = tensor<string, []>("v_15_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_15_end_mask_0 = const()[name = tensor<string, []>("v_15_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_15_squeeze_mask_0 = const()[name = tensor<string, []>("v_15_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_15_cast_fp16 = slice_by_index(begin = v_15_begin_0, end = v_15_end_0, end_mask = v_15_end_mask_0, squeeze_mask = v_15_squeeze_mask_0, x = kv_15_cast_fp16)[name = tensor<string, []>("v_15_cast_fp16")]; |
| tensor<string, []> v_15_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_15_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1599 = const()[name = tensor<string, []>("op_1599"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1601_transpose_x_0 = const()[name = tensor<string, []>("op_1601_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1601_transpose_y_0 = const()[name = tensor<string, []>("op_1601_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_126_perm_0 = const()[name = tensor<string, []>("transpose_126_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_127_perm_0 = const()[name = tensor<string, []>("transpose_127_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_127 = transpose(perm = transpose_127_perm_0, x = k_15_cast_fp16)[name = tensor<string, []>("transpose_188")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_126 = transpose(perm = transpose_126_perm_0, x = q_61_cast_fp16)[name = tensor<string, []>("transpose_189")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1601_cast_fp16 = matmul(transpose_x = var_1601_transpose_x_0, transpose_y = var_1601_transpose_y_0, x = transpose_126, y = transpose_127)[name = tensor<string, []>("op_1601_cast_fp16")]; |
| tensor<fp16, []> var_1602_to_fp16 = const()[name = tensor<string, []>("op_1602_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_77_cast_fp16 = mul(x = var_1601_cast_fp16, y = var_1602_to_fp16)[name = tensor<string, []>("scores_77_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_79_cast_fp16 = select(a = var_1491_to_fp16, b = scores_77_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_79_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_31_cast_fp16 = softmax(axis = var_1496, x = scores_79_cast_fp16)[name = tensor<string, []>("probs_31_cast_fp16")]; |
| tensor<bool, []> var_1611_transpose_x_0 = const()[name = tensor<string, []>("op_1611_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1611_transpose_y_0 = const()[name = tensor<string, []>("op_1611_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_31_cast_fp16 = transpose(perm = var_1599, x = v_15_cast_fp16)[name = tensor<string, []>("transpose_187")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1611_cast_fp16 = matmul(transpose_x = var_1611_transpose_x_0, transpose_y = var_1611_transpose_y_0, x = probs_31_cast_fp16, y = vT_31_cast_fp16)[name = tensor<string, []>("op_1611_cast_fp16")]; |
| tensor<int32, [4]> var_1612 = const()[name = tensor<string, []>("op_1612"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1614 = const()[name = tensor<string, []>("op_1614"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1613_cast_fp16 = transpose(perm = var_1612, x = var_1611_cast_fp16)[name = tensor<string, []>("transpose_186")]; |
| tensor<fp16, [1, 111, 128]> input_111_cast_fp16 = reshape(shape = var_1614, x = var_1613_cast_fp16)[name = tensor<string, []>("input_111_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55676864))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55775232))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_39_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_7_cross_attention_o_net_weight_to_fp16_quantized, x = input_111_cast_fp16)[name = tensor<string, []>("linear_39_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_113_cast_fp16 = add(x = input_109_cast_fp16, y = linear_39_cast_fp16)[name = tensor<string, []>("input_113_cast_fp16")]; |
| tensor<int32, [1]> x_119_axes_0 = const()[name = tensor<string, []>("x_119_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_7_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_7_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55776832)))]; |
| tensor<fp16, [1, 111, 768]> x_119_cast_fp16 = layer_norm(axes = x_119_axes_0, epsilon = var_1494_to_fp16, gamma = dec_layers_7_norm_pos_ff_weight_to_fp16, x = input_113_cast_fp16)[name = tensor<string, []>("x_119_cast_fp16")]; |
| tensor<int32, [3]> var_1631 = const()[name = tensor<string, []>("op_1631"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_45_pad_type_0 = const()[name = tensor<string, []>("y_45_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_45_strides_0 = const()[name = tensor<string, []>("y_45_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_45_pad_0 = const()[name = tensor<string, []>("y_45_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_45_dilations_0 = const()[name = tensor<string, []>("y_45_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_45_groups_0 = const()[name = tensor<string, []>("y_45_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_7_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(55778432))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58137792))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_123_cast_fp16 = transpose(perm = var_1631, x = x_119_cast_fp16)[name = tensor<string, []>("transpose_185")]; |
| tensor<fp16, [1, 3072, 111]> y_45_cast_fp16 = conv(dilations = y_45_dilations_0, groups = y_45_groups_0, pad = y_45_pad_0, pad_type = y_45_pad_type_0, strides = y_45_strides_0, weight = dec_layers_7_pos_ff_proj_weight_to_fp16_quantized, x = x_123_cast_fp16)[name = tensor<string, []>("y_45_cast_fp16")]; |
| tensor<string, []> x_125_mode_0 = const()[name = tensor<string, []>("x_125_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_125_cast_fp16 = gelu(mode = x_125_mode_0, x = y_45_cast_fp16)[name = tensor<string, []>("x_125_cast_fp16")]; |
| tensor<string, []> y_47_pad_type_0 = const()[name = tensor<string, []>("y_47_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_47_strides_0 = const()[name = tensor<string, []>("y_47_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_47_pad_0 = const()[name = tensor<string, []>("y_47_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_47_dilations_0 = const()[name = tensor<string, []>("y_47_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_47_groups_0 = const()[name = tensor<string, []>("y_47_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(58144000))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60503360))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_47_cast_fp16 = conv(dilations = y_47_dilations_0, groups = y_47_groups_0, pad = y_47_pad_0, pad_type = y_47_pad_type_0, strides = y_47_strides_0, weight = dec_layers_7_pos_ff_o_net_weight_to_fp16_quantized, x = x_125_cast_fp16)[name = tensor<string, []>("y_47_cast_fp16")]; |
| tensor<int32, [3]> var_1651 = const()[name = tensor<string, []>("op_1651"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_79_cast_fp16 = transpose(perm = var_1651, x = y_47_cast_fp16)[name = tensor<string, []>("transpose_184")]; |
| tensor<fp16, [1, 111, 768]> x_129_cast_fp16 = add(x = input_113_cast_fp16, y = h_79_cast_fp16)[name = tensor<string, []>("x_129_cast_fp16")]; |
| tensor<int32, []> var_1681 = const()[name = tensor<string, []>("op_1681"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_117_axes_0 = const()[name = tensor<string, []>("input_117_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_8_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60504960)))]; |
| tensor<fp16, []> var_1679_to_fp16 = const()[name = tensor<string, []>("op_1679_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_117_cast_fp16 = layer_norm(axes = input_117_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_self_weight_to_fp16, x = x_129_cast_fp16)[name = tensor<string, []>("input_117_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(60506560))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62276096))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_40_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_8_self_attention_qkv_net_weight_to_fp16_quantized, x = input_117_cast_fp16)[name = tensor<string, []>("linear_40_cast_fp16")]; |
| tensor<int32, [5]> var_1700 = const()[name = tensor<string, []>("op_1700"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_35_cast_fp16 = reshape(shape = var_1700, x = linear_40_cast_fp16)[name = tensor<string, []>("qkv_35_cast_fp16")]; |
| tensor<int32, [5]> q_65_begin_0 = const()[name = tensor<string, []>("q_65_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_65_end_0 = const()[name = tensor<string, []>("q_65_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_65_end_mask_0 = const()[name = tensor<string, []>("q_65_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_65_squeeze_mask_0 = const()[name = tensor<string, []>("q_65_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_65_cast_fp16 = slice_by_index(begin = q_65_begin_0, end = q_65_end_0, end_mask = q_65_end_mask_0, squeeze_mask = q_65_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("q_65_cast_fp16")]; |
| tensor<int32, [5]> new_k_17_begin_0 = const()[name = tensor<string, []>("new_k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_17_end_0 = const()[name = tensor<string, []>("new_k_17_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_17_end_mask_0 = const()[name = tensor<string, []>("new_k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_17_cast_fp16 = slice_by_index(begin = new_k_17_begin_0, end = new_k_17_end_0, end_mask = new_k_17_end_mask_0, squeeze_mask = new_k_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_k_17_cast_fp16")]; |
| tensor<string, []> new_k_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_17_begin_0 = const()[name = tensor<string, []>("new_v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_17_end_0 = const()[name = tensor<string, []>("new_v_17_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_17_end_mask_0 = const()[name = tensor<string, []>("new_v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_17_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_17_cast_fp16 = slice_by_index(begin = new_v_17_begin_0, end = new_v_17_end_0, end_mask = new_v_17_end_mask_0, squeeze_mask = new_v_17_squeeze_mask_0, x = qkv_35_cast_fp16)[name = tensor<string, []>("new_v_17_cast_fp16")]; |
| tensor<string, []> new_v_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1715 = const()[name = tensor<string, []>("op_1715"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1717_transpose_x_0 = const()[name = tensor<string, []>("op_1717_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1717_transpose_y_0 = const()[name = tensor<string, []>("op_1717_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_128_perm_0 = const()[name = tensor<string, []>("transpose_128_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_129_perm_0 = const()[name = tensor<string, []>("transpose_129_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_129 = transpose(perm = transpose_129_perm_0, x = new_k_17_cast_fp16)[name = tensor<string, []>("transpose_182")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_128 = transpose(perm = transpose_128_perm_0, x = q_65_cast_fp16)[name = tensor<string, []>("transpose_183")]; |
| tensor<fp16, [1, 12, 111, 111]> var_1717_cast_fp16 = matmul(transpose_x = var_1717_transpose_x_0, transpose_y = var_1717_transpose_y_0, x = transpose_128, y = transpose_129)[name = tensor<string, []>("op_1717_cast_fp16")]; |
| tensor<fp16, []> var_1718_to_fp16 = const()[name = tensor<string, []>("op_1718_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_81_cast_fp16 = mul(x = var_1717_cast_fp16, y = var_1718_to_fp16)[name = tensor<string, []>("scores_81_cast_fp16")]; |
| tensor<fp16, []> var_1676_to_fp16 = const()[name = tensor<string, []>("op_1676_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_83_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_81_cast_fp16)[name = tensor<string, []>("scores_83_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_33_cast_fp16 = softmax(axis = var_1681, x = scores_83_cast_fp16)[name = tensor<string, []>("probs_33_cast_fp16")]; |
| tensor<bool, []> var_1738_transpose_x_0 = const()[name = tensor<string, []>("op_1738_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1738_transpose_y_0 = const()[name = tensor<string, []>("op_1738_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_33_cast_fp16 = transpose(perm = var_1715, x = new_v_17_cast_fp16)[name = tensor<string, []>("transpose_181")]; |
| tensor<fp16, [1, 12, 111, 64]> var_1738_cast_fp16 = matmul(transpose_x = var_1738_transpose_x_0, transpose_y = var_1738_transpose_y_0, x = probs_33_cast_fp16, y = vT_33_cast_fp16)[name = tensor<string, []>("op_1738_cast_fp16")]; |
| tensor<int32, [4]> var_1739 = const()[name = tensor<string, []>("op_1739"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1743 = const()[name = tensor<string, []>("op_1743"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_49_cast_fp16 = transpose(perm = var_1739, x = var_1738_cast_fp16)[name = tensor<string, []>("transpose_180")]; |
| tensor<fp16, [1, 111, 768]> input_119_cast_fp16 = reshape(shape = var_1743, x = y_49_cast_fp16)[name = tensor<string, []>("input_119_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_8_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62280768))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62870656))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_41_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_self_attention_o_net_weight_to_fp16_quantized, x = input_119_cast_fp16)[name = tensor<string, []>("linear_41_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_121_cast_fp16 = add(x = x_129_cast_fp16, y = linear_41_cast_fp16)[name = tensor<string, []>("input_121_cast_fp16")]; |
| tensor<int32, [1]> x_133_axes_0 = const()[name = tensor<string, []>("x_133_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_8_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62872256)))]; |
| tensor<fp16, [1, 111, 768]> x_133_cast_fp16 = layer_norm(axes = x_133_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_xattn_query_weight_to_fp16, x = input_121_cast_fp16)[name = tensor<string, []>("x_133_cast_fp16")]; |
| tensor<int32, [1]> memory_17_axes_0 = const()[name = tensor<string, []>("memory_17_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_8_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62873856)))]; |
| tensor<fp16, [1, 256, 768]> memory_17_cast_fp16 = layer_norm(axes = memory_17_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_17_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62875456))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62973824))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_42_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_8_cross_attention_q_net_weight_to_fp16_quantized, x = x_133_cast_fp16)[name = tensor<string, []>("linear_42_cast_fp16")]; |
| tensor<int32, [4]> var_1765 = const()[name = tensor<string, []>("op_1765"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_69_cast_fp16 = reshape(shape = var_1765, x = linear_42_cast_fp16)[name = tensor<string, []>("q_69_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(62974144))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63170816))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_43_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_8_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_17_cast_fp16)[name = tensor<string, []>("linear_43_cast_fp16")]; |
| tensor<int32, [5]> var_1771 = const()[name = tensor<string, []>("op_1771"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_17_cast_fp16 = reshape(shape = var_1771, x = linear_43_cast_fp16)[name = tensor<string, []>("kv_17_cast_fp16")]; |
| tensor<int32, [5]> k_17_begin_0 = const()[name = tensor<string, []>("k_17_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_17_end_0 = const()[name = tensor<string, []>("k_17_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_17_end_mask_0 = const()[name = tensor<string, []>("k_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_17_squeeze_mask_0 = const()[name = tensor<string, []>("k_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_17_cast_fp16 = slice_by_index(begin = k_17_begin_0, end = k_17_end_0, end_mask = k_17_end_mask_0, squeeze_mask = k_17_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor<string, []>("k_17_cast_fp16")]; |
| tensor<string, []> k_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_17_begin_0 = const()[name = tensor<string, []>("v_17_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_17_end_0 = const()[name = tensor<string, []>("v_17_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_17_end_mask_0 = const()[name = tensor<string, []>("v_17_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_17_squeeze_mask_0 = const()[name = tensor<string, []>("v_17_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_17_cast_fp16 = slice_by_index(begin = v_17_begin_0, end = v_17_end_0, end_mask = v_17_end_mask_0, squeeze_mask = v_17_squeeze_mask_0, x = kv_17_cast_fp16)[name = tensor<string, []>("v_17_cast_fp16")]; |
| tensor<string, []> v_17_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_17_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1784 = const()[name = tensor<string, []>("op_1784"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1786_transpose_x_0 = const()[name = tensor<string, []>("op_1786_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1786_transpose_y_0 = const()[name = tensor<string, []>("op_1786_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_130_perm_0 = const()[name = tensor<string, []>("transpose_130_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_131_perm_0 = const()[name = tensor<string, []>("transpose_131_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_131 = transpose(perm = transpose_131_perm_0, x = k_17_cast_fp16)[name = tensor<string, []>("transpose_178")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_130 = transpose(perm = transpose_130_perm_0, x = q_69_cast_fp16)[name = tensor<string, []>("transpose_179")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1786_cast_fp16 = matmul(transpose_x = var_1786_transpose_x_0, transpose_y = var_1786_transpose_y_0, x = transpose_130, y = transpose_131)[name = tensor<string, []>("op_1786_cast_fp16")]; |
| tensor<fp16, []> var_1787_to_fp16 = const()[name = tensor<string, []>("op_1787_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_87_cast_fp16 = mul(x = var_1786_cast_fp16, y = var_1787_to_fp16)[name = tensor<string, []>("scores_87_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_89_cast_fp16 = select(a = var_1676_to_fp16, b = scores_87_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_89_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_35_cast_fp16 = softmax(axis = var_1681, x = scores_89_cast_fp16)[name = tensor<string, []>("probs_35_cast_fp16")]; |
| tensor<bool, []> var_1796_transpose_x_0 = const()[name = tensor<string, []>("op_1796_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1796_transpose_y_0 = const()[name = tensor<string, []>("op_1796_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_35_cast_fp16 = transpose(perm = var_1784, x = v_17_cast_fp16)[name = tensor<string, []>("transpose_177")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1796_cast_fp16 = matmul(transpose_x = var_1796_transpose_x_0, transpose_y = var_1796_transpose_y_0, x = probs_35_cast_fp16, y = vT_35_cast_fp16)[name = tensor<string, []>("op_1796_cast_fp16")]; |
| tensor<int32, [4]> var_1797 = const()[name = tensor<string, []>("op_1797"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1799 = const()[name = tensor<string, []>("op_1799"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1798_cast_fp16 = transpose(perm = var_1797, x = var_1796_cast_fp16)[name = tensor<string, []>("transpose_176")]; |
| tensor<fp16, [1, 111, 128]> input_123_cast_fp16 = reshape(shape = var_1799, x = var_1798_cast_fp16)[name = tensor<string, []>("input_123_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63171392))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63269760))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_44_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_8_cross_attention_o_net_weight_to_fp16_quantized, x = input_123_cast_fp16)[name = tensor<string, []>("linear_44_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_125_cast_fp16 = add(x = input_121_cast_fp16, y = linear_44_cast_fp16)[name = tensor<string, []>("input_125_cast_fp16")]; |
| tensor<int32, [1]> x_135_axes_0 = const()[name = tensor<string, []>("x_135_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_8_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_8_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63271360)))]; |
| tensor<fp16, [1, 111, 768]> x_135_cast_fp16 = layer_norm(axes = x_135_axes_0, epsilon = var_1679_to_fp16, gamma = dec_layers_8_norm_pos_ff_weight_to_fp16, x = input_125_cast_fp16)[name = tensor<string, []>("x_135_cast_fp16")]; |
| tensor<int32, [3]> var_1816 = const()[name = tensor<string, []>("op_1816"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_51_pad_type_0 = const()[name = tensor<string, []>("y_51_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_51_strides_0 = const()[name = tensor<string, []>("y_51_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_51_pad_0 = const()[name = tensor<string, []>("y_51_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_51_dilations_0 = const()[name = tensor<string, []>("y_51_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_51_groups_0 = const()[name = tensor<string, []>("y_51_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_8_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(63272960))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65632320))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_139_cast_fp16 = transpose(perm = var_1816, x = x_135_cast_fp16)[name = tensor<string, []>("transpose_175")]; |
| tensor<fp16, [1, 3072, 111]> y_51_cast_fp16 = conv(dilations = y_51_dilations_0, groups = y_51_groups_0, pad = y_51_pad_0, pad_type = y_51_pad_type_0, strides = y_51_strides_0, weight = dec_layers_8_pos_ff_proj_weight_to_fp16_quantized, x = x_139_cast_fp16)[name = tensor<string, []>("y_51_cast_fp16")]; |
| tensor<string, []> x_141_mode_0 = const()[name = tensor<string, []>("x_141_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_141_cast_fp16 = gelu(mode = x_141_mode_0, x = y_51_cast_fp16)[name = tensor<string, []>("x_141_cast_fp16")]; |
| tensor<string, []> y_53_pad_type_0 = const()[name = tensor<string, []>("y_53_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_53_strides_0 = const()[name = tensor<string, []>("y_53_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_53_pad_0 = const()[name = tensor<string, []>("y_53_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_53_dilations_0 = const()[name = tensor<string, []>("y_53_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_53_groups_0 = const()[name = tensor<string, []>("y_53_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(65638528))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67997888))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_53_cast_fp16 = conv(dilations = y_53_dilations_0, groups = y_53_groups_0, pad = y_53_pad_0, pad_type = y_53_pad_type_0, strides = y_53_strides_0, weight = dec_layers_8_pos_ff_o_net_weight_to_fp16_quantized, x = x_141_cast_fp16)[name = tensor<string, []>("y_53_cast_fp16")]; |
| tensor<int32, [3]> var_1836 = const()[name = tensor<string, []>("op_1836"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_89_cast_fp16 = transpose(perm = var_1836, x = y_53_cast_fp16)[name = tensor<string, []>("transpose_174")]; |
| tensor<fp16, [1, 111, 768]> x_145_cast_fp16 = add(x = input_125_cast_fp16, y = h_89_cast_fp16)[name = tensor<string, []>("x_145_cast_fp16")]; |
| tensor<int32, []> var_1866 = const()[name = tensor<string, []>("op_1866"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_129_axes_0 = const()[name = tensor<string, []>("input_129_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_9_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(67999488)))]; |
| tensor<fp16, []> var_1864_to_fp16 = const()[name = tensor<string, []>("op_1864_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_129_cast_fp16 = layer_norm(axes = input_129_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_self_weight_to_fp16, x = x_145_cast_fp16)[name = tensor<string, []>("input_129_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(68001088))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69770624))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_45_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_9_self_attention_qkv_net_weight_to_fp16_quantized, x = input_129_cast_fp16)[name = tensor<string, []>("linear_45_cast_fp16")]; |
| tensor<int32, [5]> var_1885 = const()[name = tensor<string, []>("op_1885"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_39_cast_fp16 = reshape(shape = var_1885, x = linear_45_cast_fp16)[name = tensor<string, []>("qkv_39_cast_fp16")]; |
| tensor<int32, [5]> q_73_begin_0 = const()[name = tensor<string, []>("q_73_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_73_end_0 = const()[name = tensor<string, []>("q_73_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_73_end_mask_0 = const()[name = tensor<string, []>("q_73_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_73_squeeze_mask_0 = const()[name = tensor<string, []>("q_73_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_73_cast_fp16 = slice_by_index(begin = q_73_begin_0, end = q_73_end_0, end_mask = q_73_end_mask_0, squeeze_mask = q_73_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("q_73_cast_fp16")]; |
| tensor<int32, [5]> new_k_19_begin_0 = const()[name = tensor<string, []>("new_k_19_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_19_end_0 = const()[name = tensor<string, []>("new_k_19_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_19_end_mask_0 = const()[name = tensor<string, []>("new_k_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_19_cast_fp16 = slice_by_index(begin = new_k_19_begin_0, end = new_k_19_end_0, end_mask = new_k_19_end_mask_0, squeeze_mask = new_k_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_k_19_cast_fp16")]; |
| tensor<string, []> new_k_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_19_begin_0 = const()[name = tensor<string, []>("new_v_19_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_19_end_0 = const()[name = tensor<string, []>("new_v_19_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_19_end_mask_0 = const()[name = tensor<string, []>("new_v_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_19_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_19_cast_fp16 = slice_by_index(begin = new_v_19_begin_0, end = new_v_19_end_0, end_mask = new_v_19_end_mask_0, squeeze_mask = new_v_19_squeeze_mask_0, x = qkv_39_cast_fp16)[name = tensor<string, []>("new_v_19_cast_fp16")]; |
| tensor<string, []> new_v_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1900 = const()[name = tensor<string, []>("op_1900"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1902_transpose_x_0 = const()[name = tensor<string, []>("op_1902_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1902_transpose_y_0 = const()[name = tensor<string, []>("op_1902_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_132_perm_0 = const()[name = tensor<string, []>("transpose_132_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_133_perm_0 = const()[name = tensor<string, []>("transpose_133_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_133 = transpose(perm = transpose_133_perm_0, x = new_k_19_cast_fp16)[name = tensor<string, []>("transpose_172")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_132 = transpose(perm = transpose_132_perm_0, x = q_73_cast_fp16)[name = tensor<string, []>("transpose_173")]; |
| tensor<fp16, [1, 12, 111, 111]> var_1902_cast_fp16 = matmul(transpose_x = var_1902_transpose_x_0, transpose_y = var_1902_transpose_y_0, x = transpose_132, y = transpose_133)[name = tensor<string, []>("op_1902_cast_fp16")]; |
| tensor<fp16, []> var_1903_to_fp16 = const()[name = tensor<string, []>("op_1903_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_91_cast_fp16 = mul(x = var_1902_cast_fp16, y = var_1903_to_fp16)[name = tensor<string, []>("scores_91_cast_fp16")]; |
| tensor<fp16, []> var_1861_to_fp16 = const()[name = tensor<string, []>("op_1861_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_93_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_91_cast_fp16)[name = tensor<string, []>("scores_93_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_37_cast_fp16 = softmax(axis = var_1866, x = scores_93_cast_fp16)[name = tensor<string, []>("probs_37_cast_fp16")]; |
| tensor<bool, []> var_1923_transpose_x_0 = const()[name = tensor<string, []>("op_1923_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1923_transpose_y_0 = const()[name = tensor<string, []>("op_1923_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_37_cast_fp16 = transpose(perm = var_1900, x = new_v_19_cast_fp16)[name = tensor<string, []>("transpose_171")]; |
| tensor<fp16, [1, 12, 111, 64]> var_1923_cast_fp16 = matmul(transpose_x = var_1923_transpose_x_0, transpose_y = var_1923_transpose_y_0, x = probs_37_cast_fp16, y = vT_37_cast_fp16)[name = tensor<string, []>("op_1923_cast_fp16")]; |
| tensor<int32, [4]> var_1924 = const()[name = tensor<string, []>("op_1924"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1928 = const()[name = tensor<string, []>("op_1928"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_55_cast_fp16 = transpose(perm = var_1924, x = var_1923_cast_fp16)[name = tensor<string, []>("transpose_170")]; |
| tensor<fp16, [1, 111, 768]> input_131_cast_fp16 = reshape(shape = var_1928, x = y_55_cast_fp16)[name = tensor<string, []>("input_131_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_9_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(69775296))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70365184))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_46_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_self_attention_o_net_weight_to_fp16_quantized, x = input_131_cast_fp16)[name = tensor<string, []>("linear_46_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_133_cast_fp16 = add(x = x_145_cast_fp16, y = linear_46_cast_fp16)[name = tensor<string, []>("input_133_cast_fp16")]; |
| tensor<int32, [1]> x_149_axes_0 = const()[name = tensor<string, []>("x_149_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_9_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70366784)))]; |
| tensor<fp16, [1, 111, 768]> x_149_cast_fp16 = layer_norm(axes = x_149_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_xattn_query_weight_to_fp16, x = input_133_cast_fp16)[name = tensor<string, []>("x_149_cast_fp16")]; |
| tensor<int32, [1]> memory_19_axes_0 = const()[name = tensor<string, []>("memory_19_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_9_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70368384)))]; |
| tensor<fp16, [1, 256, 768]> memory_19_cast_fp16 = layer_norm(axes = memory_19_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_19_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70369984))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70468352))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_47_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_9_cross_attention_q_net_weight_to_fp16_quantized, x = x_149_cast_fp16)[name = tensor<string, []>("linear_47_cast_fp16")]; |
| tensor<int32, [4]> var_1950 = const()[name = tensor<string, []>("op_1950"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_77_cast_fp16 = reshape(shape = var_1950, x = linear_47_cast_fp16)[name = tensor<string, []>("q_77_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70468672))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70665344))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_48_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_9_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_19_cast_fp16)[name = tensor<string, []>("linear_48_cast_fp16")]; |
| tensor<int32, [5]> var_1956 = const()[name = tensor<string, []>("op_1956"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_19_cast_fp16 = reshape(shape = var_1956, x = linear_48_cast_fp16)[name = tensor<string, []>("kv_19_cast_fp16")]; |
| tensor<int32, [5]> k_19_begin_0 = const()[name = tensor<string, []>("k_19_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_19_end_0 = const()[name = tensor<string, []>("k_19_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_19_end_mask_0 = const()[name = tensor<string, []>("k_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_19_squeeze_mask_0 = const()[name = tensor<string, []>("k_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_19_cast_fp16 = slice_by_index(begin = k_19_begin_0, end = k_19_end_0, end_mask = k_19_end_mask_0, squeeze_mask = k_19_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor<string, []>("k_19_cast_fp16")]; |
| tensor<string, []> k_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_19_begin_0 = const()[name = tensor<string, []>("v_19_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_19_end_0 = const()[name = tensor<string, []>("v_19_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_19_end_mask_0 = const()[name = tensor<string, []>("v_19_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_19_squeeze_mask_0 = const()[name = tensor<string, []>("v_19_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_19_cast_fp16 = slice_by_index(begin = v_19_begin_0, end = v_19_end_0, end_mask = v_19_end_mask_0, squeeze_mask = v_19_squeeze_mask_0, x = kv_19_cast_fp16)[name = tensor<string, []>("v_19_cast_fp16")]; |
| tensor<string, []> v_19_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_19_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_1969 = const()[name = tensor<string, []>("op_1969"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_1971_transpose_x_0 = const()[name = tensor<string, []>("op_1971_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1971_transpose_y_0 = const()[name = tensor<string, []>("op_1971_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_134_perm_0 = const()[name = tensor<string, []>("transpose_134_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_135_perm_0 = const()[name = tensor<string, []>("transpose_135_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_135 = transpose(perm = transpose_135_perm_0, x = k_19_cast_fp16)[name = tensor<string, []>("transpose_168")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_134 = transpose(perm = transpose_134_perm_0, x = q_77_cast_fp16)[name = tensor<string, []>("transpose_169")]; |
| tensor<fp16, [1, 1, 111, 256]> var_1971_cast_fp16 = matmul(transpose_x = var_1971_transpose_x_0, transpose_y = var_1971_transpose_y_0, x = transpose_134, y = transpose_135)[name = tensor<string, []>("op_1971_cast_fp16")]; |
| tensor<fp16, []> var_1972_to_fp16 = const()[name = tensor<string, []>("op_1972_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_97_cast_fp16 = mul(x = var_1971_cast_fp16, y = var_1972_to_fp16)[name = tensor<string, []>("scores_97_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_99_cast_fp16 = select(a = var_1861_to_fp16, b = scores_97_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_99_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_39_cast_fp16 = softmax(axis = var_1866, x = scores_99_cast_fp16)[name = tensor<string, []>("probs_39_cast_fp16")]; |
| tensor<bool, []> var_1981_transpose_x_0 = const()[name = tensor<string, []>("op_1981_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_1981_transpose_y_0 = const()[name = tensor<string, []>("op_1981_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_39_cast_fp16 = transpose(perm = var_1969, x = v_19_cast_fp16)[name = tensor<string, []>("transpose_167")]; |
| tensor<fp16, [1, 1, 111, 128]> var_1981_cast_fp16 = matmul(transpose_x = var_1981_transpose_x_0, transpose_y = var_1981_transpose_y_0, x = probs_39_cast_fp16, y = vT_39_cast_fp16)[name = tensor<string, []>("op_1981_cast_fp16")]; |
| tensor<int32, [4]> var_1982 = const()[name = tensor<string, []>("op_1982"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_1984 = const()[name = tensor<string, []>("op_1984"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_1983_cast_fp16 = transpose(perm = var_1982, x = var_1981_cast_fp16)[name = tensor<string, []>("transpose_166")]; |
| tensor<fp16, [1, 111, 128]> input_135_cast_fp16 = reshape(shape = var_1984, x = var_1983_cast_fp16)[name = tensor<string, []>("input_135_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70665920))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70764288))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_49_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_9_cross_attention_o_net_weight_to_fp16_quantized, x = input_135_cast_fp16)[name = tensor<string, []>("linear_49_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_137_cast_fp16 = add(x = input_133_cast_fp16, y = linear_49_cast_fp16)[name = tensor<string, []>("input_137_cast_fp16")]; |
| tensor<int32, [1]> x_151_axes_0 = const()[name = tensor<string, []>("x_151_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_9_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_9_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70765888)))]; |
| tensor<fp16, [1, 111, 768]> x_151_cast_fp16 = layer_norm(axes = x_151_axes_0, epsilon = var_1864_to_fp16, gamma = dec_layers_9_norm_pos_ff_weight_to_fp16, x = input_137_cast_fp16)[name = tensor<string, []>("x_151_cast_fp16")]; |
| tensor<int32, [3]> var_2001 = const()[name = tensor<string, []>("op_2001"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_57_pad_type_0 = const()[name = tensor<string, []>("y_57_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_57_strides_0 = const()[name = tensor<string, []>("y_57_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_57_pad_0 = const()[name = tensor<string, []>("y_57_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_57_dilations_0 = const()[name = tensor<string, []>("y_57_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_57_groups_0 = const()[name = tensor<string, []>("y_57_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_9_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(70767488))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73126848))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_155_cast_fp16 = transpose(perm = var_2001, x = x_151_cast_fp16)[name = tensor<string, []>("transpose_165")]; |
| tensor<fp16, [1, 3072, 111]> y_57_cast_fp16 = conv(dilations = y_57_dilations_0, groups = y_57_groups_0, pad = y_57_pad_0, pad_type = y_57_pad_type_0, strides = y_57_strides_0, weight = dec_layers_9_pos_ff_proj_weight_to_fp16_quantized, x = x_155_cast_fp16)[name = tensor<string, []>("y_57_cast_fp16")]; |
| tensor<string, []> x_157_mode_0 = const()[name = tensor<string, []>("x_157_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_157_cast_fp16 = gelu(mode = x_157_mode_0, x = y_57_cast_fp16)[name = tensor<string, []>("x_157_cast_fp16")]; |
| tensor<string, []> y_59_pad_type_0 = const()[name = tensor<string, []>("y_59_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_59_strides_0 = const()[name = tensor<string, []>("y_59_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_59_pad_0 = const()[name = tensor<string, []>("y_59_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_59_dilations_0 = const()[name = tensor<string, []>("y_59_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_59_groups_0 = const()[name = tensor<string, []>("y_59_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(73133056))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75492416))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_59_cast_fp16 = conv(dilations = y_59_dilations_0, groups = y_59_groups_0, pad = y_59_pad_0, pad_type = y_59_pad_type_0, strides = y_59_strides_0, weight = dec_layers_9_pos_ff_o_net_weight_to_fp16_quantized, x = x_157_cast_fp16)[name = tensor<string, []>("y_59_cast_fp16")]; |
| tensor<int32, [3]> var_2021 = const()[name = tensor<string, []>("op_2021"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_99_cast_fp16 = transpose(perm = var_2021, x = y_59_cast_fp16)[name = tensor<string, []>("transpose_164")]; |
| tensor<fp16, [1, 111, 768]> x_161_cast_fp16 = add(x = input_137_cast_fp16, y = h_99_cast_fp16)[name = tensor<string, []>("x_161_cast_fp16")]; |
| tensor<int32, []> var_2051 = const()[name = tensor<string, []>("op_2051"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_141_axes_0 = const()[name = tensor<string, []>("input_141_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_10_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75494016)))]; |
| tensor<fp16, []> var_2049_to_fp16 = const()[name = tensor<string, []>("op_2049_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_141_cast_fp16 = layer_norm(axes = input_141_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_self_weight_to_fp16, x = x_161_cast_fp16)[name = tensor<string, []>("input_141_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(75495616))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77265152))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_50_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_10_self_attention_qkv_net_weight_to_fp16_quantized, x = input_141_cast_fp16)[name = tensor<string, []>("linear_50_cast_fp16")]; |
| tensor<int32, [5]> var_2070 = const()[name = tensor<string, []>("op_2070"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_43_cast_fp16 = reshape(shape = var_2070, x = linear_50_cast_fp16)[name = tensor<string, []>("qkv_43_cast_fp16")]; |
| tensor<int32, [5]> q_81_begin_0 = const()[name = tensor<string, []>("q_81_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_81_end_0 = const()[name = tensor<string, []>("q_81_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_81_end_mask_0 = const()[name = tensor<string, []>("q_81_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_81_squeeze_mask_0 = const()[name = tensor<string, []>("q_81_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_81_cast_fp16 = slice_by_index(begin = q_81_begin_0, end = q_81_end_0, end_mask = q_81_end_mask_0, squeeze_mask = q_81_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("q_81_cast_fp16")]; |
| tensor<int32, [5]> new_k_21_begin_0 = const()[name = tensor<string, []>("new_k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_21_end_0 = const()[name = tensor<string, []>("new_k_21_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_21_end_mask_0 = const()[name = tensor<string, []>("new_k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_21_cast_fp16 = slice_by_index(begin = new_k_21_begin_0, end = new_k_21_end_0, end_mask = new_k_21_end_mask_0, squeeze_mask = new_k_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_k_21_cast_fp16")]; |
| tensor<string, []> new_k_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_21_begin_0 = const()[name = tensor<string, []>("new_v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_21_end_0 = const()[name = tensor<string, []>("new_v_21_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_21_end_mask_0 = const()[name = tensor<string, []>("new_v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_21_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_21_cast_fp16 = slice_by_index(begin = new_v_21_begin_0, end = new_v_21_end_0, end_mask = new_v_21_end_mask_0, squeeze_mask = new_v_21_squeeze_mask_0, x = qkv_43_cast_fp16)[name = tensor<string, []>("new_v_21_cast_fp16")]; |
| tensor<string, []> new_v_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_2085 = const()[name = tensor<string, []>("op_2085"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_2087_transpose_x_0 = const()[name = tensor<string, []>("op_2087_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2087_transpose_y_0 = const()[name = tensor<string, []>("op_2087_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_136_perm_0 = const()[name = tensor<string, []>("transpose_136_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_137_perm_0 = const()[name = tensor<string, []>("transpose_137_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_137 = transpose(perm = transpose_137_perm_0, x = new_k_21_cast_fp16)[name = tensor<string, []>("transpose_162")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_136 = transpose(perm = transpose_136_perm_0, x = q_81_cast_fp16)[name = tensor<string, []>("transpose_163")]; |
| tensor<fp16, [1, 12, 111, 111]> var_2087_cast_fp16 = matmul(transpose_x = var_2087_transpose_x_0, transpose_y = var_2087_transpose_y_0, x = transpose_136, y = transpose_137)[name = tensor<string, []>("op_2087_cast_fp16")]; |
| tensor<fp16, []> var_2088_to_fp16 = const()[name = tensor<string, []>("op_2088_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_101_cast_fp16 = mul(x = var_2087_cast_fp16, y = var_2088_to_fp16)[name = tensor<string, []>("scores_101_cast_fp16")]; |
| tensor<fp16, []> var_2046_to_fp16 = const()[name = tensor<string, []>("op_2046_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_103_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_101_cast_fp16)[name = tensor<string, []>("scores_103_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_41_cast_fp16 = softmax(axis = var_2051, x = scores_103_cast_fp16)[name = tensor<string, []>("probs_41_cast_fp16")]; |
| tensor<bool, []> var_2108_transpose_x_0 = const()[name = tensor<string, []>("op_2108_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2108_transpose_y_0 = const()[name = tensor<string, []>("op_2108_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_41_cast_fp16 = transpose(perm = var_2085, x = new_v_21_cast_fp16)[name = tensor<string, []>("transpose_161")]; |
| tensor<fp16, [1, 12, 111, 64]> var_2108_cast_fp16 = matmul(transpose_x = var_2108_transpose_x_0, transpose_y = var_2108_transpose_y_0, x = probs_41_cast_fp16, y = vT_41_cast_fp16)[name = tensor<string, []>("op_2108_cast_fp16")]; |
| tensor<int32, [4]> var_2109 = const()[name = tensor<string, []>("op_2109"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2113 = const()[name = tensor<string, []>("op_2113"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_61_cast_fp16 = transpose(perm = var_2109, x = var_2108_cast_fp16)[name = tensor<string, []>("transpose_160")]; |
| tensor<fp16, [1, 111, 768]> input_143_cast_fp16 = reshape(shape = var_2113, x = y_61_cast_fp16)[name = tensor<string, []>("input_143_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_10_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77269824))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77859712))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_51_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_self_attention_o_net_weight_to_fp16_quantized, x = input_143_cast_fp16)[name = tensor<string, []>("linear_51_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_145_cast_fp16 = add(x = x_161_cast_fp16, y = linear_51_cast_fp16)[name = tensor<string, []>("input_145_cast_fp16")]; |
| tensor<int32, [1]> x_165_axes_0 = const()[name = tensor<string, []>("x_165_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_10_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77861312)))]; |
| tensor<fp16, [1, 111, 768]> x_165_cast_fp16 = layer_norm(axes = x_165_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_xattn_query_weight_to_fp16, x = input_145_cast_fp16)[name = tensor<string, []>("x_165_cast_fp16")]; |
| tensor<int32, [1]> memory_21_axes_0 = const()[name = tensor<string, []>("memory_21_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_10_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77862912)))]; |
| tensor<fp16, [1, 256, 768]> memory_21_cast_fp16 = layer_norm(axes = memory_21_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_21_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77864512))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77962880))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_52_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_10_cross_attention_q_net_weight_to_fp16_quantized, x = x_165_cast_fp16)[name = tensor<string, []>("linear_52_cast_fp16")]; |
| tensor<int32, [4]> var_2135 = const()[name = tensor<string, []>("op_2135"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_85_cast_fp16 = reshape(shape = var_2135, x = linear_52_cast_fp16)[name = tensor<string, []>("q_85_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(77963200))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78159872))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_53_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_10_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_21_cast_fp16)[name = tensor<string, []>("linear_53_cast_fp16")]; |
| tensor<int32, [5]> var_2141 = const()[name = tensor<string, []>("op_2141"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_21_cast_fp16 = reshape(shape = var_2141, x = linear_53_cast_fp16)[name = tensor<string, []>("kv_21_cast_fp16")]; |
| tensor<int32, [5]> k_21_begin_0 = const()[name = tensor<string, []>("k_21_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_21_end_0 = const()[name = tensor<string, []>("k_21_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_21_end_mask_0 = const()[name = tensor<string, []>("k_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_21_squeeze_mask_0 = const()[name = tensor<string, []>("k_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_21_cast_fp16 = slice_by_index(begin = k_21_begin_0, end = k_21_end_0, end_mask = k_21_end_mask_0, squeeze_mask = k_21_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor<string, []>("k_21_cast_fp16")]; |
| tensor<string, []> k_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_21_begin_0 = const()[name = tensor<string, []>("v_21_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_21_end_0 = const()[name = tensor<string, []>("v_21_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_21_end_mask_0 = const()[name = tensor<string, []>("v_21_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_21_squeeze_mask_0 = const()[name = tensor<string, []>("v_21_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_21_cast_fp16 = slice_by_index(begin = v_21_begin_0, end = v_21_end_0, end_mask = v_21_end_mask_0, squeeze_mask = v_21_squeeze_mask_0, x = kv_21_cast_fp16)[name = tensor<string, []>("v_21_cast_fp16")]; |
| tensor<string, []> v_21_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_21_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_2154 = const()[name = tensor<string, []>("op_2154"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_2156_transpose_x_0 = const()[name = tensor<string, []>("op_2156_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2156_transpose_y_0 = const()[name = tensor<string, []>("op_2156_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_138_perm_0 = const()[name = tensor<string, []>("transpose_138_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_139_perm_0 = const()[name = tensor<string, []>("transpose_139_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_139 = transpose(perm = transpose_139_perm_0, x = k_21_cast_fp16)[name = tensor<string, []>("transpose_158")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_138 = transpose(perm = transpose_138_perm_0, x = q_85_cast_fp16)[name = tensor<string, []>("transpose_159")]; |
| tensor<fp16, [1, 1, 111, 256]> var_2156_cast_fp16 = matmul(transpose_x = var_2156_transpose_x_0, transpose_y = var_2156_transpose_y_0, x = transpose_138, y = transpose_139)[name = tensor<string, []>("op_2156_cast_fp16")]; |
| tensor<fp16, []> var_2157_to_fp16 = const()[name = tensor<string, []>("op_2157_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_107_cast_fp16 = mul(x = var_2156_cast_fp16, y = var_2157_to_fp16)[name = tensor<string, []>("scores_107_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_109_cast_fp16 = select(a = var_2046_to_fp16, b = scores_107_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_109_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_43_cast_fp16 = softmax(axis = var_2051, x = scores_109_cast_fp16)[name = tensor<string, []>("probs_43_cast_fp16")]; |
| tensor<bool, []> var_2166_transpose_x_0 = const()[name = tensor<string, []>("op_2166_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2166_transpose_y_0 = const()[name = tensor<string, []>("op_2166_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_43_cast_fp16 = transpose(perm = var_2154, x = v_21_cast_fp16)[name = tensor<string, []>("transpose_157")]; |
| tensor<fp16, [1, 1, 111, 128]> var_2166_cast_fp16 = matmul(transpose_x = var_2166_transpose_x_0, transpose_y = var_2166_transpose_y_0, x = probs_43_cast_fp16, y = vT_43_cast_fp16)[name = tensor<string, []>("op_2166_cast_fp16")]; |
| tensor<int32, [4]> var_2167 = const()[name = tensor<string, []>("op_2167"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2169 = const()[name = tensor<string, []>("op_2169"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_2168_cast_fp16 = transpose(perm = var_2167, x = var_2166_cast_fp16)[name = tensor<string, []>("transpose_156")]; |
| tensor<fp16, [1, 111, 128]> input_147_cast_fp16 = reshape(shape = var_2169, x = var_2168_cast_fp16)[name = tensor<string, []>("input_147_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78160448))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78258816))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_54_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_10_cross_attention_o_net_weight_to_fp16_quantized, x = input_147_cast_fp16)[name = tensor<string, []>("linear_54_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_149_cast_fp16 = add(x = input_145_cast_fp16, y = linear_54_cast_fp16)[name = tensor<string, []>("input_149_cast_fp16")]; |
| tensor<int32, [1]> x_167_axes_0 = const()[name = tensor<string, []>("x_167_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_10_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_10_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78260416)))]; |
| tensor<fp16, [1, 111, 768]> x_167_cast_fp16 = layer_norm(axes = x_167_axes_0, epsilon = var_2049_to_fp16, gamma = dec_layers_10_norm_pos_ff_weight_to_fp16, x = input_149_cast_fp16)[name = tensor<string, []>("x_167_cast_fp16")]; |
| tensor<int32, [3]> var_2186 = const()[name = tensor<string, []>("op_2186"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_63_pad_type_0 = const()[name = tensor<string, []>("y_63_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_63_strides_0 = const()[name = tensor<string, []>("y_63_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_63_pad_0 = const()[name = tensor<string, []>("y_63_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_63_dilations_0 = const()[name = tensor<string, []>("y_63_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_63_groups_0 = const()[name = tensor<string, []>("y_63_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_10_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(78262016))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80621376))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_171_cast_fp16 = transpose(perm = var_2186, x = x_167_cast_fp16)[name = tensor<string, []>("transpose_155")]; |
| tensor<fp16, [1, 3072, 111]> y_63_cast_fp16 = conv(dilations = y_63_dilations_0, groups = y_63_groups_0, pad = y_63_pad_0, pad_type = y_63_pad_type_0, strides = y_63_strides_0, weight = dec_layers_10_pos_ff_proj_weight_to_fp16_quantized, x = x_171_cast_fp16)[name = tensor<string, []>("y_63_cast_fp16")]; |
| tensor<string, []> x_173_mode_0 = const()[name = tensor<string, []>("x_173_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_173_cast_fp16 = gelu(mode = x_173_mode_0, x = y_63_cast_fp16)[name = tensor<string, []>("x_173_cast_fp16")]; |
| tensor<string, []> y_65_pad_type_0 = const()[name = tensor<string, []>("y_65_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_65_strides_0 = const()[name = tensor<string, []>("y_65_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_65_pad_0 = const()[name = tensor<string, []>("y_65_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_65_dilations_0 = const()[name = tensor<string, []>("y_65_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_65_groups_0 = const()[name = tensor<string, []>("y_65_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(80627584))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82986944))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_65_cast_fp16 = conv(dilations = y_65_dilations_0, groups = y_65_groups_0, pad = y_65_pad_0, pad_type = y_65_pad_type_0, strides = y_65_strides_0, weight = dec_layers_10_pos_ff_o_net_weight_to_fp16_quantized, x = x_173_cast_fp16)[name = tensor<string, []>("y_65_cast_fp16")]; |
| tensor<int32, [3]> var_2206 = const()[name = tensor<string, []>("op_2206"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_109_cast_fp16 = transpose(perm = var_2206, x = y_65_cast_fp16)[name = tensor<string, []>("transpose_154")]; |
| tensor<fp16, [1, 111, 768]> x_177_cast_fp16 = add(x = input_149_cast_fp16, y = h_109_cast_fp16)[name = tensor<string, []>("x_177_cast_fp16")]; |
| tensor<int32, []> var_2236 = const()[name = tensor<string, []>("op_2236"), val = tensor<int32, []>(-1)]; |
| tensor<int32, [1]> input_153_axes_0 = const()[name = tensor<string, []>("input_153_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_11_norm_self_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_self_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82988544)))]; |
| tensor<fp16, []> var_2234_to_fp16 = const()[name = tensor<string, []>("op_2234_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> input_153_cast_fp16 = layer_norm(axes = input_153_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_self_weight_to_fp16, x = x_177_cast_fp16)[name = tensor<string, []>("input_153_cast_fp16")]; |
| tensor<fp16, [2304, 768]> dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [2304, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(82990144))), scale = tensor<fp16, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84759680))), zero_point = tensor<int8, [2304]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2281088)))]; |
| tensor<fp16, [1, 111, 2304]> linear_55_cast_fp16 = linear(bias = linear_0_bias_0_to_fp16, weight = dec_layers_11_self_attention_qkv_net_weight_to_fp16_quantized, x = input_153_cast_fp16)[name = tensor<string, []>("linear_55_cast_fp16")]; |
| tensor<int32, [5]> var_2255 = const()[name = tensor<string, []>("op_2255"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<fp16, [1, 111, 3, 12, 64]> qkv_cast_fp16 = reshape(shape = var_2255, x = linear_55_cast_fp16)[name = tensor<string, []>("qkv_cast_fp16")]; |
| tensor<int32, [5]> q_89_begin_0 = const()[name = tensor<string, []>("q_89_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> q_89_end_0 = const()[name = tensor<string, []>("q_89_end_0"), val = tensor<int32, [5]>([1, 111, 1, 12, 64])]; |
| tensor<bool, [5]> q_89_end_mask_0 = const()[name = tensor<string, []>("q_89_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> q_89_squeeze_mask_0 = const()[name = tensor<string, []>("q_89_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> q_89_cast_fp16 = slice_by_index(begin = q_89_begin_0, end = q_89_end_0, end_mask = q_89_end_mask_0, squeeze_mask = q_89_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("q_89_cast_fp16")]; |
| tensor<int32, [5]> new_k_begin_0 = const()[name = tensor<string, []>("new_k_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> new_k_end_0 = const()[name = tensor<string, []>("new_k_end_0"), val = tensor<int32, [5]>([1, 111, 2, 12, 64])]; |
| tensor<bool, [5]> new_k_end_mask_0 = const()[name = tensor<string, []>("new_k_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_k_squeeze_mask_0 = const()[name = tensor<string, []>("new_k_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_k_cast_fp16 = slice_by_index(begin = new_k_begin_0, end = new_k_end_0, end_mask = new_k_end_mask_0, squeeze_mask = new_k_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_k_cast_fp16")]; |
| tensor<string, []> new_k_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_k_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> new_v_begin_0 = const()[name = tensor<string, []>("new_v_begin_0"), val = tensor<int32, [5]>([0, 0, 2, 0, 0])]; |
| tensor<int32, [5]> new_v_end_0 = const()[name = tensor<string, []>("new_v_end_0"), val = tensor<int32, [5]>([1, 111, 3, 12, 64])]; |
| tensor<bool, [5]> new_v_end_mask_0 = const()[name = tensor<string, []>("new_v_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> new_v_squeeze_mask_0 = const()[name = tensor<string, []>("new_v_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 111, 12, 64]> new_v_cast_fp16 = slice_by_index(begin = new_v_begin_0, end = new_v_end_0, end_mask = new_v_end_mask_0, squeeze_mask = new_v_squeeze_mask_0, x = qkv_cast_fp16)[name = tensor<string, []>("new_v_cast_fp16")]; |
| tensor<string, []> new_v_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("new_v_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_2270 = const()[name = tensor<string, []>("op_2270"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_2272_transpose_x_0 = const()[name = tensor<string, []>("op_2272_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2272_transpose_y_0 = const()[name = tensor<string, []>("op_2272_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_140_perm_0 = const()[name = tensor<string, []>("transpose_140_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_141_perm_0 = const()[name = tensor<string, []>("transpose_141_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 12, 64, 111]> transpose_141 = transpose(perm = transpose_141_perm_0, x = new_k_cast_fp16)[name = tensor<string, []>("transpose_152")]; |
| tensor<fp16, [1, 12, 111, 64]> transpose_140 = transpose(perm = transpose_140_perm_0, x = q_89_cast_fp16)[name = tensor<string, []>("transpose_153")]; |
| tensor<fp16, [1, 12, 111, 111]> var_2272_cast_fp16 = matmul(transpose_x = var_2272_transpose_x_0, transpose_y = var_2272_transpose_y_0, x = transpose_140, y = transpose_141)[name = tensor<string, []>("op_2272_cast_fp16")]; |
| tensor<fp16, []> var_2273_to_fp16 = const()[name = tensor<string, []>("op_2273_to_fp16"), val = tensor<fp16, []>(0x1p-3)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_111_cast_fp16 = mul(x = var_2272_cast_fp16, y = var_2273_to_fp16)[name = tensor<string, []>("scores_111_cast_fp16")]; |
| tensor<fp16, []> var_2231_to_fp16 = const()[name = tensor<string, []>("op_2231_to_fp16"), val = tensor<fp16, []>(-inf)]; |
| tensor<fp16, [1, 12, 111, 111]> scores_113_cast_fp16 = add(x = scores_3_cast_fp16_x_0, y = scores_111_cast_fp16)[name = tensor<string, []>("scores_113_cast_fp16")]; |
| tensor<fp16, [1, 12, 111, 111]> probs_45_cast_fp16 = softmax(axis = var_2236, x = scores_113_cast_fp16)[name = tensor<string, []>("probs_45_cast_fp16")]; |
| tensor<bool, []> var_2293_transpose_x_0 = const()[name = tensor<string, []>("op_2293_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2293_transpose_y_0 = const()[name = tensor<string, []>("op_2293_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 12, 111, 64]> vT_45_cast_fp16 = transpose(perm = var_2270, x = new_v_cast_fp16)[name = tensor<string, []>("transpose_151")]; |
| tensor<fp16, [1, 12, 111, 64]> var_2293_cast_fp16 = matmul(transpose_x = var_2293_transpose_x_0, transpose_y = var_2293_transpose_y_0, x = probs_45_cast_fp16, y = vT_45_cast_fp16)[name = tensor<string, []>("op_2293_cast_fp16")]; |
| tensor<int32, [4]> var_2294 = const()[name = tensor<string, []>("op_2294"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2298 = const()[name = tensor<string, []>("op_2298"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 12, 64]> y_67_cast_fp16 = transpose(perm = var_2294, x = var_2293_cast_fp16)[name = tensor<string, []>("transpose_150")]; |
| tensor<fp16, [1, 111, 768]> input_155_cast_fp16 = reshape(shape = var_2298, x = y_67_cast_fp16)[name = tensor<string, []>("input_155_cast_fp16")]; |
| tensor<fp16, [768, 768]> dec_layers_11_self_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_self_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(84764352))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85354240))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_56_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_self_attention_o_net_weight_to_fp16_quantized, x = input_155_cast_fp16)[name = tensor<string, []>("linear_56_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_157_cast_fp16 = add(x = x_177_cast_fp16, y = linear_56_cast_fp16)[name = tensor<string, []>("input_157_cast_fp16")]; |
| tensor<int32, [1]> x_181_axes_0 = const()[name = tensor<string, []>("x_181_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_11_norm_xattn_query_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_xattn_query_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85355840)))]; |
| tensor<fp16, [1, 111, 768]> x_181_cast_fp16 = layer_norm(axes = x_181_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_xattn_query_weight_to_fp16, x = input_157_cast_fp16)[name = tensor<string, []>("x_181_cast_fp16")]; |
| tensor<int32, [1]> memory_axes_0 = const()[name = tensor<string, []>("memory_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_11_norm_xattn_memory_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_xattn_memory_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85357440)))]; |
| tensor<fp16, [1, 256, 768]> memory_cast_fp16 = layer_norm(axes = memory_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_xattn_memory_weight_to_fp16, x = encoder_output_to_fp16)[name = tensor<string, []>("memory_cast_fp16")]; |
| tensor<fp16, [128, 768]> dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [128, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85359040))), scale = tensor<fp16, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85457408))), zero_point = tensor<int8, [128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3013056)))]; |
| tensor<fp16, [1, 111, 128]> linear_57_cast_fp16 = linear(bias = linear_2_bias_0_to_fp16, weight = dec_layers_11_cross_attention_q_net_weight_to_fp16_quantized, x = x_181_cast_fp16)[name = tensor<string, []>("linear_57_cast_fp16")]; |
| tensor<int32, [4]> var_2320 = const()[name = tensor<string, []>("op_2320"), val = tensor<int32, [4]>([1, 111, 1, 128])]; |
| tensor<fp16, [1, 111, 1, 128]> q_93_cast_fp16 = reshape(shape = var_2320, x = linear_57_cast_fp16)[name = tensor<string, []>("q_93_cast_fp16")]; |
| tensor<fp16, [256, 768]> dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [256, 768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85457728))), scale = tensor<fp16, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85654400))), zero_point = tensor<int8, [256]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(3210560)))]; |
| tensor<fp16, [1, 256, 256]> linear_58_cast_fp16 = linear(bias = linear_3_bias_0_to_fp16, weight = dec_layers_11_cross_attention_kv_net_weight_to_fp16_quantized, x = memory_cast_fp16)[name = tensor<string, []>("linear_58_cast_fp16")]; |
| tensor<int32, [5]> var_2326 = const()[name = tensor<string, []>("op_2326"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<fp16, [1, 256, 2, 1, 128]> kv_cast_fp16 = reshape(shape = var_2326, x = linear_58_cast_fp16)[name = tensor<string, []>("kv_cast_fp16")]; |
| tensor<int32, [5]> k_begin_0 = const()[name = tensor<string, []>("k_begin_0"), val = tensor<int32, [5]>([0, 0, 0, 0, 0])]; |
| tensor<int32, [5]> k_end_0 = const()[name = tensor<string, []>("k_end_0"), val = tensor<int32, [5]>([1, 256, 1, 1, 128])]; |
| tensor<bool, [5]> k_end_mask_0 = const()[name = tensor<string, []>("k_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> k_squeeze_mask_0 = const()[name = tensor<string, []>("k_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> k_cast_fp16 = slice_by_index(begin = k_begin_0, end = k_end_0, end_mask = k_end_mask_0, squeeze_mask = k_squeeze_mask_0, x = kv_cast_fp16)[name = tensor<string, []>("k_cast_fp16")]; |
| tensor<string, []> k_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("k_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [5]> v_begin_0 = const()[name = tensor<string, []>("v_begin_0"), val = tensor<int32, [5]>([0, 0, 1, 0, 0])]; |
| tensor<int32, [5]> v_end_0 = const()[name = tensor<string, []>("v_end_0"), val = tensor<int32, [5]>([1, 256, 2, 1, 128])]; |
| tensor<bool, [5]> v_end_mask_0 = const()[name = tensor<string, []>("v_end_mask_0"), val = tensor<bool, [5]>([true, true, false, true, true])]; |
| tensor<bool, [5]> v_squeeze_mask_0 = const()[name = tensor<string, []>("v_squeeze_mask_0"), val = tensor<bool, [5]>([false, false, true, false, false])]; |
| tensor<fp16, [1, 256, 1, 128]> v_cast_fp16 = slice_by_index(begin = v_begin_0, end = v_end_0, end_mask = v_end_mask_0, squeeze_mask = v_squeeze_mask_0, x = kv_cast_fp16)[name = tensor<string, []>("v_cast_fp16")]; |
| tensor<string, []> v_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("v_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<int32, [4]> var_2339 = const()[name = tensor<string, []>("op_2339"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<bool, []> var_2341_transpose_x_0 = const()[name = tensor<string, []>("op_2341_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2341_transpose_y_0 = const()[name = tensor<string, []>("op_2341_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<int32, [4]> transpose_142_perm_0 = const()[name = tensor<string, []>("transpose_142_perm_0"), val = tensor<int32, [4]>([0, 2, -3, -1])]; |
| tensor<int32, [4]> transpose_143_perm_0 = const()[name = tensor<string, []>("transpose_143_perm_0"), val = tensor<int32, [4]>([0, 2, -1, -3])]; |
| tensor<fp16, [1, 1, 128, 256]> transpose_143 = transpose(perm = transpose_143_perm_0, x = k_cast_fp16)[name = tensor<string, []>("transpose_148")]; |
| tensor<fp16, [1, 1, 111, 128]> transpose_142 = transpose(perm = transpose_142_perm_0, x = q_93_cast_fp16)[name = tensor<string, []>("transpose_149")]; |
| tensor<fp16, [1, 1, 111, 256]> var_2341_cast_fp16 = matmul(transpose_x = var_2341_transpose_x_0, transpose_y = var_2341_transpose_y_0, x = transpose_142, y = transpose_143)[name = tensor<string, []>("op_2341_cast_fp16")]; |
| tensor<fp16, []> var_2342_to_fp16 = const()[name = tensor<string, []>("op_2342_to_fp16"), val = tensor<fp16, []>(0x1.6ap-4)]; |
| tensor<fp16, [1, 1, 111, 256]> scores_117_cast_fp16 = mul(x = var_2341_cast_fp16, y = var_2342_to_fp16)[name = tensor<string, []>("scores_117_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> scores_cast_fp16 = select(a = var_2231_to_fp16, b = scores_117_cast_fp16, cond = var_313_cast_fp16)[name = tensor<string, []>("scores_cast_fp16")]; |
| tensor<fp16, [1, 1, 111, 256]> probs_cast_fp16 = softmax(axis = var_2236, x = scores_cast_fp16)[name = tensor<string, []>("probs_cast_fp16")]; |
| tensor<bool, []> var_2351_transpose_x_0 = const()[name = tensor<string, []>("op_2351_transpose_x_0"), val = tensor<bool, []>(false)]; |
| tensor<bool, []> var_2351_transpose_y_0 = const()[name = tensor<string, []>("op_2351_transpose_y_0"), val = tensor<bool, []>(false)]; |
| tensor<fp16, [1, 1, 256, 128]> vT_cast_fp16 = transpose(perm = var_2339, x = v_cast_fp16)[name = tensor<string, []>("transpose_147")]; |
| tensor<fp16, [1, 1, 111, 128]> var_2351_cast_fp16 = matmul(transpose_x = var_2351_transpose_x_0, transpose_y = var_2351_transpose_y_0, x = probs_cast_fp16, y = vT_cast_fp16)[name = tensor<string, []>("op_2351_cast_fp16")]; |
| tensor<int32, [4]> var_2352 = const()[name = tensor<string, []>("op_2352"), val = tensor<int32, [4]>([0, 2, 1, 3])]; |
| tensor<int32, [3]> var_2354 = const()[name = tensor<string, []>("op_2354"), val = tensor<int32, [3]>([1, 111, -1])]; |
| tensor<fp16, [1, 111, 1, 128]> var_2353_cast_fp16 = transpose(perm = var_2352, x = var_2351_cast_fp16)[name = tensor<string, []>("transpose_146")]; |
| tensor<fp16, [1, 111, 128]> input_159_cast_fp16 = reshape(shape = var_2354, x = var_2353_cast_fp16)[name = tensor<string, []>("input_159_cast_fp16")]; |
| tensor<fp16, [768, 128]> dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 128]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85654976))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85753344))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 111, 768]> linear_59_cast_fp16 = linear(bias = linear_1_bias_0_to_fp16, weight = dec_layers_11_cross_attention_o_net_weight_to_fp16_quantized, x = input_159_cast_fp16)[name = tensor<string, []>("linear_59_cast_fp16")]; |
| tensor<fp16, [1, 111, 768]> input_161_cast_fp16 = add(x = input_157_cast_fp16, y = linear_59_cast_fp16)[name = tensor<string, []>("input_161_cast_fp16")]; |
| tensor<int32, [1]> x_183_axes_0 = const()[name = tensor<string, []>("x_183_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_layers_11_norm_pos_ff_weight_to_fp16 = const()[name = tensor<string, []>("dec_layers_11_norm_pos_ff_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85754944)))]; |
| tensor<fp16, [1, 111, 768]> x_183_cast_fp16 = layer_norm(axes = x_183_axes_0, epsilon = var_2234_to_fp16, gamma = dec_layers_11_norm_pos_ff_weight_to_fp16, x = input_161_cast_fp16)[name = tensor<string, []>("x_183_cast_fp16")]; |
| tensor<int32, [3]> var_2371 = const()[name = tensor<string, []>("op_2371"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<string, []> y_69_pad_type_0 = const()[name = tensor<string, []>("y_69_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_69_strides_0 = const()[name = tensor<string, []>("y_69_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_69_pad_0 = const()[name = tensor<string, []>("y_69_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_69_dilations_0 = const()[name = tensor<string, []>("y_69_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_69_groups_0 = const()[name = tensor<string, []>("y_69_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [3072, 768, 1]> dec_layers_11_pos_ff_proj_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_proj_weight_to_fp16_quantized"), quantized_data = tensor<int8, [3072, 768, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(85756544))), scale = tensor<fp16, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88115904))), zero_point = tensor<int8, [3072]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(5672960)))]; |
| tensor<fp16, [1, 768, 111]> x_187_cast_fp16 = transpose(perm = var_2371, x = x_183_cast_fp16)[name = tensor<string, []>("transpose_145")]; |
| tensor<fp16, [1, 3072, 111]> y_69_cast_fp16 = conv(dilations = y_69_dilations_0, groups = y_69_groups_0, pad = y_69_pad_0, pad_type = y_69_pad_type_0, strides = y_69_strides_0, weight = dec_layers_11_pos_ff_proj_weight_to_fp16_quantized, x = x_187_cast_fp16)[name = tensor<string, []>("y_69_cast_fp16")]; |
| tensor<string, []> x_189_mode_0 = const()[name = tensor<string, []>("x_189_mode_0"), val = tensor<string, []>("TANH_APPROXIMATION")]; |
| tensor<fp16, [1, 3072, 111]> x_189_cast_fp16 = gelu(mode = x_189_mode_0, x = y_69_cast_fp16)[name = tensor<string, []>("x_189_cast_fp16")]; |
| tensor<string, []> y_pad_type_0 = const()[name = tensor<string, []>("y_pad_type_0"), val = tensor<string, []>("valid")]; |
| tensor<int32, [1]> y_strides_0 = const()[name = tensor<string, []>("y_strides_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, [2]> y_pad_0 = const()[name = tensor<string, []>("y_pad_0"), val = tensor<int32, [2]>([0, 0])]; |
| tensor<int32, [1]> y_dilations_0 = const()[name = tensor<string, []>("y_dilations_0"), val = tensor<int32, [1]>([1])]; |
| tensor<int32, []> y_groups_0 = const()[name = tensor<string, []>("y_groups_0"), val = tensor<int32, []>(1)]; |
| tensor<fp16, [768, 3072, 1]> dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized = constexpr_affine_dequantize()[axis = tensor<int32, []>(0), name = tensor<string, []>("dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized"), quantized_data = tensor<int8, [768, 3072, 1]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(88122112))), scale = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90481472))), zero_point = tensor<int8, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(2907456)))]; |
| tensor<fp16, [1, 768, 111]> y_cast_fp16 = conv(dilations = y_dilations_0, groups = y_groups_0, pad = y_pad_0, pad_type = y_pad_type_0, strides = y_strides_0, weight = dec_layers_11_pos_ff_o_net_weight_to_fp16_quantized, x = x_189_cast_fp16)[name = tensor<string, []>("y_cast_fp16")]; |
| tensor<int32, [3]> var_2391 = const()[name = tensor<string, []>("op_2391"), val = tensor<int32, [3]>([0, 2, 1])]; |
| tensor<fp16, [1, 111, 768]> h_cast_fp16 = transpose(perm = var_2391, x = y_cast_fp16)[name = tensor<string, []>("transpose_144")]; |
| tensor<fp16, [1, 111, 768]> x_193_cast_fp16 = add(x = input_161_cast_fp16, y = h_cast_fp16)[name = tensor<string, []>("x_193_cast_fp16")]; |
| tensor<int32, [1]> x_axes_0 = const()[name = tensor<string, []>("x_axes_0"), val = tensor<int32, [1]>([-1])]; |
| tensor<fp16, [768]> dec_norm_out_weight_to_fp16 = const()[name = tensor<string, []>("dec_norm_out_weight_to_fp16"), val = tensor<fp16, [768]>(BLOBFILE(path = tensor<string, []>("@model_path/weights/weight.bin"), offset = tensor<uint64, []>(90483072)))]; |
| tensor<fp16, []> var_2405_to_fp16 = const()[name = tensor<string, []>("op_2405_to_fp16"), val = tensor<fp16, []>(0x1.5p-17)]; |
| tensor<fp16, [1, 111, 768]> x_cast_fp16 = layer_norm(axes = x_axes_0, epsilon = var_2405_to_fp16, gamma = dec_norm_out_weight_to_fp16, x = x_193_cast_fp16)[name = tensor<string, []>("x_cast_fp16")]; |
| tensor<int32, [3]> var_2420_begin_0 = const()[name = tensor<string, []>("op_2420_begin_0"), val = tensor<int32, [3]>([0, -1, 0])]; |
| tensor<int32, [3]> var_2420_end_0 = const()[name = tensor<string, []>("op_2420_end_0"), val = tensor<int32, [3]>([1, 111, 768])]; |
| tensor<bool, [3]> var_2420_end_mask_0 = const()[name = tensor<string, []>("op_2420_end_mask_0"), val = tensor<bool, [3]>([true, true, true])]; |
| tensor<fp16, [1, 1, 768]> var_2420_cast_fp16 = slice_by_index(begin = var_2420_begin_0, end = var_2420_end_0, end_mask = var_2420_end_mask_0, x = x_cast_fp16)[name = tensor<string, []>("op_2420_cast_fp16")]; |
| tensor<string, []> var_2425_cast_fp16_to_fp32_dtype_0 = const()[name = tensor<string, []>("op_2425_cast_fp16_to_fp32_dtype_0"), val = tensor<string, []>("fp32")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_0 = cast(dtype = new_k_1_cast_fp16_to_fp32_dtype_0, x = new_k_1_cast_fp16)[name = tensor<string, []>("cast_1")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_0 = cast(dtype = new_v_1_cast_fp16_to_fp32_dtype_0, x = new_v_1_cast_fp16)[name = tensor<string, []>("cast_2")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_0 = cast(dtype = k_1_cast_fp16_to_fp32_dtype_0, x = k_1_cast_fp16)[name = tensor<string, []>("cast_3")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_0 = cast(dtype = v_1_cast_fp16_to_fp32_dtype_0, x = v_1_cast_fp16)[name = tensor<string, []>("cast_4")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_1 = cast(dtype = new_k_3_cast_fp16_to_fp32_dtype_0, x = new_k_3_cast_fp16)[name = tensor<string, []>("cast_5")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_1 = cast(dtype = new_v_3_cast_fp16_to_fp32_dtype_0, x = new_v_3_cast_fp16)[name = tensor<string, []>("cast_6")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_1 = cast(dtype = k_3_cast_fp16_to_fp32_dtype_0, x = k_3_cast_fp16)[name = tensor<string, []>("cast_7")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_1 = cast(dtype = v_3_cast_fp16_to_fp32_dtype_0, x = v_3_cast_fp16)[name = tensor<string, []>("cast_8")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_2 = cast(dtype = new_k_5_cast_fp16_to_fp32_dtype_0, x = new_k_5_cast_fp16)[name = tensor<string, []>("cast_9")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_2 = cast(dtype = new_v_5_cast_fp16_to_fp32_dtype_0, x = new_v_5_cast_fp16)[name = tensor<string, []>("cast_10")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_2 = cast(dtype = k_5_cast_fp16_to_fp32_dtype_0, x = k_5_cast_fp16)[name = tensor<string, []>("cast_11")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_2 = cast(dtype = v_5_cast_fp16_to_fp32_dtype_0, x = v_5_cast_fp16)[name = tensor<string, []>("cast_12")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_3 = cast(dtype = new_k_7_cast_fp16_to_fp32_dtype_0, x = new_k_7_cast_fp16)[name = tensor<string, []>("cast_13")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_3 = cast(dtype = new_v_7_cast_fp16_to_fp32_dtype_0, x = new_v_7_cast_fp16)[name = tensor<string, []>("cast_14")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_3 = cast(dtype = k_7_cast_fp16_to_fp32_dtype_0, x = k_7_cast_fp16)[name = tensor<string, []>("cast_15")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_3 = cast(dtype = v_7_cast_fp16_to_fp32_dtype_0, x = v_7_cast_fp16)[name = tensor<string, []>("cast_16")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_4 = cast(dtype = new_k_9_cast_fp16_to_fp32_dtype_0, x = new_k_9_cast_fp16)[name = tensor<string, []>("cast_17")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_4 = cast(dtype = new_v_9_cast_fp16_to_fp32_dtype_0, x = new_v_9_cast_fp16)[name = tensor<string, []>("cast_18")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_4 = cast(dtype = k_9_cast_fp16_to_fp32_dtype_0, x = k_9_cast_fp16)[name = tensor<string, []>("cast_19")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_4 = cast(dtype = v_9_cast_fp16_to_fp32_dtype_0, x = v_9_cast_fp16)[name = tensor<string, []>("cast_20")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_5 = cast(dtype = new_k_11_cast_fp16_to_fp32_dtype_0, x = new_k_11_cast_fp16)[name = tensor<string, []>("cast_21")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_5 = cast(dtype = new_v_11_cast_fp16_to_fp32_dtype_0, x = new_v_11_cast_fp16)[name = tensor<string, []>("cast_22")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_5 = cast(dtype = k_11_cast_fp16_to_fp32_dtype_0, x = k_11_cast_fp16)[name = tensor<string, []>("cast_23")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_5 = cast(dtype = v_11_cast_fp16_to_fp32_dtype_0, x = v_11_cast_fp16)[name = tensor<string, []>("cast_24")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_6 = cast(dtype = new_k_13_cast_fp16_to_fp32_dtype_0, x = new_k_13_cast_fp16)[name = tensor<string, []>("cast_25")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_6 = cast(dtype = new_v_13_cast_fp16_to_fp32_dtype_0, x = new_v_13_cast_fp16)[name = tensor<string, []>("cast_26")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_6 = cast(dtype = k_13_cast_fp16_to_fp32_dtype_0, x = k_13_cast_fp16)[name = tensor<string, []>("cast_27")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_6 = cast(dtype = v_13_cast_fp16_to_fp32_dtype_0, x = v_13_cast_fp16)[name = tensor<string, []>("cast_28")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_7 = cast(dtype = new_k_15_cast_fp16_to_fp32_dtype_0, x = new_k_15_cast_fp16)[name = tensor<string, []>("cast_29")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_7 = cast(dtype = new_v_15_cast_fp16_to_fp32_dtype_0, x = new_v_15_cast_fp16)[name = tensor<string, []>("cast_30")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_7 = cast(dtype = k_15_cast_fp16_to_fp32_dtype_0, x = k_15_cast_fp16)[name = tensor<string, []>("cast_31")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_7 = cast(dtype = v_15_cast_fp16_to_fp32_dtype_0, x = v_15_cast_fp16)[name = tensor<string, []>("cast_32")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_8 = cast(dtype = new_k_17_cast_fp16_to_fp32_dtype_0, x = new_k_17_cast_fp16)[name = tensor<string, []>("cast_33")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_8 = cast(dtype = new_v_17_cast_fp16_to_fp32_dtype_0, x = new_v_17_cast_fp16)[name = tensor<string, []>("cast_34")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_8 = cast(dtype = k_17_cast_fp16_to_fp32_dtype_0, x = k_17_cast_fp16)[name = tensor<string, []>("cast_35")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_8 = cast(dtype = v_17_cast_fp16_to_fp32_dtype_0, x = v_17_cast_fp16)[name = tensor<string, []>("cast_36")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_9 = cast(dtype = new_k_19_cast_fp16_to_fp32_dtype_0, x = new_k_19_cast_fp16)[name = tensor<string, []>("cast_37")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_9 = cast(dtype = new_v_19_cast_fp16_to_fp32_dtype_0, x = new_v_19_cast_fp16)[name = tensor<string, []>("cast_38")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_9 = cast(dtype = k_19_cast_fp16_to_fp32_dtype_0, x = k_19_cast_fp16)[name = tensor<string, []>("cast_39")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_9 = cast(dtype = v_19_cast_fp16_to_fp32_dtype_0, x = v_19_cast_fp16)[name = tensor<string, []>("cast_40")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_10 = cast(dtype = new_k_21_cast_fp16_to_fp32_dtype_0, x = new_k_21_cast_fp16)[name = tensor<string, []>("cast_41")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_10 = cast(dtype = new_v_21_cast_fp16_to_fp32_dtype_0, x = new_v_21_cast_fp16)[name = tensor<string, []>("cast_42")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_10 = cast(dtype = k_21_cast_fp16_to_fp32_dtype_0, x = k_21_cast_fp16)[name = tensor<string, []>("cast_43")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_10 = cast(dtype = v_21_cast_fp16_to_fp32_dtype_0, x = v_21_cast_fp16)[name = tensor<string, []>("cast_44")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_k_11 = cast(dtype = new_k_cast_fp16_to_fp32_dtype_0, x = new_k_cast_fp16)[name = tensor<string, []>("cast_45")]; |
| tensor<fp32, [1, 111, 12, 64]> sa_v_11 = cast(dtype = new_v_cast_fp16_to_fp32_dtype_0, x = new_v_cast_fp16)[name = tensor<string, []>("cast_46")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_k_11 = cast(dtype = k_cast_fp16_to_fp32_dtype_0, x = k_cast_fp16)[name = tensor<string, []>("cast_47")]; |
| tensor<fp32, [1, 256, 1, 128]> xa_v_11 = cast(dtype = v_cast_fp16_to_fp32_dtype_0, x = v_cast_fp16)[name = tensor<string, []>("cast_48")]; |
| tensor<fp32, [1, 1, 768]> h_last = cast(dtype = var_2425_cast_fp16_to_fp32_dtype_0, x = var_2420_cast_fp16)[name = tensor<string, []>("cast_49")]; |
| } -> (h_last, sa_k_0, sa_k_1, sa_k_2, sa_k_3, sa_k_4, sa_k_5, sa_k_6, sa_k_7, sa_k_8, sa_k_9, sa_k_10, sa_k_11, sa_v_0, sa_v_1, sa_v_2, sa_v_3, sa_v_4, sa_v_5, sa_v_6, sa_v_7, sa_v_8, sa_v_9, sa_v_10, sa_v_11, xa_k_0, xa_k_1, xa_k_2, xa_k_3, xa_k_4, xa_k_5, xa_k_6, xa_k_7, xa_k_8, xa_k_9, xa_k_10, xa_k_11, xa_v_0, xa_v_1, xa_v_2, xa_v_3, xa_v_4, xa_v_5, xa_v_6, xa_v_7, xa_v_8, xa_v_9, xa_v_10, xa_v_11); |
| } |