diff --git "a/graphs/vocoder.json" "b/graphs/vocoder.json" new file mode 100644--- /dev/null +++ "b/graphs/vocoder.json" @@ -0,0 +1,4356 @@ +{ + "ir_version": 9, + "opsets": [ + { + "domain": "", + "version": 19 + } + ], + "inputs": [ + "latent" + ], + "outputs": [ + "wav_tts" + ], + "weight_map": { + "tts.ttl.normalizer.scale": "w000000", + "tts.ae.latent_mean": "w000001", + "tts.ae.latent_std": "w000002", + "tts.ae.decoder.convnext.0.gamma": "w000003", + "tts.ae.decoder.convnext.0.dwconv.net.weight": "w000004", + "tts.ae.decoder.convnext.0.dwconv.net.bias": "w000005", + "tts.ae.decoder.convnext.0.norm.norm.weight": "w000006", + "tts.ae.decoder.convnext.0.norm.norm.bias": "w000007", + "tts.ae.decoder.convnext.0.pwconv1.weight": "w000008", + "tts.ae.decoder.convnext.0.pwconv1.bias": "w000009", + "tts.ae.decoder.convnext.0.pwconv2.weight": "w000010", + "tts.ae.decoder.convnext.0.pwconv2.bias": "w000011", + "tts.ae.decoder.convnext.1.gamma": "w000012", + "tts.ae.decoder.convnext.1.dwconv.net.weight": "w000013", + "tts.ae.decoder.convnext.1.dwconv.net.bias": "w000014", + "tts.ae.decoder.convnext.1.norm.norm.weight": "w000015", + "tts.ae.decoder.convnext.1.norm.norm.bias": "w000016", + "tts.ae.decoder.convnext.1.pwconv1.weight": "w000017", + "tts.ae.decoder.convnext.1.pwconv1.bias": "w000018", + "tts.ae.decoder.convnext.1.pwconv2.weight": "w000019", + "tts.ae.decoder.convnext.1.pwconv2.bias": "w000020", + "tts.ae.decoder.convnext.2.gamma": "w000021", + "tts.ae.decoder.convnext.2.dwconv.net.weight": "w000022", + "tts.ae.decoder.convnext.2.dwconv.net.bias": "w000023", + "tts.ae.decoder.convnext.2.norm.norm.weight": "w000024", + "tts.ae.decoder.convnext.2.norm.norm.bias": "w000025", + "tts.ae.decoder.convnext.2.pwconv1.weight": "w000026", + "tts.ae.decoder.convnext.2.pwconv1.bias": "w000027", + "tts.ae.decoder.convnext.2.pwconv2.weight": "w000028", + "tts.ae.decoder.convnext.2.pwconv2.bias": "w000029", + "tts.ae.decoder.convnext.3.gamma": "w000030", + "tts.ae.decoder.convnext.3.dwconv.net.weight": "w000031", + "tts.ae.decoder.convnext.3.dwconv.net.bias": "w000032", + "tts.ae.decoder.convnext.3.norm.norm.weight": "w000033", + "tts.ae.decoder.convnext.3.norm.norm.bias": "w000034", + "tts.ae.decoder.convnext.3.pwconv1.weight": "w000035", + "tts.ae.decoder.convnext.3.pwconv1.bias": "w000036", + "tts.ae.decoder.convnext.3.pwconv2.weight": "w000037", + "tts.ae.decoder.convnext.3.pwconv2.bias": "w000038", + "tts.ae.decoder.convnext.4.gamma": "w000039", + "tts.ae.decoder.convnext.4.dwconv.net.weight": "w000040", + "tts.ae.decoder.convnext.4.dwconv.net.bias": "w000041", + "tts.ae.decoder.convnext.4.norm.norm.weight": "w000042", + "tts.ae.decoder.convnext.4.norm.norm.bias": "w000043", + "tts.ae.decoder.convnext.4.pwconv1.weight": "w000044", + "tts.ae.decoder.convnext.4.pwconv1.bias": "w000045", + "tts.ae.decoder.convnext.4.pwconv2.weight": "w000046", + "tts.ae.decoder.convnext.4.pwconv2.bias": "w000047", + "tts.ae.decoder.convnext.5.gamma": "w000048", + "tts.ae.decoder.convnext.5.dwconv.net.weight": "w000049", + "tts.ae.decoder.convnext.5.dwconv.net.bias": "w000050", + "tts.ae.decoder.convnext.5.norm.norm.weight": "w000051", + "tts.ae.decoder.convnext.5.norm.norm.bias": "w000052", + "tts.ae.decoder.convnext.5.pwconv1.weight": "w000053", + "tts.ae.decoder.convnext.5.pwconv1.bias": "w000054", + "tts.ae.decoder.convnext.5.pwconv2.weight": "w000055", + "tts.ae.decoder.convnext.5.pwconv2.bias": "w000056", + "tts.ae.decoder.convnext.6.gamma": "w000057", + "tts.ae.decoder.convnext.6.dwconv.net.weight": "w000058", + "tts.ae.decoder.convnext.6.dwconv.net.bias": "w000059", + "tts.ae.decoder.convnext.6.norm.norm.weight": "w000060", + "tts.ae.decoder.convnext.6.norm.norm.bias": "w000061", + "tts.ae.decoder.convnext.6.pwconv1.weight": "w000062", + "tts.ae.decoder.convnext.6.pwconv1.bias": "w000063", + "tts.ae.decoder.convnext.6.pwconv2.weight": "w000064", + "tts.ae.decoder.convnext.6.pwconv2.bias": "w000065", + "tts.ae.decoder.convnext.7.gamma": "w000066", + "tts.ae.decoder.convnext.7.dwconv.net.weight": "w000067", + "tts.ae.decoder.convnext.7.dwconv.net.bias": "w000068", + "tts.ae.decoder.convnext.7.norm.norm.weight": "w000069", + "tts.ae.decoder.convnext.7.norm.norm.bias": "w000070", + "tts.ae.decoder.convnext.7.pwconv1.weight": "w000071", + "tts.ae.decoder.convnext.7.pwconv1.bias": "w000072", + "tts.ae.decoder.convnext.7.pwconv2.weight": "w000073", + "tts.ae.decoder.convnext.7.pwconv2.bias": "w000074", + "tts.ae.decoder.convnext.8.gamma": "w000075", + "tts.ae.decoder.convnext.8.dwconv.net.weight": "w000076", + "tts.ae.decoder.convnext.8.dwconv.net.bias": "w000077", + "tts.ae.decoder.convnext.8.norm.norm.weight": "w000078", + "tts.ae.decoder.convnext.8.norm.norm.bias": "w000079", + "tts.ae.decoder.convnext.8.pwconv1.weight": "w000080", + "tts.ae.decoder.convnext.8.pwconv1.bias": "w000081", + "tts.ae.decoder.convnext.8.pwconv2.weight": "w000082", + "tts.ae.decoder.convnext.8.pwconv2.bias": "w000083", + "tts.ae.decoder.convnext.9.gamma": "w000084", + "tts.ae.decoder.convnext.9.dwconv.net.weight": "w000085", + "tts.ae.decoder.convnext.9.dwconv.net.bias": "w000086", + "tts.ae.decoder.convnext.9.norm.norm.weight": "w000087", + "tts.ae.decoder.convnext.9.norm.norm.bias": "w000088", + "tts.ae.decoder.convnext.9.pwconv1.weight": "w000089", + "tts.ae.decoder.convnext.9.pwconv1.bias": "w000090", + "tts.ae.decoder.convnext.9.pwconv2.weight": "w000091", + "tts.ae.decoder.convnext.9.pwconv2.bias": "w000092", + "tts.ae.decoder.final_norm.norm.weight": "w000093", + "tts.ae.decoder.final_norm.norm.bias": "w000094", + "tts.ae.decoder.final_norm.norm.running_mean": "w000095", + "tts.ae.decoder.final_norm.norm.running_var": "w000096", + "tts.ae.decoder.head.layer1.net.weight": "w000097", + "tts.ae.decoder.head.layer1.net.bias": "w000098", + "tts.ae.decoder.head.layer2.weight": "w000099", + "onnx::Conv_1441": "w000100", + "onnx::Conv_1442": "w000101", + "onnx::PRelu_1506": "w000102", + "/Constant_output_0": "c000000", + "/Constant_1_output_0": "c000001", + "/Constant_2_output_0": "c000002", + "/Constant_3_output_0": "c000003", + "/Constant_4_output_0": "c000004", + "onnx::Unsqueeze_967": "c000005", + "/Constant_5_output_0": "c000006", + "/Constant_6_output_0": "c000007", + "onnx::Unsqueeze_973": "c000008", + "/Constant_7_output_0": "c000009", + "/Constant_8_output_0": "c000010", + "onnx::Unsqueeze_984": "c000011", + "onnx::Unsqueeze_986": "c000012", + "/Constant_9_output_0": "c000013", + "/decoder/embed/Constant_output_0": "c000014", + "/decoder/embed/Constant_1_output_0": "c000015", + "/decoder/embed/Constant_2_output_0": "c000016", + "/decoder/embed/Constant_3_output_0": "c000017", + "/decoder/embed/Constant_4_output_0": "c000018", + "/decoder/embed/Constant_5_output_0": "c000019", + "/decoder/embed/Constant_6_output_0": "c000020", + "/decoder/embed/Constant_7_output_0": "c000021", + "/decoder/convnext.0/dwconv/Constant_output_0": "c000022", + "/decoder/convnext.0/dwconv/Constant_1_output_0": "c000023", + "/decoder/convnext.0/dwconv/Constant_2_output_0": "c000024", + "/decoder/convnext.0/dwconv/Constant_3_output_0": "c000025", + "/decoder/convnext.0/dwconv/Constant_4_output_0": "c000026", + "/decoder/convnext.0/dwconv/Constant_5_output_0": "c000027", + "/decoder/convnext.0/dwconv/Constant_6_output_0": "c000028", + "/decoder/convnext.0/dwconv/Constant_7_output_0": "c000029", + "/decoder/convnext.0/act/Constant_output_0": "c000030", + "/decoder/convnext.0/act/Constant_1_output_0": "c000031", + "/decoder/convnext.0/act/Constant_2_output_0": "c000032", + "/decoder/convnext.1/dwconv/Constant_output_0": "c000033", + "/decoder/convnext.1/dwconv/Constant_1_output_0": "c000034", + "/decoder/convnext.1/dwconv/Constant_2_output_0": "c000035", + "/decoder/convnext.1/dwconv/Constant_3_output_0": "c000036", + "/decoder/convnext.1/dwconv/Constant_4_output_0": "c000037", + "/decoder/convnext.1/dwconv/Constant_5_output_0": "c000038", + "/decoder/convnext.1/dwconv/Constant_6_output_0": "c000039", + "/decoder/convnext.1/dwconv/Constant_7_output_0": "c000040", + "/decoder/convnext.1/act/Constant_output_0": "c000041", + "/decoder/convnext.1/act/Constant_1_output_0": "c000042", + "/decoder/convnext.1/act/Constant_2_output_0": "c000043", + "/decoder/convnext.2/dwconv/Constant_output_0": "c000044", + "/decoder/convnext.2/dwconv/Constant_1_output_0": "c000045", + "/decoder/convnext.2/dwconv/Constant_2_output_0": "c000046", + "/decoder/convnext.2/dwconv/Constant_3_output_0": "c000047", + "/decoder/convnext.2/dwconv/Constant_4_output_0": "c000048", + "/decoder/convnext.2/dwconv/Constant_5_output_0": "c000049", + "/decoder/convnext.2/dwconv/Constant_6_output_0": "c000050", + "/decoder/convnext.2/dwconv/Constant_7_output_0": "c000051", + "/decoder/convnext.2/act/Constant_output_0": "c000052", + "/decoder/convnext.2/act/Constant_1_output_0": "c000053", + "/decoder/convnext.2/act/Constant_2_output_0": "c000054", + "/decoder/convnext.3/dwconv/Constant_output_0": "c000055", + "/decoder/convnext.3/dwconv/Constant_1_output_0": "c000056", + "/decoder/convnext.3/dwconv/Constant_2_output_0": "c000057", + "/decoder/convnext.3/dwconv/Constant_3_output_0": "c000058", + "/decoder/convnext.3/dwconv/Constant_4_output_0": "c000059", + "/decoder/convnext.3/dwconv/Constant_5_output_0": "c000060", + "/decoder/convnext.3/dwconv/Constant_6_output_0": "c000061", + "/decoder/convnext.3/dwconv/Constant_7_output_0": "c000062", + "/decoder/convnext.3/act/Constant_output_0": "c000063", + "/decoder/convnext.3/act/Constant_1_output_0": "c000064", + "/decoder/convnext.3/act/Constant_2_output_0": "c000065", + "/decoder/convnext.4/dwconv/Constant_output_0": "c000066", + "/decoder/convnext.4/dwconv/Constant_1_output_0": "c000067", + "/decoder/convnext.4/dwconv/Constant_2_output_0": "c000068", + "/decoder/convnext.4/dwconv/Constant_3_output_0": "c000069", + "/decoder/convnext.4/dwconv/Constant_4_output_0": "c000070", + "/decoder/convnext.4/dwconv/Constant_5_output_0": "c000071", + "/decoder/convnext.4/dwconv/Constant_6_output_0": "c000072", + "/decoder/convnext.4/dwconv/Constant_7_output_0": "c000073", + "/decoder/convnext.4/act/Constant_output_0": "c000074", + "/decoder/convnext.4/act/Constant_1_output_0": "c000075", + "/decoder/convnext.4/act/Constant_2_output_0": "c000076", + "/decoder/convnext.5/dwconv/Constant_output_0": "c000077", + "/decoder/convnext.5/dwconv/Constant_1_output_0": "c000078", + "/decoder/convnext.5/dwconv/Constant_2_output_0": "c000079", + "/decoder/convnext.5/dwconv/Constant_3_output_0": "c000080", + "/decoder/convnext.5/dwconv/Constant_4_output_0": "c000081", + "/decoder/convnext.5/dwconv/Constant_5_output_0": "c000082", + "/decoder/convnext.5/dwconv/Constant_6_output_0": "c000083", + "/decoder/convnext.5/dwconv/Constant_7_output_0": "c000084", + "/decoder/convnext.5/act/Constant_output_0": "c000085", + "/decoder/convnext.5/act/Constant_1_output_0": "c000086", + "/decoder/convnext.5/act/Constant_2_output_0": "c000087", + "/decoder/convnext.6/dwconv/Constant_output_0": "c000088", + "/decoder/convnext.6/dwconv/Constant_1_output_0": "c000089", + "/decoder/convnext.6/dwconv/Constant_2_output_0": "c000090", + "/decoder/convnext.6/dwconv/Constant_3_output_0": "c000091", + "/decoder/convnext.6/dwconv/Constant_4_output_0": "c000092", + "/decoder/convnext.6/dwconv/Constant_5_output_0": "c000093", + "/decoder/convnext.6/dwconv/Constant_6_output_0": "c000094", + "/decoder/convnext.6/dwconv/Constant_7_output_0": "c000095", + "/decoder/convnext.6/act/Constant_output_0": "c000096", + "/decoder/convnext.6/act/Constant_1_output_0": "c000097", + "/decoder/convnext.6/act/Constant_2_output_0": "c000098", + "/decoder/convnext.7/dwconv/Constant_output_0": "c000099", + "/decoder/convnext.7/dwconv/Constant_1_output_0": "c000100", + "/decoder/convnext.7/dwconv/Constant_2_output_0": "c000101", + "/decoder/convnext.7/dwconv/Constant_3_output_0": "c000102", + "/decoder/convnext.7/dwconv/Constant_4_output_0": "c000103", + "/decoder/convnext.7/dwconv/Constant_5_output_0": "c000104", + "/decoder/convnext.7/dwconv/Constant_6_output_0": "c000105", + "/decoder/convnext.7/dwconv/Constant_7_output_0": "c000106", + "/decoder/convnext.7/act/Constant_output_0": "c000107", + "/decoder/convnext.7/act/Constant_1_output_0": "c000108", + "/decoder/convnext.7/act/Constant_2_output_0": "c000109", + "/decoder/convnext.8/dwconv/Constant_output_0": "c000110", + "/decoder/convnext.8/dwconv/Constant_1_output_0": "c000111", + "/decoder/convnext.8/dwconv/Constant_2_output_0": "c000112", + "/decoder/convnext.8/dwconv/Constant_3_output_0": "c000113", + "/decoder/convnext.8/dwconv/Constant_4_output_0": "c000114", + "/decoder/convnext.8/dwconv/Constant_5_output_0": "c000115", + "/decoder/convnext.8/dwconv/Constant_6_output_0": "c000116", + "/decoder/convnext.8/dwconv/Constant_7_output_0": "c000117", + "/decoder/convnext.8/act/Constant_output_0": "c000118", + "/decoder/convnext.8/act/Constant_1_output_0": "c000119", + "/decoder/convnext.8/act/Constant_2_output_0": "c000120", + "/decoder/convnext.9/dwconv/Constant_output_0": "c000121", + "/decoder/convnext.9/dwconv/Constant_1_output_0": "c000122", + "/decoder/convnext.9/dwconv/Constant_2_output_0": "c000123", + "/decoder/convnext.9/dwconv/Constant_3_output_0": "c000124", + "/decoder/convnext.9/dwconv/Constant_4_output_0": "c000125", + "/decoder/convnext.9/dwconv/Constant_5_output_0": "c000126", + "/decoder/convnext.9/dwconv/Constant_6_output_0": "c000127", + "/decoder/convnext.9/dwconv/Constant_7_output_0": "c000128", + "/decoder/convnext.9/act/Constant_output_0": "c000129", + "/decoder/convnext.9/act/Constant_1_output_0": "c000130", + "/decoder/convnext.9/act/Constant_2_output_0": "c000131", + "/decoder/head/layer1/Constant_output_0": "c000132", + "/decoder/head/layer1/Constant_1_output_0": "c000133", + "/decoder/head/layer1/Constant_2_output_0": "c000134", + "/decoder/head/layer1/Constant_3_output_0": "c000135", + "/decoder/head/layer1/Constant_4_output_0": "c000136", + "/decoder/head/layer1/Constant_5_output_0": "c000137", + "/decoder/head/layer1/Constant_6_output_0": "c000138", + "/decoder/head/layer1/Constant_7_output_0": "c000139", + "/decoder/head/Constant_output_0": "c000140", + "onnx::Unsqueeze_1434": "c000141", + "/decoder/head/Constant_1_output_0": "c000142" + }, + "nodes": [ + { + "op_type": "Div", + "name": "/Div", + "inputs": [ + "latent", + "tts.ttl.normalizer.scale" + ], + "outputs": [ + "/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Shape", + "name": "/Shape", + "inputs": [ + "/Div_output_0" + ], + "outputs": [ + "/Shape_output_0" + ], + "attrs": {} + }, + { + "op_type": "Gather", + "name": "/Gather", + "inputs": [ + "/Shape_output_0", + "/Constant_output_0" + ], + "outputs": [ + "/Gather_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Shape", + "name": "/Shape_1", + "inputs": [ + "/Div_output_0" + ], + "outputs": [ + "/Shape_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Slice", + "name": "/Slice", + "inputs": [ + "/Shape_1_output_0", + "/Constant_2_output_0", + "/Constant_3_output_0", + "/Constant_1_output_0" + ], + "outputs": [ + "/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Squeeze", + "name": "/Squeeze", + "inputs": [ + "/Slice_output_0", + "/Constant_4_output_0" + ], + "outputs": [ + "/Squeeze_output_0" + ], + "attrs": {} + }, + { + "op_type": "Unsqueeze", + "name": "/Unsqueeze", + "inputs": [ + "/Gather_output_0", + "onnx::Unsqueeze_967" + ], + "outputs": [ + "/Unsqueeze_output_0" + ], + "attrs": {} + }, + { + "op_type": "Unsqueeze", + "name": "/Unsqueeze_1", + "inputs": [ + "/Squeeze_output_0", + "onnx::Unsqueeze_973" + ], + "outputs": [ + "/Unsqueeze_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Concat", + "name": "/Concat", + "inputs": [ + "/Unsqueeze_output_0", + "/Constant_5_output_0", + "/Constant_6_output_0", + "/Unsqueeze_1_output_0" + ], + "outputs": [ + "/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/Reshape", + "inputs": [ + "/Div_output_0", + "/Concat_output_0" + ], + "outputs": [ + "/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Transpose", + "name": "/Transpose", + "inputs": [ + "/Reshape_output_0" + ], + "outputs": [ + "/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 1, + 3, + 2 + ] + } + }, + { + "op_type": "Shape", + "name": "/Shape_2", + "inputs": [ + "/Transpose_output_0" + ], + "outputs": [ + "/Shape_2_output_0" + ], + "attrs": {} + }, + { + "op_type": "Gather", + "name": "/Gather_1", + "inputs": [ + "/Shape_2_output_0", + "/Constant_7_output_0" + ], + "outputs": [ + "/Gather_1_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Shape", + "name": "/Shape_3", + "inputs": [ + "/Transpose_output_0" + ], + "outputs": [ + "/Shape_3_output_0" + ], + "attrs": {} + }, + { + "op_type": "Gather", + "name": "/Gather_2", + "inputs": [ + "/Shape_3_output_0", + "/Constant_8_output_0" + ], + "outputs": [ + "/Gather_2_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Unsqueeze", + "name": "/Unsqueeze_2", + "inputs": [ + "/Gather_1_output_0", + "onnx::Unsqueeze_984" + ], + "outputs": [ + "/Unsqueeze_2_output_0" + ], + "attrs": {} + }, + { + "op_type": "Unsqueeze", + "name": "/Unsqueeze_3", + "inputs": [ + "/Gather_2_output_0", + "onnx::Unsqueeze_986" + ], + "outputs": [ + "/Unsqueeze_3_output_0" + ], + "attrs": {} + }, + { + "op_type": "Concat", + "name": "/Concat_1", + "inputs": [ + "/Unsqueeze_2_output_0", + "/Unsqueeze_3_output_0", + "/Constant_9_output_0" + ], + "outputs": [ + "/Concat_1_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/Reshape_1", + "inputs": [ + "/Transpose_output_0", + "/Concat_1_output_0" + ], + "outputs": [ + "/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Mul", + "name": "/Mul", + "inputs": [ + "/Reshape_1_output_0", + "tts.ae.latent_std" + ], + "outputs": [ + "/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/Add", + "inputs": [ + "/Mul_output_0", + "tts.ae.latent_mean" + ], + "outputs": [ + "/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/embed/ConstantOfShape", + "inputs": [ + "/decoder/embed/Constant_output_0" + ], + "outputs": [ + "/decoder/embed/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/embed/Concat", + "inputs": [ + "/decoder/embed/Constant_1_output_0", + "/decoder/embed/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/embed/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/embed/Reshape", + "inputs": [ + "/decoder/embed/Concat_output_0", + "/decoder/embed/Constant_2_output_0" + ], + "outputs": [ + "/decoder/embed/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/embed/Slice", + "inputs": [ + "/decoder/embed/Reshape_output_0", + "/decoder/embed/Constant_4_output_0", + "/decoder/embed/Constant_5_output_0", + "/decoder/embed/Constant_3_output_0", + "/decoder/embed/Constant_6_output_0" + ], + "outputs": [ + "/decoder/embed/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/embed/Transpose", + "inputs": [ + "/decoder/embed/Slice_output_0" + ], + "outputs": [ + "/decoder/embed/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/embed/Reshape_1", + "inputs": [ + "/decoder/embed/Transpose_output_0", + "/decoder/embed/Constant_7_output_0" + ], + "outputs": [ + "/decoder/embed/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/embed/Cast", + "inputs": [ + "/decoder/embed/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/embed/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/embed/Pad", + "inputs": [ + "/Add_output_0", + "/decoder/embed/Cast_output_0" + ], + "outputs": [ + "/decoder/embed/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/embed/net/Conv", + "inputs": [ + "/decoder/embed/Pad_output_0", + "onnx::Conv_1441", + "onnx::Conv_1442" + ], + "outputs": [ + "/decoder/embed/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.0/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.0/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.0/dwconv/Concat", + "inputs": [ + "/decoder/convnext.0/dwconv/Constant_1_output_0", + "/decoder/convnext.0/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.0/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.0/dwconv/Concat_output_0", + "/decoder/convnext.0/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.0/dwconv/Slice", + "inputs": [ + "/decoder/convnext.0/dwconv/Reshape_output_0", + "/decoder/convnext.0/dwconv/Constant_4_output_0", + "/decoder/convnext.0/dwconv/Constant_5_output_0", + "/decoder/convnext.0/dwconv/Constant_3_output_0", + "/decoder/convnext.0/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.0/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.0/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.0/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.0/dwconv/Transpose_output_0", + "/decoder/convnext.0/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.0/dwconv/Cast", + "inputs": [ + "/decoder/convnext.0/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.0/dwconv/Pad", + "inputs": [ + "/decoder/embed/net/Conv_output_0", + "/decoder/convnext.0/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.0/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.0/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.0.dwconv.net.weight", + "tts.ae.decoder.convnext.0.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.0/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.0/norm/Transpose", + "inputs": [ + "/decoder/convnext.0/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.0/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.0/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.0/norm/Transpose_output_0", + "tts.ae.decoder.convnext.0.norm.norm.weight", + "tts.ae.decoder.convnext.0.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.0/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.0/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.0/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.0/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.0/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.0/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.0.pwconv1.weight", + "tts.ae.decoder.convnext.0.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.0/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.0/act/Div", + "inputs": [ + "/decoder/convnext.0/pwconv1/Conv_output_0", + "/decoder/convnext.0/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.0/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.0/act/Erf", + "inputs": [ + "/decoder/convnext.0/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.0/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.0/act/Add", + "inputs": [ + "/decoder/convnext.0/act/Erf_output_0", + "/decoder/convnext.0/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.0/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.0/act/Mul", + "inputs": [ + "/decoder/convnext.0/pwconv1/Conv_output_0", + "/decoder/convnext.0/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.0/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.0/act/Mul_1", + "inputs": [ + "/decoder/convnext.0/act/Mul_output_0", + "/decoder/convnext.0/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.0/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.0/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.0/act/Mul_1_output_0", + "tts.ae.decoder.convnext.0.pwconv2.weight", + "tts.ae.decoder.convnext.0.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.0/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.0/Mul", + "inputs": [ + "tts.ae.decoder.convnext.0.gamma", + "/decoder/convnext.0/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.0/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.0/Add", + "inputs": [ + "/decoder/embed/net/Conv_output_0", + "/decoder/convnext.0/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.0/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.1/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.1/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.1/dwconv/Concat", + "inputs": [ + "/decoder/convnext.1/dwconv/Constant_1_output_0", + "/decoder/convnext.1/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.1/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.1/dwconv/Concat_output_0", + "/decoder/convnext.1/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.1/dwconv/Slice", + "inputs": [ + "/decoder/convnext.1/dwconv/Reshape_output_0", + "/decoder/convnext.1/dwconv/Constant_4_output_0", + "/decoder/convnext.1/dwconv/Constant_5_output_0", + "/decoder/convnext.1/dwconv/Constant_3_output_0", + "/decoder/convnext.1/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.1/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.1/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.1/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.1/dwconv/Transpose_output_0", + "/decoder/convnext.1/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.1/dwconv/Cast", + "inputs": [ + "/decoder/convnext.1/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.1/dwconv/Pad", + "inputs": [ + "/decoder/convnext.0/Add_output_0", + "/decoder/convnext.1/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.1/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.1/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.1.dwconv.net.weight", + "tts.ae.decoder.convnext.1.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.1/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 2 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.1/norm/Transpose", + "inputs": [ + "/decoder/convnext.1/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.1/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.1/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.1/norm/Transpose_output_0", + "tts.ae.decoder.convnext.1.norm.norm.weight", + "tts.ae.decoder.convnext.1.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.1/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.1/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.1/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.1/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.1/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.1/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.1.pwconv1.weight", + "tts.ae.decoder.convnext.1.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.1/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.1/act/Div", + "inputs": [ + "/decoder/convnext.1/pwconv1/Conv_output_0", + "/decoder/convnext.1/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.1/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.1/act/Erf", + "inputs": [ + "/decoder/convnext.1/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.1/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.1/act/Add", + "inputs": [ + "/decoder/convnext.1/act/Erf_output_0", + "/decoder/convnext.1/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.1/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.1/act/Mul", + "inputs": [ + "/decoder/convnext.1/pwconv1/Conv_output_0", + "/decoder/convnext.1/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.1/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.1/act/Mul_1", + "inputs": [ + "/decoder/convnext.1/act/Mul_output_0", + "/decoder/convnext.1/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.1/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.1/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.1/act/Mul_1_output_0", + "tts.ae.decoder.convnext.1.pwconv2.weight", + "tts.ae.decoder.convnext.1.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.1/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.1/Mul", + "inputs": [ + "tts.ae.decoder.convnext.1.gamma", + "/decoder/convnext.1/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.1/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.1/Add", + "inputs": [ + "/decoder/convnext.0/Add_output_0", + "/decoder/convnext.1/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.1/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.2/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.2/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.2/dwconv/Concat", + "inputs": [ + "/decoder/convnext.2/dwconv/Constant_1_output_0", + "/decoder/convnext.2/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.2/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.2/dwconv/Concat_output_0", + "/decoder/convnext.2/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.2/dwconv/Slice", + "inputs": [ + "/decoder/convnext.2/dwconv/Reshape_output_0", + "/decoder/convnext.2/dwconv/Constant_4_output_0", + "/decoder/convnext.2/dwconv/Constant_5_output_0", + "/decoder/convnext.2/dwconv/Constant_3_output_0", + "/decoder/convnext.2/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.2/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.2/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.2/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.2/dwconv/Transpose_output_0", + "/decoder/convnext.2/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.2/dwconv/Cast", + "inputs": [ + "/decoder/convnext.2/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.2/dwconv/Pad", + "inputs": [ + "/decoder/convnext.1/Add_output_0", + "/decoder/convnext.2/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.2/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.2/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.2.dwconv.net.weight", + "tts.ae.decoder.convnext.2.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.2/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 4 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.2/norm/Transpose", + "inputs": [ + "/decoder/convnext.2/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.2/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.2/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.2/norm/Transpose_output_0", + "tts.ae.decoder.convnext.2.norm.norm.weight", + "tts.ae.decoder.convnext.2.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.2/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.2/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.2/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.2/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.2/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.2/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.2.pwconv1.weight", + "tts.ae.decoder.convnext.2.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.2/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.2/act/Div", + "inputs": [ + "/decoder/convnext.2/pwconv1/Conv_output_0", + "/decoder/convnext.2/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.2/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.2/act/Erf", + "inputs": [ + "/decoder/convnext.2/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.2/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.2/act/Add", + "inputs": [ + "/decoder/convnext.2/act/Erf_output_0", + "/decoder/convnext.2/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.2/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.2/act/Mul", + "inputs": [ + "/decoder/convnext.2/pwconv1/Conv_output_0", + "/decoder/convnext.2/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.2/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.2/act/Mul_1", + "inputs": [ + "/decoder/convnext.2/act/Mul_output_0", + "/decoder/convnext.2/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.2/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.2/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.2/act/Mul_1_output_0", + "tts.ae.decoder.convnext.2.pwconv2.weight", + "tts.ae.decoder.convnext.2.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.2/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.2/Mul", + "inputs": [ + "tts.ae.decoder.convnext.2.gamma", + "/decoder/convnext.2/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.2/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.2/Add", + "inputs": [ + "/decoder/convnext.1/Add_output_0", + "/decoder/convnext.2/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.2/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.3/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.3/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.3/dwconv/Concat", + "inputs": [ + "/decoder/convnext.3/dwconv/Constant_1_output_0", + "/decoder/convnext.3/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.3/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.3/dwconv/Concat_output_0", + "/decoder/convnext.3/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.3/dwconv/Slice", + "inputs": [ + "/decoder/convnext.3/dwconv/Reshape_output_0", + "/decoder/convnext.3/dwconv/Constant_4_output_0", + "/decoder/convnext.3/dwconv/Constant_5_output_0", + "/decoder/convnext.3/dwconv/Constant_3_output_0", + "/decoder/convnext.3/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.3/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.3/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.3/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.3/dwconv/Transpose_output_0", + "/decoder/convnext.3/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.3/dwconv/Cast", + "inputs": [ + "/decoder/convnext.3/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.3/dwconv/Pad", + "inputs": [ + "/decoder/convnext.2/Add_output_0", + "/decoder/convnext.3/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.3/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.3/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.3.dwconv.net.weight", + "tts.ae.decoder.convnext.3.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.3/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.3/norm/Transpose", + "inputs": [ + "/decoder/convnext.3/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.3/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.3/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.3/norm/Transpose_output_0", + "tts.ae.decoder.convnext.3.norm.norm.weight", + "tts.ae.decoder.convnext.3.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.3/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.3/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.3/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.3/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.3/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.3/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.3.pwconv1.weight", + "tts.ae.decoder.convnext.3.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.3/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.3/act/Div", + "inputs": [ + "/decoder/convnext.3/pwconv1/Conv_output_0", + "/decoder/convnext.3/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.3/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.3/act/Erf", + "inputs": [ + "/decoder/convnext.3/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.3/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.3/act/Add", + "inputs": [ + "/decoder/convnext.3/act/Erf_output_0", + "/decoder/convnext.3/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.3/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.3/act/Mul", + "inputs": [ + "/decoder/convnext.3/pwconv1/Conv_output_0", + "/decoder/convnext.3/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.3/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.3/act/Mul_1", + "inputs": [ + "/decoder/convnext.3/act/Mul_output_0", + "/decoder/convnext.3/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.3/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.3/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.3/act/Mul_1_output_0", + "tts.ae.decoder.convnext.3.pwconv2.weight", + "tts.ae.decoder.convnext.3.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.3/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.3/Mul", + "inputs": [ + "tts.ae.decoder.convnext.3.gamma", + "/decoder/convnext.3/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.3/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.3/Add", + "inputs": [ + "/decoder/convnext.2/Add_output_0", + "/decoder/convnext.3/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.3/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.4/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.4/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.4/dwconv/Concat", + "inputs": [ + "/decoder/convnext.4/dwconv/Constant_1_output_0", + "/decoder/convnext.4/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.4/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.4/dwconv/Concat_output_0", + "/decoder/convnext.4/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.4/dwconv/Slice", + "inputs": [ + "/decoder/convnext.4/dwconv/Reshape_output_0", + "/decoder/convnext.4/dwconv/Constant_4_output_0", + "/decoder/convnext.4/dwconv/Constant_5_output_0", + "/decoder/convnext.4/dwconv/Constant_3_output_0", + "/decoder/convnext.4/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.4/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.4/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.4/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.4/dwconv/Transpose_output_0", + "/decoder/convnext.4/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.4/dwconv/Cast", + "inputs": [ + "/decoder/convnext.4/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.4/dwconv/Pad", + "inputs": [ + "/decoder/convnext.3/Add_output_0", + "/decoder/convnext.4/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.4/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.4/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.4.dwconv.net.weight", + "tts.ae.decoder.convnext.4.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.4/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 2 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.4/norm/Transpose", + "inputs": [ + "/decoder/convnext.4/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.4/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.4/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.4/norm/Transpose_output_0", + "tts.ae.decoder.convnext.4.norm.norm.weight", + "tts.ae.decoder.convnext.4.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.4/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.4/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.4/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.4/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.4/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.4/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.4.pwconv1.weight", + "tts.ae.decoder.convnext.4.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.4/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.4/act/Div", + "inputs": [ + "/decoder/convnext.4/pwconv1/Conv_output_0", + "/decoder/convnext.4/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.4/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.4/act/Erf", + "inputs": [ + "/decoder/convnext.4/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.4/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.4/act/Add", + "inputs": [ + "/decoder/convnext.4/act/Erf_output_0", + "/decoder/convnext.4/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.4/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.4/act/Mul", + "inputs": [ + "/decoder/convnext.4/pwconv1/Conv_output_0", + "/decoder/convnext.4/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.4/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.4/act/Mul_1", + "inputs": [ + "/decoder/convnext.4/act/Mul_output_0", + "/decoder/convnext.4/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.4/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.4/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.4/act/Mul_1_output_0", + "tts.ae.decoder.convnext.4.pwconv2.weight", + "tts.ae.decoder.convnext.4.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.4/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.4/Mul", + "inputs": [ + "tts.ae.decoder.convnext.4.gamma", + "/decoder/convnext.4/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.4/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.4/Add", + "inputs": [ + "/decoder/convnext.3/Add_output_0", + "/decoder/convnext.4/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.4/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.5/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.5/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.5/dwconv/Concat", + "inputs": [ + "/decoder/convnext.5/dwconv/Constant_1_output_0", + "/decoder/convnext.5/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.5/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.5/dwconv/Concat_output_0", + "/decoder/convnext.5/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.5/dwconv/Slice", + "inputs": [ + "/decoder/convnext.5/dwconv/Reshape_output_0", + "/decoder/convnext.5/dwconv/Constant_4_output_0", + "/decoder/convnext.5/dwconv/Constant_5_output_0", + "/decoder/convnext.5/dwconv/Constant_3_output_0", + "/decoder/convnext.5/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.5/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.5/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.5/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.5/dwconv/Transpose_output_0", + "/decoder/convnext.5/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.5/dwconv/Cast", + "inputs": [ + "/decoder/convnext.5/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.5/dwconv/Pad", + "inputs": [ + "/decoder/convnext.4/Add_output_0", + "/decoder/convnext.5/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.5/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.5/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.5.dwconv.net.weight", + "tts.ae.decoder.convnext.5.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.5/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 4 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.5/norm/Transpose", + "inputs": [ + "/decoder/convnext.5/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.5/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.5/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.5/norm/Transpose_output_0", + "tts.ae.decoder.convnext.5.norm.norm.weight", + "tts.ae.decoder.convnext.5.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.5/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.5/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.5/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.5/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.5/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.5/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.5.pwconv1.weight", + "tts.ae.decoder.convnext.5.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.5/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.5/act/Div", + "inputs": [ + "/decoder/convnext.5/pwconv1/Conv_output_0", + "/decoder/convnext.5/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.5/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.5/act/Erf", + "inputs": [ + "/decoder/convnext.5/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.5/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.5/act/Add", + "inputs": [ + "/decoder/convnext.5/act/Erf_output_0", + "/decoder/convnext.5/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.5/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.5/act/Mul", + "inputs": [ + "/decoder/convnext.5/pwconv1/Conv_output_0", + "/decoder/convnext.5/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.5/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.5/act/Mul_1", + "inputs": [ + "/decoder/convnext.5/act/Mul_output_0", + "/decoder/convnext.5/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.5/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.5/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.5/act/Mul_1_output_0", + "tts.ae.decoder.convnext.5.pwconv2.weight", + "tts.ae.decoder.convnext.5.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.5/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.5/Mul", + "inputs": [ + "tts.ae.decoder.convnext.5.gamma", + "/decoder/convnext.5/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.5/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.5/Add", + "inputs": [ + "/decoder/convnext.4/Add_output_0", + "/decoder/convnext.5/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.5/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.6/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.6/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.6/dwconv/Concat", + "inputs": [ + "/decoder/convnext.6/dwconv/Constant_1_output_0", + "/decoder/convnext.6/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.6/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.6/dwconv/Concat_output_0", + "/decoder/convnext.6/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.6/dwconv/Slice", + "inputs": [ + "/decoder/convnext.6/dwconv/Reshape_output_0", + "/decoder/convnext.6/dwconv/Constant_4_output_0", + "/decoder/convnext.6/dwconv/Constant_5_output_0", + "/decoder/convnext.6/dwconv/Constant_3_output_0", + "/decoder/convnext.6/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.6/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.6/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.6/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.6/dwconv/Transpose_output_0", + "/decoder/convnext.6/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.6/dwconv/Cast", + "inputs": [ + "/decoder/convnext.6/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.6/dwconv/Pad", + "inputs": [ + "/decoder/convnext.5/Add_output_0", + "/decoder/convnext.6/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.6/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.6/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.6.dwconv.net.weight", + "tts.ae.decoder.convnext.6.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.6/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.6/norm/Transpose", + "inputs": [ + "/decoder/convnext.6/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.6/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.6/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.6/norm/Transpose_output_0", + "tts.ae.decoder.convnext.6.norm.norm.weight", + "tts.ae.decoder.convnext.6.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.6/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.6/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.6/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.6/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.6/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.6/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.6.pwconv1.weight", + "tts.ae.decoder.convnext.6.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.6/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.6/act/Div", + "inputs": [ + "/decoder/convnext.6/pwconv1/Conv_output_0", + "/decoder/convnext.6/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.6/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.6/act/Erf", + "inputs": [ + "/decoder/convnext.6/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.6/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.6/act/Add", + "inputs": [ + "/decoder/convnext.6/act/Erf_output_0", + "/decoder/convnext.6/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.6/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.6/act/Mul", + "inputs": [ + "/decoder/convnext.6/pwconv1/Conv_output_0", + "/decoder/convnext.6/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.6/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.6/act/Mul_1", + "inputs": [ + "/decoder/convnext.6/act/Mul_output_0", + "/decoder/convnext.6/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.6/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.6/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.6/act/Mul_1_output_0", + "tts.ae.decoder.convnext.6.pwconv2.weight", + "tts.ae.decoder.convnext.6.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.6/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.6/Mul", + "inputs": [ + "tts.ae.decoder.convnext.6.gamma", + "/decoder/convnext.6/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.6/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.6/Add", + "inputs": [ + "/decoder/convnext.5/Add_output_0", + "/decoder/convnext.6/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.6/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.7/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.7/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.7/dwconv/Concat", + "inputs": [ + "/decoder/convnext.7/dwconv/Constant_1_output_0", + "/decoder/convnext.7/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.7/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.7/dwconv/Concat_output_0", + "/decoder/convnext.7/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.7/dwconv/Slice", + "inputs": [ + "/decoder/convnext.7/dwconv/Reshape_output_0", + "/decoder/convnext.7/dwconv/Constant_4_output_0", + "/decoder/convnext.7/dwconv/Constant_5_output_0", + "/decoder/convnext.7/dwconv/Constant_3_output_0", + "/decoder/convnext.7/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.7/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.7/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.7/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.7/dwconv/Transpose_output_0", + "/decoder/convnext.7/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.7/dwconv/Cast", + "inputs": [ + "/decoder/convnext.7/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.7/dwconv/Pad", + "inputs": [ + "/decoder/convnext.6/Add_output_0", + "/decoder/convnext.7/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.7/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.7/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.7.dwconv.net.weight", + "tts.ae.decoder.convnext.7.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.7/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.7/norm/Transpose", + "inputs": [ + "/decoder/convnext.7/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.7/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.7/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.7/norm/Transpose_output_0", + "tts.ae.decoder.convnext.7.norm.norm.weight", + "tts.ae.decoder.convnext.7.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.7/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.7/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.7/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.7/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.7/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.7/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.7.pwconv1.weight", + "tts.ae.decoder.convnext.7.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.7/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.7/act/Div", + "inputs": [ + "/decoder/convnext.7/pwconv1/Conv_output_0", + "/decoder/convnext.7/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.7/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.7/act/Erf", + "inputs": [ + "/decoder/convnext.7/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.7/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.7/act/Add", + "inputs": [ + "/decoder/convnext.7/act/Erf_output_0", + "/decoder/convnext.7/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.7/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.7/act/Mul", + "inputs": [ + "/decoder/convnext.7/pwconv1/Conv_output_0", + "/decoder/convnext.7/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.7/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.7/act/Mul_1", + "inputs": [ + "/decoder/convnext.7/act/Mul_output_0", + "/decoder/convnext.7/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.7/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.7/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.7/act/Mul_1_output_0", + "tts.ae.decoder.convnext.7.pwconv2.weight", + "tts.ae.decoder.convnext.7.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.7/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.7/Mul", + "inputs": [ + "tts.ae.decoder.convnext.7.gamma", + "/decoder/convnext.7/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.7/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.7/Add", + "inputs": [ + "/decoder/convnext.6/Add_output_0", + "/decoder/convnext.7/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.7/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.8/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.8/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.8/dwconv/Concat", + "inputs": [ + "/decoder/convnext.8/dwconv/Constant_1_output_0", + "/decoder/convnext.8/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.8/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.8/dwconv/Concat_output_0", + "/decoder/convnext.8/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.8/dwconv/Slice", + "inputs": [ + "/decoder/convnext.8/dwconv/Reshape_output_0", + "/decoder/convnext.8/dwconv/Constant_4_output_0", + "/decoder/convnext.8/dwconv/Constant_5_output_0", + "/decoder/convnext.8/dwconv/Constant_3_output_0", + "/decoder/convnext.8/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.8/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.8/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.8/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.8/dwconv/Transpose_output_0", + "/decoder/convnext.8/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.8/dwconv/Cast", + "inputs": [ + "/decoder/convnext.8/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.8/dwconv/Pad", + "inputs": [ + "/decoder/convnext.7/Add_output_0", + "/decoder/convnext.8/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.8/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.8/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.8.dwconv.net.weight", + "tts.ae.decoder.convnext.8.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.8/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.8/norm/Transpose", + "inputs": [ + "/decoder/convnext.8/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.8/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.8/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.8/norm/Transpose_output_0", + "tts.ae.decoder.convnext.8.norm.norm.weight", + "tts.ae.decoder.convnext.8.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.8/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.8/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.8/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.8/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.8/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.8/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.8.pwconv1.weight", + "tts.ae.decoder.convnext.8.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.8/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.8/act/Div", + "inputs": [ + "/decoder/convnext.8/pwconv1/Conv_output_0", + "/decoder/convnext.8/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.8/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.8/act/Erf", + "inputs": [ + "/decoder/convnext.8/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.8/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.8/act/Add", + "inputs": [ + "/decoder/convnext.8/act/Erf_output_0", + "/decoder/convnext.8/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.8/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.8/act/Mul", + "inputs": [ + "/decoder/convnext.8/pwconv1/Conv_output_0", + "/decoder/convnext.8/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.8/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.8/act/Mul_1", + "inputs": [ + "/decoder/convnext.8/act/Mul_output_0", + "/decoder/convnext.8/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.8/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.8/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.8/act/Mul_1_output_0", + "tts.ae.decoder.convnext.8.pwconv2.weight", + "tts.ae.decoder.convnext.8.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.8/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.8/Mul", + "inputs": [ + "tts.ae.decoder.convnext.8.gamma", + "/decoder/convnext.8/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.8/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.8/Add", + "inputs": [ + "/decoder/convnext.7/Add_output_0", + "/decoder/convnext.8/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.8/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/convnext.9/dwconv/ConstantOfShape", + "inputs": [ + "/decoder/convnext.9/dwconv/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/convnext.9/dwconv/Concat", + "inputs": [ + "/decoder/convnext.9/dwconv/Constant_1_output_0", + "/decoder/convnext.9/dwconv/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.9/dwconv/Reshape", + "inputs": [ + "/decoder/convnext.9/dwconv/Concat_output_0", + "/decoder/convnext.9/dwconv/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/convnext.9/dwconv/Slice", + "inputs": [ + "/decoder/convnext.9/dwconv/Reshape_output_0", + "/decoder/convnext.9/dwconv/Constant_4_output_0", + "/decoder/convnext.9/dwconv/Constant_5_output_0", + "/decoder/convnext.9/dwconv/Constant_3_output_0", + "/decoder/convnext.9/dwconv/Constant_6_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.9/dwconv/Transpose", + "inputs": [ + "/decoder/convnext.9/dwconv/Slice_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/convnext.9/dwconv/Reshape_1", + "inputs": [ + "/decoder/convnext.9/dwconv/Transpose_output_0", + "/decoder/convnext.9/dwconv/Constant_7_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/convnext.9/dwconv/Cast", + "inputs": [ + "/decoder/convnext.9/dwconv/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/convnext.9/dwconv/Pad", + "inputs": [ + "/decoder/convnext.8/Add_output_0", + "/decoder/convnext.9/dwconv/Cast_output_0" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.9/dwconv/net/Conv", + "inputs": [ + "/decoder/convnext.9/dwconv/Pad_output_0", + "tts.ae.decoder.convnext.9.dwconv.net.weight", + "tts.ae.decoder.convnext.9.dwconv.net.bias" + ], + "outputs": [ + "/decoder/convnext.9/dwconv/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 512, + "kernel_shape": [ + 7 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.9/norm/Transpose", + "inputs": [ + "/decoder/convnext.9/dwconv/net/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.9/norm/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "LayerNormalization", + "name": "/decoder/convnext.9/norm/norm/LayerNormalization", + "inputs": [ + "/decoder/convnext.9/norm/Transpose_output_0", + "tts.ae.decoder.convnext.9.norm.norm.weight", + "tts.ae.decoder.convnext.9.norm.norm.bias" + ], + "outputs": [ + "/decoder/convnext.9/norm/norm/LayerNormalization_output_0" + ], + "attrs": { + "axis": -1, + "epsilon": 9.999999974752427e-07 + } + }, + { + "op_type": "Transpose", + "name": "/decoder/convnext.9/norm/Transpose_1", + "inputs": [ + "/decoder/convnext.9/norm/norm/LayerNormalization_output_0" + ], + "outputs": [ + "/decoder/convnext.9/norm/Transpose_1_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.9/pwconv1/Conv", + "inputs": [ + "/decoder/convnext.9/norm/Transpose_1_output_0", + "tts.ae.decoder.convnext.9.pwconv1.weight", + "tts.ae.decoder.convnext.9.pwconv1.bias" + ], + "outputs": [ + "/decoder/convnext.9/pwconv1/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Div", + "name": "/decoder/convnext.9/act/Div", + "inputs": [ + "/decoder/convnext.9/pwconv1/Conv_output_0", + "/decoder/convnext.9/act/Constant_output_0" + ], + "outputs": [ + "/decoder/convnext.9/act/Div_output_0" + ], + "attrs": {} + }, + { + "op_type": "Erf", + "name": "/decoder/convnext.9/act/Erf", + "inputs": [ + "/decoder/convnext.9/act/Div_output_0" + ], + "outputs": [ + "/decoder/convnext.9/act/Erf_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.9/act/Add", + "inputs": [ + "/decoder/convnext.9/act/Erf_output_0", + "/decoder/convnext.9/act/Constant_1_output_0" + ], + "outputs": [ + "/decoder/convnext.9/act/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.9/act/Mul", + "inputs": [ + "/decoder/convnext.9/pwconv1/Conv_output_0", + "/decoder/convnext.9/act/Add_output_0" + ], + "outputs": [ + "/decoder/convnext.9/act/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.9/act/Mul_1", + "inputs": [ + "/decoder/convnext.9/act/Mul_output_0", + "/decoder/convnext.9/act/Constant_2_output_0" + ], + "outputs": [ + "/decoder/convnext.9/act/Mul_1_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/convnext.9/pwconv2/Conv", + "inputs": [ + "/decoder/convnext.9/act/Mul_1_output_0", + "tts.ae.decoder.convnext.9.pwconv2.weight", + "tts.ae.decoder.convnext.9.pwconv2.bias" + ], + "outputs": [ + "/decoder/convnext.9/pwconv2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Mul", + "name": "/decoder/convnext.9/Mul", + "inputs": [ + "tts.ae.decoder.convnext.9.gamma", + "/decoder/convnext.9/pwconv2/Conv_output_0" + ], + "outputs": [ + "/decoder/convnext.9/Mul_output_0" + ], + "attrs": {} + }, + { + "op_type": "Add", + "name": "/decoder/convnext.9/Add", + "inputs": [ + "/decoder/convnext.8/Add_output_0", + "/decoder/convnext.9/Mul_output_0" + ], + "outputs": [ + "/decoder/convnext.9/Add_output_0" + ], + "attrs": {} + }, + { + "op_type": "BatchNormalization", + "name": "/decoder/final_norm/BatchNormalization", + "inputs": [ + "/decoder/convnext.9/Add_output_0", + "tts.ae.decoder.final_norm.norm.weight", + "tts.ae.decoder.final_norm.norm.bias", + "tts.ae.decoder.final_norm.norm.running_mean", + "tts.ae.decoder.final_norm.norm.running_var" + ], + "outputs": [ + "/decoder/final_norm/BatchNormalization_output_0" + ], + "attrs": { + "epsilon": 9.999999747378752e-06, + "momentum": 0.8999999761581421, + "training_mode": 0 + } + }, + { + "op_type": "ConstantOfShape", + "name": "/decoder/head/layer1/ConstantOfShape", + "inputs": [ + "/decoder/head/layer1/Constant_output_0" + ], + "outputs": [ + "/decoder/head/layer1/ConstantOfShape_output_0" + ], + "attrs": { + "value": { + "dtype": "int64", + "shape": [ + 1 + ], + "data": [ + 0 + ] + } + } + }, + { + "op_type": "Concat", + "name": "/decoder/head/layer1/Concat", + "inputs": [ + "/decoder/head/layer1/Constant_1_output_0", + "/decoder/head/layer1/ConstantOfShape_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/head/layer1/Reshape", + "inputs": [ + "/decoder/head/layer1/Concat_output_0", + "/decoder/head/layer1/Constant_2_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Reshape_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Slice", + "name": "/decoder/head/layer1/Slice", + "inputs": [ + "/decoder/head/layer1/Reshape_output_0", + "/decoder/head/layer1/Constant_4_output_0", + "/decoder/head/layer1/Constant_5_output_0", + "/decoder/head/layer1/Constant_3_output_0", + "/decoder/head/layer1/Constant_6_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Slice_output_0" + ], + "attrs": {} + }, + { + "op_type": "Transpose", + "name": "/decoder/head/layer1/Transpose", + "inputs": [ + "/decoder/head/layer1/Slice_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Transpose_output_0" + ], + "attrs": { + "perm": [ + 1, + 0 + ] + } + }, + { + "op_type": "Reshape", + "name": "/decoder/head/layer1/Reshape_1", + "inputs": [ + "/decoder/head/layer1/Transpose_output_0", + "/decoder/head/layer1/Constant_7_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Reshape_1_output_0" + ], + "attrs": { + "allowzero": 0 + } + }, + { + "op_type": "Cast", + "name": "/decoder/head/layer1/Cast", + "inputs": [ + "/decoder/head/layer1/Reshape_1_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Cast_output_0" + ], + "attrs": { + "to": 7 + } + }, + { + "op_type": "Pad", + "name": "/decoder/head/layer1/Pad", + "inputs": [ + "/decoder/final_norm/BatchNormalization_output_0", + "/decoder/head/layer1/Cast_output_0" + ], + "outputs": [ + "/decoder/head/layer1/Pad_output_0" + ], + "attrs": { + "mode": "edge" + } + }, + { + "op_type": "Conv", + "name": "/decoder/head/layer1/net/Conv", + "inputs": [ + "/decoder/head/layer1/Pad_output_0", + "tts.ae.decoder.head.layer1.net.weight", + "tts.ae.decoder.head.layer1.net.bias" + ], + "outputs": [ + "/decoder/head/layer1/net/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 3 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "PRelu", + "name": "/decoder/head/act/PRelu", + "inputs": [ + "/decoder/head/layer1/net/Conv_output_0", + "onnx::PRelu_1506" + ], + "outputs": [ + "/decoder/head/act/PRelu_output_0" + ], + "attrs": {} + }, + { + "op_type": "Conv", + "name": "/decoder/head/layer2/Conv", + "inputs": [ + "/decoder/head/act/PRelu_output_0", + "tts.ae.decoder.head.layer2.weight" + ], + "outputs": [ + "/decoder/head/layer2/Conv_output_0" + ], + "attrs": { + "dilations": [ + 1 + ], + "group": 1, + "kernel_shape": [ + 1 + ], + "pads": [ + 0, + 0 + ], + "strides": [ + 1 + ] + } + }, + { + "op_type": "Transpose", + "name": "/decoder/head/Transpose", + "inputs": [ + "/decoder/head/layer2/Conv_output_0" + ], + "outputs": [ + "/decoder/head/Transpose_output_0" + ], + "attrs": { + "perm": [ + 0, + 2, + 1 + ] + } + }, + { + "op_type": "Shape", + "name": "/decoder/head/Shape", + "inputs": [ + "/decoder/head/Transpose_output_0" + ], + "outputs": [ + "/decoder/head/Shape_output_0" + ], + "attrs": {} + }, + { + "op_type": "Gather", + "name": "/decoder/head/Gather", + "inputs": [ + "/decoder/head/Shape_output_0", + "/decoder/head/Constant_output_0" + ], + "outputs": [ + "/decoder/head/Gather_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Unsqueeze", + "name": "/decoder/head/Unsqueeze", + "inputs": [ + "/decoder/head/Gather_output_0", + "onnx::Unsqueeze_1434" + ], + "outputs": [ + "/decoder/head/Unsqueeze_output_0" + ], + "attrs": {} + }, + { + "op_type": "Concat", + "name": "/decoder/head/Concat", + "inputs": [ + "/decoder/head/Unsqueeze_output_0", + "/decoder/head/Constant_1_output_0" + ], + "outputs": [ + "/decoder/head/Concat_output_0" + ], + "attrs": { + "axis": 0 + } + }, + { + "op_type": "Reshape", + "name": "/decoder/head/Reshape", + "inputs": [ + "/decoder/head/Transpose_output_0", + "/decoder/head/Concat_output_0" + ], + "outputs": [ + "wav_tts" + ], + "attrs": { + "allowzero": 0 + } + } + ] +} \ No newline at end of file