ncnn / tools /pnnx /src /pass_level0 /shape_inference.cpp

thanks to ncnn ❤

be903e2 over 2 years ago

11.4 kB

	// Tencent is pleased to support the open source community by making ncnn available.
	//
	// Copyright (C) 2021 THL A29 Limited, a Tencent company. All rights reserved.
	//
	// Licensed under the BSD 3-Clause License (the "License"); you may not use this file except
	// in compliance with the License. You may obtain a copy of the License at
	//
	// https://opensource.org/licenses/BSD-3-Clause
	//
	// Unless required by applicable law or agreed to in writing, software distributed
	// under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
	// CONDITIONS OF ANY KIND, either express or implied. See the License for the
	// specific language governing permissions and limitations under the License.

	#include "shape_inference.h"

	#include <unordered_map>
	#include <unordered_set>

	#include "storezip.h"
	#include "pass_level0/constant_unpooling.h"
	#include "pass_level0/convert_half_to_float.h"
	#include "pass_level0/flatten_input.h"
	#include "pass_level0/inline_block.h"
	#include "pass_level0/reset_device.h"
	#include "pass_level0/shape_inference.h"

	namespace pnnx {

	// Tell whether value v is one of the given graph inputs or is computed,
	// directly or transitively through the inputs of its producer nodes, from one.
	//
	// v                 value to start the backward walk from
	// inputs            values regarded as graph inputs
	// ignore_aten_size  when true, a value produced by a shape-query or
	//                   shape-driven factory op (aten::size, aten::zeros_like, ...)
	//                   ends the walk on that path and is reported as not linked
	//
	// Returns true as soon as any reachable value equals one of the inputs.
	//
	// The walk is an iterative depth-first search with a visited set, so a
	// sub-graph shared by several consumers is examined once rather than once per
	// path, and deep graphs cannot exhaust the call stack.
	static bool value_link_input(const torch::jit::Value* v, const std::vector<torch::jit::Value*>& inputs, bool ignore_aten_size)
	{
	    // any intermediate shape is constant with static input shape
	    static const char* const shape_only_ops[] = {
	        "aten::size",
	        "aten::new_empty",
	        "aten::new_full",
	        "aten::new_ones",
	        "aten::new_zeros",
	        "aten::empty_like",
	        "aten::full_like",
	        "aten::ones_like",
	        "aten::zeros_like",
	        "aten::_shape_as_tensor"
	    };

	    std::unordered_set<const torch::jit::Value*> visited;
	    std::vector<const torch::jit::Value*> pending;
	    pending.push_back(v);

	    while (!pending.empty())
	    {
	        const torch::jit::Value* cur = pending.back();
	        pending.pop_back();

	        // a value seen before was already found not to reach an input
	        if (!visited.insert(cur).second)
	            continue;

	        if (ignore_aten_size)
	        {
	            const std::string optype = cur->node()->kind().toDisplayString();

	            bool shape_only = false;
	            for (const char* name : shape_only_ops)
	            {
	                if (optype == name)
	                {
	                    shape_only = true;
	                    break;
	                }
	            }

	            // this check comes before the input comparison, as in the
	            // per-value order the walk has always used
	            if (shape_only)
	                continue;
	        }

	        for (const torch::jit::Value* x : inputs)
	        {
	            if (cur == x)
	                return true;
	        }

	        for (const torch::jit::Value* producer_input : cur->node()->inputs())
	        {
	            pending.push_back(producer_input);
	        }
	    }

	    return false;
	}

	// Tell whether value v contributes to any of the given graph outputs.
	//
	// v        value to start the forward walk from
	// outputs  values regarded as graph outputs
	//
	// Returns true when v, or any value reachable by following users and their
	// outputs, is one of the outputs. It also returns true when any reachable
	// value is consumed by an op whose name ends in exactly one underscore (an
	// in-place op), because the effect of such an op is not tracked further.
	//
	// The walk is an iterative depth-first search with a visited set, so values
	// reachable through several users are examined once and deep graphs cannot
	// exhaust the call stack.
	static bool value_link_output(const torch::jit::Value* v, const std::vector<torch::jit::Value*>& outputs)
	{
	    std::unordered_set<const torch::jit::Value*> visited;
	    std::vector<const torch::jit::Value*> pending;
	    pending.push_back(v);

	    while (!pending.empty())
	    {
	        const torch::jit::Value* cur = pending.back();
	        pending.pop_back();

	        // a value seen before was already found not to reach an output
	        if (!visited.insert(cur).second)
	            continue;

	        for (const torch::jit::Value* x : outputs)
	        {
	            if (cur == x)
	                return true;
	        }

	        for (const auto& use : cur->uses())
	        {
	            auto node = use.user;

	            // "add_" is in-place; a name ending in two underscores is not
	            const std::string op_type = node->kind().toDisplayString();
	            const size_t len = op_type.size();
	            const bool is_inplace_op = len > 2 && op_type[len - 2] != '_' && op_type[len - 1] == '_';
	            if (is_inplace_op)
	            {
	                // optimize me: track other inplace op inputs
	                return true;
	            }

	            for (const torch::jit::Value* produced : node->outputs())
	            {
	                pending.push_back(produced);
	            }
	        }
	    }

	    return false;
	}

	void shape_inference(const torch::jit::Module& mod, std::shared_ptr<torch::jit::Graph>& graph, const std::vector<at::Tensor>& input_tensors, const std::vector<at::Tensor>& input_tensors2, const std::vector<std::string>& module_operators, const std::string& ptpath, const std::string& device, std::set<std::string>& foldable_constants, const std::string& foldable_constants_zippath)
	{
	// collect all intermediate output tensors
	std::vector<std::unordered_set<std::string> > more_value_names;
	std::vector<std::vector<torch::jit::Value*> > more_values;
	{
	std::unordered_set<std::string> value_names;
	std::vector<torch::jit::Value*> values;
	for (const auto& n : graph->nodes())
	{
	for (const auto& v : n->outputs())
	{
	auto tensor_type = v->type()->cast<torch::jit::TensorType>();
	if (!tensor_type)
	continue;

	value_names.insert(v->debugName());
	values.push_back(v);
	}

	// too many intermediate blobs in one inference results oom
	if (value_names.size() >= 1000)
	{
	more_value_names.push_back(value_names);
	value_names.clear();

	more_values.push_back(values);
	values.clear();
	}
	}

	if (value_names.size() > 0)
	{
	more_value_names.push_back(value_names);
	more_values.push_back(values);
	}
	}

	// collect graph inputs outputs
	std::vector<torch::jit::Value*> g_inputs;
	for (size_t i = 1; i < graph->inputs().size(); i++)
	{
	g_inputs.push_back(graph->inputs()[i]);
	}
	std::vector<torch::jit::Value*> g_outputs;
	for (size_t i = 0; i < graph->outputs().size(); i++)
	{
	g_outputs.push_back(graph->outputs()[i]);
	}

	std::vector<torch::jit::IValue> inputs;
	for (size_t i = 0; i < input_tensors.size(); i++)
	{
	const at::Tensor& it = input_tensors[i];
	inputs.push_back(it);
	}

	std::vector<torch::jit::IValue> inputs2;
	for (size_t i = 0; i < input_tensors2.size(); i++)
	{
	const at::Tensor& it = input_tensors2[i];
	inputs2.push_back(it);
	}

	StoreZipWriter zip;
	zip.open(foldable_constants_zippath);

	for (size_t p = 0; p < more_value_names.size(); p++)
	{
	std::unordered_set<std::string>& value_names = more_value_names[p];
	std::vector<torch::jit::Value*>& values = more_values[p];

	// auto mod2 = mod.deepcopy();

	torch::jit::Module mod2 = torch::jit::load(ptpath, (device == "gpu") ? c10::kCUDA : c10::kCPU);
	mod2.eval();

	convert_half_to_float(mod2);

	auto method = mod2.find_method("forward");
	if (!method)
	{
	method = mod2.get_methods()[0];
	}

	auto graph2 = method->graph();

	inline_block(graph2, module_operators);

	reset_device(graph2, device);

	flatten_input(graph2);

	constant_unpooling(graph2);

	std::vector<torch::jit::Value*> values2;
	for (auto n : graph2->nodes())
	{
	for (const auto& v : n->outputs())
	{
	auto tensor_type = v->type()->cast<torch::jit::TensorType>();
	if (!tensor_type)
	continue;

	if (value_names.find(v->debugName()) != value_names.end())
	{
	values2.push_back(v);
	// fprintf(stderr, "%s ", v->debugName().c_str());
	}
	}
	}
	fprintf(stderr, "\n----------------\n\n");

	// set new graph output
	torch::jit::Node* new_return_node = graph2->createTuple(at::ArrayRef<torch::jit::Value*>(values2));

	graph2->appendNode(new_return_node);

	graph2->eraseOutput(0);
	graph2->registerOutput(new_return_node->outputs()[0]);

	// construct schema for new inputs and outputs
	{
	auto oldfs = method->function().getSchema();

	std::vector<c10::Argument> arguments;
	std::vector<c10::Argument> returns;
	for (size_t i = 0; i < graph2->inputs().size(); i++)
	{
	auto v = graph2->inputs()[i];
	arguments.push_back(c10::Argument(v->debugName(), v->type()));
	}
	for (size_t i = 0; i < graph2->outputs().size(); i++)
	{
	auto v = graph2->outputs()[i];
	returns.push_back(c10::Argument(v->debugName(), v->type()));
	}

	c10::FunctionSchema newfs(oldfs.name(), oldfs.overload_name(), arguments, returns);
	method->function().setSchema(newfs);
	}

	// inference for all tensors
	auto outputs = mod2.copy().get_method(method->name())(inputs).toTuple();

	if (input_tensors2.empty())
	{
	// assign shape info
	for (size_t i = 0; i < values2.size(); i++)
	{
	auto v = values[i];
	auto t = outputs->elements()[i].toTensor();

	v->setType(c10::TensorType::create(t));

	// check if value that does not depend on inputs
	if (!value_link_input(v, g_inputs, true) && value_link_output(v, g_outputs))
	{
	// fprintf(stderr, "foldable_constant %s\n", v->debugName().c_str());
	foldable_constants.insert(v->debugName());

	at::Tensor t2 = t.cpu().contiguous();
	zip.write_file(v->debugName(), (const char*)t2.data_ptr(), t2.nbytes());
	}
	}
	}
	else
	{
	// assign dynamic shape info
	auto outputs2 = mod2.copy().get_method(method->name())(inputs2).toTuple();

	fprintf(stderr, "assign dynamic shape info\n");

	for (size_t i = 0; i < values2.size(); i++)
	{
	auto v = values[i];
	auto t = outputs->elements()[i].toTensor();
	auto t2 = outputs2->elements()[i].toTensor();

	auto type1 = c10::TensorType::create(t);
	auto type2 = c10::TensorType::create(t2);

	std::vector<c10::ShapeSymbol> sizes1 = type1->symbolic_sizes().sizes().value();
	std::vector<c10::ShapeSymbol> sizes2 = type2->symbolic_sizes().sizes().value();

	for (size_t i = 0; i < sizes1.size(); i++)
	{
	if (sizes1[i] == sizes2[i])
	continue;

	sizes1[i] = c10::ShapeSymbol::fromStaticSize(-1);
	}

	auto finaltype = type1->withSymbolicShapes(c10::SymbolicShape(sizes1));

	v->setType(finaltype);

	// check if value that does not depend on inputs
	if (!value_link_input(v, g_inputs, false) && value_link_output(v, g_outputs))
	{
	// fprintf(stderr, "foldable_constant %s\n", v->debugName().c_str());
	foldable_constants.insert(v->debugName());

	at::Tensor t2 = t.cpu().contiguous();
	zip.write_file(v->debugName(), (const char*)t2.data_ptr(), t2.nbytes());
	}
	}
	}
	}

	zip.close();

	if (input_tensors2.empty())
	{
	for (size_t i = 0; i < input_tensors.size(); i++)
	{
	auto type = c10::TensorType::create(input_tensors[i]);

	graph->inputs()[1 + i]->setType(type);
	}
	}
	else
	{
	for (size_t i = 0; i < input_tensors.size(); i++)
	{
	auto type1 = c10::TensorType::create(input_tensors[i]);
	auto type2 = c10::TensorType::create(input_tensors2[i]);

	std::vector<c10::ShapeSymbol> sizes1 = type1->symbolic_sizes().sizes().value();
	std::vector<c10::ShapeSymbol> sizes2 = type2->symbolic_sizes().sizes().value();

	for (size_t i = 0; i < sizes1.size(); i++)
	{
	if (sizes1[i] == sizes2[i])
	continue;

	sizes1[i] = c10::ShapeSymbol::fromStaticSize(-1);
	}

	auto finaltype = type1->withSymbolicShapes(c10::SymbolicShape(sizes1));

	graph->inputs()[1 + i]->setType(finaltype);
	}
	}
	}

	} // namespace pnnx