import os
import sys
import torch
import nncf
from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.runtime import Core, Type, PartialShape, serialize
from torch._dynamo.backends.common import fake_tensor_unsupported
from torch._dynamo.backends.registry import register_backend
from torch.fx.experimental.proxy_tensor import make_fx
from torch.fx import GraphModule
from torch.utils._pytree import tree_flatten
from types import MappingProxyType
from hashlib import sha256
import functools
from modules import shared, devices, sd_models
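
# Reimplementation of the BUILD_MAP_UNPACK bytecode handler, attached to the torch
# module so TorchDynamo picks it up when tracing dict-merging call sites. The F821
# names (BuiltinVariable, ConstDictVariable, MutableLocal, VariableTracker) are
# expected to be in scope inside torch._dynamo when the handler actually runs.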
def BUILD_MAP_UNPACK(self, inst):
    items = self.popn(inst.argval)
    # ensure everything is a dict
    items = [BuiltinVariable(dict).call_function(self, [x], {}) for x in items] # noqa: F821
    result = dict()
    for x in items:
        assert isinstance(x, ConstDictVariable) # noqa: F821
        result.update(x.items)
    self.push(
        ConstDictVariable( # noqa: F821
            result,
            dict,
            mutable_local=MutableLocal(), # noqa: F821
            **VariableTracker.propagate(items), # noqa: F821
        )
    )

tmp_torch = sys.modules["torch"]
tmp_torch.BUILD_MAP_UNPACK_WITH_CALL = BUILD_MAP_UNPACK

max_openvino_partitions = 0

# Module-level defaults so openvino_compile()/openvino_compile_cached_model() can be
# called before openvino_fx() has initialized these flags.
dont_use_4bit_nncf = False
dont_use_nncf = False
dont_use_quant = False
subgraph_type = []

DEFAULT_OPENVINO_PYTHON_CONFIG = MappingProxyType(
    {
        "use_python_fusion_cache": True,
        "allow_single_op_fusion": True,
    },
)
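
# Wrapper that makes a fused FX submodule callable through openvino_execute() with the
# partition's cache settings.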
class OpenVINOGraphModule(torch.nn.Module):
    def __init__(self, gm, partition_id, use_python_fusion_cache, model_hash_str: str = None, file_name=""):
        super().__init__()
        self.gm = gm
        self.partition_id = partition_id
        self.executor_parameters = {"use_python_fusion_cache": use_python_fusion_cache,
                                    "model_hash_str": model_hash_str}
        self.file_name = file_name

    def __call__(self, *args):
        result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id, file_name=self.file_name)
        return result
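
# Device selection: prefer the user-selected device(s), build a HETERO device string for
# multi-device setups, and fall back from GPU to CPU otherwise.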
def get_device_list():
    core = Core()
    return core.available_devices

def get_device():
    if hasattr(shared, "opts") and len(shared.opts.openvino_devices) == 1:
        return shared.opts.openvino_devices[0]
    core = Core()
    if hasattr(shared, "opts") and len(shared.opts.openvino_devices) > 1:
        device = ""
        available_devices = shared.opts.openvino_devices.copy()
        # CPU is appended last so it gets the lowest HETERO priority; guard the removal
        # so a selection without CPU doesn't raise ValueError
        if "CPU" in available_devices:
            available_devices.remove("CPU")
        for hetero_device in available_devices:
            device = f"{device},{hetero_device}"
        if "CPU" in shared.opts.openvino_devices:
            device = f"{device},CPU"
        device = f"HETERO:{device[1:]}"
    elif any(openvino_cpu in cpu_module.lower() for cpu_module in shared.cmd_opts.use_cpu for openvino_cpu in ["openvino", "all"]):
        device = "CPU"
    elif shared.cmd_opts.device_id is not None:
        device = f"GPU.{shared.cmd_opts.device_id}"
        if device not in core.available_devices:
            device = "GPU.0" if "GPU.0" in core.available_devices else "GPU" if "GPU" in core.available_devices else "CPU"
    elif "GPU" in core.available_devices:
        device = "GPU"
    elif "GPU.1" in core.available_devices:
        device = "GPU.1"
    elif "GPU.0" in core.available_devices:
        device = "GPU.0"
    else:
        device = core.available_devices[-1]
        shared.log.warning(f"OpenVINO: No compatible GPU detected! Using {device}")
    return device

def get_openvino_device():
    core = Core()
    try:
        return core.get_property(get_device(), "FULL_DEVICE_NAME")
    except Exception:
        return f"OpenVINO {get_device()}"
def cached_model_name(model_hash_str, device, args, cache_root, reversed=False):
    if model_hash_str is None:
        return None
    model_cache_dir = cache_root + "/model/"
    try:
        os.makedirs(model_cache_dir, exist_ok=True)
        file_name = model_cache_dir + model_hash_str + "_" + device
    except OSError as error:
        shared.log.error(f"Cache directory {cache_root} cannot be created. Model caching is disabled. Error: {error}")
        return None
    inputs_str = ""
    for input_data in args:
        if isinstance(input_data, torch.SymInt):
            # use the same token in both directions so forward and reversed orders of the
            # same inputs hash consistently
            if reversed:
                inputs_str = "_" + "torch.SymInt1" + inputs_str
            else:
                inputs_str += "_" + "torch.SymInt1"
        else:
            if reversed:
                inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
            else:
                inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
    inputs_str = sha256(inputs_str.encode('utf-8')).hexdigest()
    file_name += "_" + inputs_str
    return file_name
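
# A graph counts as "fully supported" when it contains exactly one fused OpenVINO
# submodule and otherwise only placeholder/output nodes; patched onto Partitioner below.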
def check_fully_supported(self, graph_module: GraphModule) -> bool:
    num_fused = 0
    for node in graph_module.graph.nodes:
        if node.op == "call_module" and "fused_" in node.name:
            num_fused += 1
        elif node.op != "placeholder" and node.op != "output":
            return False
    if num_fused == 1:
        return True
    return False

Partitioner.check_fully_supported = functools.partial(check_fully_supported, Partitioner)

def execute(gm, *args, executor="openvino", executor_parameters=None, file_name=""):
    if executor == "openvino":
        return openvino_execute_partitioned(gm, *args, executor_parameters=executor_parameters, file_name=file_name)
    elif executor == "strictly_openvino":
        return openvino_execute(gm, *args, executor_parameters=executor_parameters, file_name=file_name)
    msg = f"Received unexpected value for 'executor': {executor}. Allowed values are: openvino, strictly_openvino."
    raise ValueError(msg)
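
# Runs an already-compiled OpenVINO model: flattens the tensor args, converts them to
# numpy, and wraps the outputs back into torch tensors.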
def execute_cached(compiled_model, *args):
    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
    if shared.compiled_model_state.cn_model == []:
        ov_inputs.reverse()
    res = compiled_model(ov_inputs)
    result = [torch.from_numpy(res[out]) for out in compiled_model.outputs]
    return result
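
# Converts an FX GraphModule to an OpenVINO model through the PyTorch frontend (or
# reloads a previously serialized .xml/.bin pair), pins input dtypes and shapes,
# optionally applies NNCF quantization / weight compression, and compiles it for the
# selected device.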
def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = None, file_name=""):
    core = Core()
    device = get_device()
    cache_root = shared.opts.openvino_cache_path
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
        om = core.read_model(file_name + ".xml")
    else:
        fe_manager = FrontEndManager()
        fe = fe_manager.load_by_framework("pytorch")
        input_shapes = []
        input_types = []
        for input_data in example_inputs:
            if isinstance(input_data, torch.SymInt):
                input_types.append(torch.SymInt)
                input_shapes.append(1)
            else:
                input_types.append(input_data.type())
                input_shapes.append(input_data.size())
        decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types)
        im = fe.load(decoder)
        om = fe.convert(im)
        if file_name is not None:
            serialize(om, file_name + ".xml", file_name + ".bin")
            if shared.compiled_model_state.cn_model != []:
                # record input shapes so cached ControlNet inputs can be reordered on reload
                with open(file_name + ".txt", "w") as f:
                    for input_data in example_inputs:
                        f.write(str(input_data.size()))
                        f.write("\n")
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean,
    }
    for idx, input_data in enumerate(example_inputs):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
    if shared.opts.nncf_quantize and not dont_use_quant:
        new_inputs = []
        for idx, _ in enumerate(example_inputs):
            new_inputs.append(example_inputs[idx].detach().cpu().numpy())
        new_inputs = [new_inputs]
        if shared.opts.nncf_quant_mode == "INT8":
            om = nncf.quantize(om, nncf.Dataset(new_inputs))
        else:
            om = nncf.quantize(om, nncf.Dataset(new_inputs), mode=getattr(nncf.QuantizationMode, shared.opts.nncf_quant_mode),
                               advanced_parameters=nncf.quantization.advanced_parameters.AdvancedQuantizationParameters(
                                   overflow_fix=nncf.quantization.advanced_parameters.OverflowFix.DISABLE, backend_params=None))
    if shared.opts.nncf_compress_weights and not dont_use_nncf:
        if dont_use_4bit_nncf or shared.opts.nncf_compress_weights_mode == "INT8":
            om = nncf.compress_weights(om)
        else:
            om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)
    if model_hash_str is not None:
        core.set_property({'CACHE_DIR': cache_root + '/blob'})
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_4bit_nncf = False
    compiled_model = core.compile_model(om, device)
    return compiled_model
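
# Same dtype/NNCF post-processing pipeline as openvino_compile(), but starting from a
# model already serialized to disk.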
def openvino_compile_cached_model(cached_model_path, *example_inputs):
    core = Core()
    om = core.read_model(cached_model_path + ".xml")
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean,
    }
    for idx, input_data in enumerate(example_inputs):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
    if shared.opts.nncf_quantize and not dont_use_quant:
        new_inputs = []
        for idx, _ in enumerate(example_inputs):
            new_inputs.append(example_inputs[idx].detach().cpu().numpy())
        new_inputs = [new_inputs]
        if shared.opts.nncf_quant_mode == "INT8":
            om = nncf.quantize(om, nncf.Dataset(new_inputs))
        else:
            om = nncf.quantize(om, nncf.Dataset(new_inputs), mode=getattr(nncf.QuantizationMode, shared.opts.nncf_quant_mode),
                               advanced_parameters=nncf.quantization.advanced_parameters.AdvancedQuantizationParameters(
                                   overflow_fix=nncf.quantization.advanced_parameters.OverflowFix.DISABLE, backend_params=None))
    if shared.opts.nncf_compress_weights and not dont_use_nncf:
        if dont_use_4bit_nncf or shared.opts.nncf_compress_weights_mode == "INT8":
            om = nncf.compress_weights(om)
        else:
            om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)
    core.set_property({'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'})
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_4bit_nncf = False
    compiled_model = core.compile_model(om, get_device())
    return compiled_model
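
# Compiles a single partition (or fetches it from the in-memory cache) and runs it on
# the given args.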
def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition_id, file_name=""):
    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG
    use_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    model_hash_str = executor_parameters.get("model_hash_str", None)
    if model_hash_str is not None:
        model_hash_str = model_hash_str + str(partition_id)
    if use_cache and (partition_id in shared.compiled_model_state.compiled_cache):
        compiled = shared.compiled_model_state.compiled_cache[partition_id]
    else:
        if (shared.compiled_model_state.cn_model != [] and file_name is not None
                and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin")):
            compiled = openvino_compile_cached_model(file_name, *args)
        else:
            compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str, file_name=file_name)
        shared.compiled_model_state.compiled_cache[partition_id] = compiled
    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
    res = compiled(ov_inputs)
    results1 = [torch.from_numpy(res[out]) for out in compiled.outputs]
    if len(results1) == 1:
        return results1[0]
    return results1
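
# Dispatches to a partitioned module keyed by a signature built from the GraphModule id
# plus the types and shapes of its inputs, partitioning the graph on first sight.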
def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None, file_name=""):
    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG
    use_python_fusion_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    model_hash_str = executor_parameters.get("model_hash_str", None)
    signature = str(id(gm))
    for idx, input_data in enumerate(args):
        if isinstance(input_data, torch.Tensor):
            signature = signature + "_" + str(idx) + ":" + str(input_data.type())[6:] + ":" + str(input_data.size())[11:-1].replace(" ", "")
        else:
            signature = signature + "_" + str(idx) + ":" + type(input_data).__name__ + ":val(" + str(input_data) + ")"
    if signature not in shared.compiled_model_state.partitioned_modules:
        shared.compiled_model_state.partitioned_modules[signature] = partition_graph(gm, use_python_fusion_cache=use_python_fusion_cache,
                                                                                     model_hash_str=model_hash_str, file_name=file_name)
    return shared.compiled_model_state.partitioned_modules[signature](*args)
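
# Replaces every fused submodule in the graph with an OpenVINOGraphModule wrapper and
# assigns each one a unique partition id.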
def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_str: str = None, file_name=""):
    global max_openvino_partitions
    for node in gm.graph.nodes:
        if node.op == "call_module" and "fused_" in node.name:
            openvino_submodule = getattr(gm, node.name)
            gm.delete_submodule(node.target)
            gm.add_submodule(
                node.target,
                OpenVINOGraphModule(openvino_submodule, shared.compiled_model_state.partition_id, use_python_fusion_cache,
                                    model_hash_str=model_hash_str, file_name=file_name),
            )
            shared.compiled_model_state.partition_id = shared.compiled_model_state.partition_id + 1
    return gm
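
# Helpers used with nn.Module.apply(): accumulate a hash over submodule weights and
# record submodule types for model-type detection in openvino_fx().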
def generate_subgraph_str(tensor):
    if hasattr(tensor, "weight"):
        shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(str(tensor.weight).encode('utf-8')).hexdigest()
    return tensor

def get_subgraph_type(tensor):
    global subgraph_type
    subgraph_type.append(type(tensor))
    return tensor
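
# TorchDynamo backend entry point: detects the model component being compiled, manages
# on-disk model caching, and returns a callable that runs the (partitioned) OpenVINO model.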
# registered as a TorchDynamo backend (decorators imported above), so that
# torch.compile(backend="openvino_fx") resolves this function
@register_backend
@fake_tensor_unsupported
def openvino_fx(subgraph, example_inputs):
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    global subgraph_type
    dont_use_4bit_nncf = False
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_faketensors = False
    executor_parameters = None
    inputs_reversed = False
    maybe_fs_cached_name = None
    subgraph_type = []
    subgraph.apply(get_subgraph_type)
    # SD 1.5 / SDXL VAE (length check guards graphs with fewer than four submodules)
    if (len(subgraph_type) > 3 and
        subgraph_type[0] is torch.nn.modules.conv.Conv2d and
        subgraph_type[1] is torch.nn.modules.conv.Conv2d and
        subgraph_type[2] is torch.nn.modules.normalization.GroupNorm and
        subgraph_type[3] is torch.nn.modules.activation.SiLU):
        dont_use_4bit_nncf = True
        dont_use_nncf = bool("VAE" not in shared.opts.nncf_compress_weights)
        dont_use_quant = bool("VAE" not in shared.opts.nncf_quantize)
    # SD 1.5 / SDXL Text Encoder
    elif (len(subgraph_type) > 3 and
        subgraph_type[0] is torch.nn.modules.sparse.Embedding and
        subgraph_type[1] is torch.nn.modules.sparse.Embedding and
        subgraph_type[2] is torch.nn.modules.normalization.LayerNorm and
        subgraph_type[3] is torch.nn.modules.linear.Linear):
        dont_use_faketensors = True
        dont_use_nncf = bool("Text Encoder" not in shared.opts.nncf_compress_weights)
        dont_use_quant = bool("Text Encoder" not in shared.opts.nncf_quantize)

    if not shared.opts.openvino_disable_model_caching:
        os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "1")
        # Create a hash to be used for caching
        subgraph.apply(generate_subgraph_str)
        shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(subgraph.code.encode('utf-8')).hexdigest()
        shared.compiled_model_state.model_hash_str = sha256(shared.compiled_model_state.model_hash_str.encode('utf-8')).hexdigest()
        executor_parameters = {"model_hash_str": shared.compiled_model_state.model_hash_str}
        # Check if the model was fully supported and already cached
        example_inputs.reverse()
        inputs_reversed = True
        maybe_fs_cached_name = cached_model_name(shared.compiled_model_state.model_hash_str + "_fs", get_device(), example_inputs, shared.opts.openvino_cache_path)
        if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
            example_inputs_reordered = []
            if os.path.isfile(maybe_fs_cached_name + ".txt"):
                # reorder the example inputs to match the shapes recorded at serialization time
                with open(maybe_fs_cached_name + ".txt", "r") as f:
                    for input_data in example_inputs:
                        shape = f.readline()
                        if str(input_data.size()) != shape:
                            for idx1, input_data1 in enumerate(example_inputs):
                                if str(input_data1.size()).strip() == str(shape).strip():
                                    example_inputs_reordered.append(example_inputs[idx1])
                example_inputs = example_inputs_reordered
            if dont_use_faketensors or shared.opts.openvino_disable_memory_cleanup:
                pass
            else:
                # Delete unused subgraphs
                subgraph = subgraph.apply(sd_models.convert_to_faketensors)
                devices.torch_gc(force=True)
            # Model is fully supported and already cached. Run the cached OV model directly.
            compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs)

            def _call(*args):
                if (shared.compiled_model_state.cn_model != [] and str(shared.compiled_model_state.cn_model) in maybe_fs_cached_name):
                    args_reordered = []
                    if os.path.isfile(maybe_fs_cached_name + ".txt"):
                        with open(maybe_fs_cached_name + ".txt", "r") as f:
                            for input_data in args:
                                shape = f.readline()
                                if str(input_data.size()) != shape:
                                    for idx1, input_data1 in enumerate(args):
                                        if str(input_data1.size()).strip() == str(shape).strip():
                                            args_reordered.append(args[idx1])
                        args = args_reordered
                res = execute_cached(compiled_model, *args)
                shared.compiled_model_state.partition_id = shared.compiled_model_state.partition_id + 1
                return res
            return _call
    else:
        os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "0")
        maybe_fs_cached_name = None

    if inputs_reversed:
        example_inputs.reverse()

    model = make_fx(subgraph)(*example_inputs)
    # swap in-place aten.mul_ for its out-of-place equivalent before partitioning
    for node in model.graph.nodes:
        if node.target == torch.ops.aten.mul_.Tensor:
            node.target = torch.ops.aten.mul.Tensor
    with devices.inference_context():
        model.eval()
    partitioner = Partitioner()
    compiled_model = partitioner.make_partitions(model)

    if executor_parameters is not None and 'model_hash_str' in executor_parameters:
        # Check if the model is fully supported.
        fully_supported = partitioner.check_fully_supported(compiled_model)
        if fully_supported:
            executor_parameters["model_hash_str"] += "_fs"

    def _call(*args):
        res = execute(compiled_model, *args, executor="openvino",
                      executor_parameters=executor_parameters, file_name=maybe_fs_cached_name)
        return res
    return _call