import os
import sys
import torch
import nncf
from openvino.frontend import FrontEndManager
from openvino.frontend.pytorch.fx_decoder import TorchFXPythonDecoder
from openvino.frontend.pytorch.torchdynamo.partition import Partitioner
from openvino.runtime import Core, Type, PartialShape, serialize
from torch._dynamo.backends.common import fake_tensor_unsupported
from torch._dynamo.backends.registry import register_backend
from torch.fx.experimental.proxy_tensor import make_fx
from torch.fx import GraphModule
from torch.utils._pytree import tree_flatten
from types import MappingProxyType
from hashlib import sha256
import functools
from modules import shared, devices, sd_models
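
# Reimplementation of the BUILD_MAP_UNPACK bytecode handler, attached to the torch
# module so TorchDynamo picks it up when tracing dict-merging call sites. The F821
# names (BuiltinVariable, ConstDictVariable, MutableLocal, VariableTracker) are
# expected to be in scope inside torch._dynamo when the handler actually runs.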
def BUILD_MAP_UNPACK(self, inst):
    items = self.popn(inst.argval)
    # ensure everything is a dict
    items = [BuiltinVariable(dict).call_function(self, [x], {}) for x in items] # noqa: F821
    result = dict()
    for x in items:
        assert isinstance(x, ConstDictVariable) # noqa: F821
        result.update(x.items)
    self.push(
        ConstDictVariable( # noqa: F821
            result,
            dict,
            mutable_local=MutableLocal(), # noqa: F821
            **VariableTracker.propagate(items), # noqa: F821
        )
    )

tmp_torch = sys.modules["torch"]
tmp_torch.BUILD_MAP_UNPACK_WITH_CALL = BUILD_MAP_UNPACK

max_openvino_partitions = 0

# Module-level defaults so openvino_compile()/openvino_compile_cached_model() can be
# called before openvino_fx() has initialized these flags.
dont_use_4bit_nncf = False
dont_use_nncf = False
dont_use_quant = False
subgraph_type = []

DEFAULT_OPENVINO_PYTHON_CONFIG = MappingProxyType(
    {
        "use_python_fusion_cache": True,
        "allow_single_op_fusion": True,
    },
)
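
# Wrapper that makes a fused FX submodule callable through openvino_execute() with the
# partition's cache settings.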
class OpenVINOGraphModule(torch.nn.Module):
    def __init__(self, gm, partition_id, use_python_fusion_cache, model_hash_str: str = None, file_name=""):
        super().__init__()
        self.gm = gm
        self.partition_id = partition_id
        self.executor_parameters = {"use_python_fusion_cache": use_python_fusion_cache,
                                    "model_hash_str": model_hash_str}
        self.file_name = file_name

    def __call__(self, *args):
        result = openvino_execute(self.gm, *args, executor_parameters=self.executor_parameters, partition_id=self.partition_id, file_name=self.file_name)
        return result
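
# Device selection: prefer the user-selected device(s), build a HETERO device string for
# multi-device setups, and fall back from GPU to CPU otherwise.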
def get_device_list():
    core = Core()
    return core.available_devices

def get_device():
    if hasattr(shared, "opts") and len(shared.opts.openvino_devices) == 1:
        return shared.opts.openvino_devices[0]
    core = Core()
    if hasattr(shared, "opts") and len(shared.opts.openvino_devices) > 1:
        device = ""
        available_devices = shared.opts.openvino_devices.copy()
        # CPU is appended last so it gets the lowest HETERO priority; guard the removal
        # so a selection without CPU doesn't raise ValueError
        if "CPU" in available_devices:
            available_devices.remove("CPU")
        for hetero_device in available_devices:
            device = f"{device},{hetero_device}"
        if "CPU" in shared.opts.openvino_devices:
            device = f"{device},CPU"
        device = f"HETERO:{device[1:]}"
    elif any(openvino_cpu in cpu_module.lower() for cpu_module in shared.cmd_opts.use_cpu for openvino_cpu in ["openvino", "all"]):
        device = "CPU"
    elif shared.cmd_opts.device_id is not None:
        device = f"GPU.{shared.cmd_opts.device_id}"
        if device not in core.available_devices:
            device = "GPU.0" if "GPU.0" in core.available_devices else "GPU" if "GPU" in core.available_devices else "CPU"
    elif "GPU" in core.available_devices:
        device = "GPU"
    elif "GPU.1" in core.available_devices:
        device = "GPU.1"
    elif "GPU.0" in core.available_devices:
        device = "GPU.0"
    else:
        device = core.available_devices[-1]
        shared.log.warning(f"OpenVINO: No compatible GPU detected! Using {device}")
    return device

def get_openvino_device():
    core = Core()
    try:
        return core.get_property(get_device(), "FULL_DEVICE_NAME")
    except Exception:
        return f"OpenVINO {get_device()}"
def cached_model_name(model_hash_str, device, args, cache_root, reversed=False):
    if model_hash_str is None:
        return None
    model_cache_dir = cache_root + "/model/"
    try:
        os.makedirs(model_cache_dir, exist_ok=True)
        file_name = model_cache_dir + model_hash_str + "_" + device
    except OSError as error:
        shared.log.error(f"Cache directory {cache_root} cannot be created. Model caching is disabled. Error: {error}")
        return None
    inputs_str = ""
    for input_data in args:
        if isinstance(input_data, torch.SymInt):
            # use the same token in both directions so forward and reversed orders of the
            # same inputs hash consistently
            if reversed:
                inputs_str = "_" + "torch.SymInt1" + inputs_str
            else:
                inputs_str += "_" + "torch.SymInt1"
        else:
            if reversed:
                inputs_str = "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "") + inputs_str
            else:
                inputs_str += "_" + str(input_data.type()) + str(input_data.size())[11:-1].replace(" ", "")
    inputs_str = sha256(inputs_str.encode('utf-8')).hexdigest()
    file_name += "_" + inputs_str
    return file_name
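
# A graph counts as "fully supported" when it contains exactly one fused OpenVINO
# submodule and otherwise only placeholder/output nodes; patched onto Partitioner below.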
def check_fully_supported(self, graph_module: GraphModule) -> bool:
    num_fused = 0
    for node in graph_module.graph.nodes:
        if node.op == "call_module" and "fused_" in node.name:
            num_fused += 1
        elif node.op != "placeholder" and node.op != "output":
            return False
    if num_fused == 1:
        return True
    return False

Partitioner.check_fully_supported = functools.partial(check_fully_supported, Partitioner)

def execute(gm, *args, executor="openvino", executor_parameters=None, file_name=""):
    if executor == "openvino":
        return openvino_execute_partitioned(gm, *args, executor_parameters=executor_parameters, file_name=file_name)
    elif executor == "strictly_openvino":
        return openvino_execute(gm, *args, executor_parameters=executor_parameters, file_name=file_name)
    msg = f"Received unexpected value for 'executor': {executor}. Allowed values are: openvino, strictly_openvino."
    raise ValueError(msg)
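
# Runs an already-compiled OpenVINO model: flattens the tensor args, converts them to
# numpy, and wraps the outputs back into torch tensors.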
def execute_cached(compiled_model, *args):
    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
    if shared.compiled_model_state.cn_model == []:
        ov_inputs.reverse()
    res = compiled_model(ov_inputs)
    result = [torch.from_numpy(res[out]) for out in compiled_model.outputs]
    return result
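
# Converts an FX GraphModule to an OpenVINO model through the PyTorch frontend (or
# reloads a previously serialized .xml/.bin pair), pins input dtypes and shapes,
# optionally applies NNCF quantization / weight compression, and compiles it for the
# selected device.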
def openvino_compile(gm: GraphModule, *example_inputs, model_hash_str: str = None, file_name=""):
    core = Core()
    device = get_device()
    cache_root = shared.opts.openvino_cache_path
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    if file_name is not None and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin"):
        om = core.read_model(file_name + ".xml")
    else:
        fe_manager = FrontEndManager()
        fe = fe_manager.load_by_framework("pytorch")
        input_shapes = []
        input_types = []
        for input_data in example_inputs:
            if isinstance(input_data, torch.SymInt):
                input_types.append(torch.SymInt)
                input_shapes.append(1)
            else:
                input_types.append(input_data.type())
                input_shapes.append(input_data.size())
        decoder = TorchFXPythonDecoder(gm, gm, input_shapes=input_shapes, input_types=input_types)
        im = fe.load(decoder)
        om = fe.convert(im)
        if file_name is not None:
            serialize(om, file_name + ".xml", file_name + ".bin")
            if shared.compiled_model_state.cn_model != []:
                # record input shapes so cached ControlNet inputs can be reordered on reload
                with open(file_name + ".txt", "w") as f:
                    for input_data in example_inputs:
                        f.write(str(input_data.size()))
                        f.write("\n")
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean,
    }
    for idx, input_data in enumerate(example_inputs):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
    if shared.opts.nncf_quantize and not dont_use_quant:
        new_inputs = []
        for idx, _ in enumerate(example_inputs):
            new_inputs.append(example_inputs[idx].detach().cpu().numpy())
        new_inputs = [new_inputs]
        if shared.opts.nncf_quant_mode == "INT8":
            om = nncf.quantize(om, nncf.Dataset(new_inputs))
        else:
            om = nncf.quantize(om, nncf.Dataset(new_inputs), mode=getattr(nncf.QuantizationMode, shared.opts.nncf_quant_mode),
                               advanced_parameters=nncf.quantization.advanced_parameters.AdvancedQuantizationParameters(
                                   overflow_fix=nncf.quantization.advanced_parameters.OverflowFix.DISABLE, backend_params=None))
    if shared.opts.nncf_compress_weights and not dont_use_nncf:
        if dont_use_4bit_nncf or shared.opts.nncf_compress_weights_mode == "INT8":
            om = nncf.compress_weights(om)
        else:
            om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)
    if model_hash_str is not None:
        core.set_property({'CACHE_DIR': cache_root + '/blob'})
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_4bit_nncf = False
    compiled_model = core.compile_model(om, device)
    return compiled_model
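
# Same dtype/NNCF post-processing pipeline as openvino_compile(), but starting from a
# model already serialized to disk.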
def openvino_compile_cached_model(cached_model_path, *example_inputs):
    core = Core()
    om = core.read_model(cached_model_path + ".xml")
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    dtype_mapping = {
        torch.float32: Type.f32,
        torch.float64: Type.f64,
        torch.float16: Type.f16,
        torch.int64: Type.i64,
        torch.int32: Type.i32,
        torch.uint8: Type.u8,
        torch.int8: Type.i8,
        torch.bool: Type.boolean,
    }
    for idx, input_data in enumerate(example_inputs):
        om.inputs[idx].get_node().set_element_type(dtype_mapping[input_data.dtype])
        om.inputs[idx].get_node().set_partial_shape(PartialShape(list(input_data.shape)))
    om.validate_nodes_and_infer_types()
    if shared.opts.nncf_quantize and not dont_use_quant:
        new_inputs = []
        for idx, _ in enumerate(example_inputs):
            new_inputs.append(example_inputs[idx].detach().cpu().numpy())
        new_inputs = [new_inputs]
        if shared.opts.nncf_quant_mode == "INT8":
            om = nncf.quantize(om, nncf.Dataset(new_inputs))
        else:
            om = nncf.quantize(om, nncf.Dataset(new_inputs), mode=getattr(nncf.QuantizationMode, shared.opts.nncf_quant_mode),
                               advanced_parameters=nncf.quantization.advanced_parameters.AdvancedQuantizationParameters(
                                   overflow_fix=nncf.quantization.advanced_parameters.OverflowFix.DISABLE, backend_params=None))
    if shared.opts.nncf_compress_weights and not dont_use_nncf:
        if dont_use_4bit_nncf or shared.opts.nncf_compress_weights_mode == "INT8":
            om = nncf.compress_weights(om)
        else:
            om = nncf.compress_weights(om, mode=getattr(nncf.CompressWeightsMode, shared.opts.nncf_compress_weights_mode), group_size=8, ratio=shared.opts.nncf_compress_weights_raito)
    core.set_property({'CACHE_DIR': shared.opts.openvino_cache_path + '/blob'})
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_4bit_nncf = False
    compiled_model = core.compile_model(om, get_device())
    return compiled_model
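
# Compiles a single partition (or fetches it from the in-memory cache) and runs it on
# the given args.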
def openvino_execute(gm: GraphModule, *args, executor_parameters=None, partition_id, file_name=""):
    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG
    use_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    model_hash_str = executor_parameters.get("model_hash_str", None)
    if model_hash_str is not None:
        model_hash_str = model_hash_str + str(partition_id)
    if use_cache and (partition_id in shared.compiled_model_state.compiled_cache):
        compiled = shared.compiled_model_state.compiled_cache[partition_id]
    else:
        if (shared.compiled_model_state.cn_model != [] and file_name is not None
                and os.path.isfile(file_name + ".xml") and os.path.isfile(file_name + ".bin")):
            compiled = openvino_compile_cached_model(file_name, *args)
        else:
            compiled = openvino_compile(gm, *args, model_hash_str=model_hash_str, file_name=file_name)
        shared.compiled_model_state.compiled_cache[partition_id] = compiled
    flat_args, _ = tree_flatten(args)
    ov_inputs = [a.detach().cpu().numpy() for a in flat_args]
    res = compiled(ov_inputs)
    results1 = [torch.from_numpy(res[out]) for out in compiled.outputs]
    if len(results1) == 1:
        return results1[0]
    return results1
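
# Dispatches to a partitioned module keyed by a signature built from the GraphModule id
# plus the types and shapes of its inputs, partitioning the graph on first sight.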
def openvino_execute_partitioned(gm: GraphModule, *args, executor_parameters=None, file_name=""):
    executor_parameters = executor_parameters or DEFAULT_OPENVINO_PYTHON_CONFIG
    use_python_fusion_cache = executor_parameters.get(
        "use_python_fusion_cache",
        DEFAULT_OPENVINO_PYTHON_CONFIG["use_python_fusion_cache"],
    )
    model_hash_str = executor_parameters.get("model_hash_str", None)
    signature = str(id(gm))
    for idx, input_data in enumerate(args):
        if isinstance(input_data, torch.Tensor):
            signature = signature + "_" + str(idx) + ":" + str(input_data.type())[6:] + ":" + str(input_data.size())[11:-1].replace(" ", "")
        else:
            signature = signature + "_" + str(idx) + ":" + type(input_data).__name__ + ":val(" + str(input_data) + ")"
    if signature not in shared.compiled_model_state.partitioned_modules:
        shared.compiled_model_state.partitioned_modules[signature] = partition_graph(gm, use_python_fusion_cache=use_python_fusion_cache,
                                                                                     model_hash_str=model_hash_str, file_name=file_name)
    return shared.compiled_model_state.partitioned_modules[signature](*args)
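
# Replaces every fused submodule in the graph with an OpenVINOGraphModule wrapper and
# assigns each one a unique partition id.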
def partition_graph(gm: GraphModule, use_python_fusion_cache: bool, model_hash_str: str = None, file_name=""):
    global max_openvino_partitions
    for node in gm.graph.nodes:
        if node.op == "call_module" and "fused_" in node.name:
            openvino_submodule = getattr(gm, node.name)
            gm.delete_submodule(node.target)
            gm.add_submodule(
                node.target,
                OpenVINOGraphModule(openvino_submodule, shared.compiled_model_state.partition_id, use_python_fusion_cache,
                                    model_hash_str=model_hash_str, file_name=file_name),
            )
            shared.compiled_model_state.partition_id = shared.compiled_model_state.partition_id + 1
    return gm
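
# Helpers used with nn.Module.apply(): accumulate a hash over submodule weights and
# record submodule types for model-type detection in openvino_fx().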
def generate_subgraph_str(tensor):
    if hasattr(tensor, "weight"):
        shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(str(tensor.weight).encode('utf-8')).hexdigest()
    return tensor

def get_subgraph_type(tensor):
    global subgraph_type
    subgraph_type.append(type(tensor))
    return tensor
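
# TorchDynamo backend entry point: detects the model component being compiled, manages
# on-disk model caching, and returns a callable that runs the (partitioned) OpenVINO model.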
# registered as a TorchDynamo backend (decorators imported above), so that
# torch.compile(backend="openvino_fx") resolves this function
@register_backend
@fake_tensor_unsupported
def openvino_fx(subgraph, example_inputs):
    global dont_use_4bit_nncf
    global dont_use_nncf
    global dont_use_quant
    global subgraph_type
    dont_use_4bit_nncf = False
    dont_use_nncf = False
    dont_use_quant = False
    dont_use_faketensors = False
    executor_parameters = None
    inputs_reversed = False
    maybe_fs_cached_name = None
    subgraph_type = []
    subgraph.apply(get_subgraph_type)
    # SD 1.5 / SDXL VAE (length check guards graphs with fewer than four submodules)
    if (len(subgraph_type) > 3 and
        subgraph_type[0] is torch.nn.modules.conv.Conv2d and
        subgraph_type[1] is torch.nn.modules.conv.Conv2d and
        subgraph_type[2] is torch.nn.modules.normalization.GroupNorm and
        subgraph_type[3] is torch.nn.modules.activation.SiLU):
        dont_use_4bit_nncf = True
        dont_use_nncf = bool("VAE" not in shared.opts.nncf_compress_weights)
        dont_use_quant = bool("VAE" not in shared.opts.nncf_quantize)
    # SD 1.5 / SDXL Text Encoder
    elif (len(subgraph_type) > 3 and
        subgraph_type[0] is torch.nn.modules.sparse.Embedding and
        subgraph_type[1] is torch.nn.modules.sparse.Embedding and
        subgraph_type[2] is torch.nn.modules.normalization.LayerNorm and
        subgraph_type[3] is torch.nn.modules.linear.Linear):
        dont_use_faketensors = True
        dont_use_nncf = bool("Text Encoder" not in shared.opts.nncf_compress_weights)
        dont_use_quant = bool("Text Encoder" not in shared.opts.nncf_quantize)

    if not shared.opts.openvino_disable_model_caching:
        os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "1")
        # Create a hash to be used for caching
        subgraph.apply(generate_subgraph_str)
        shared.compiled_model_state.model_hash_str = shared.compiled_model_state.model_hash_str + sha256(subgraph.code.encode('utf-8')).hexdigest()
        shared.compiled_model_state.model_hash_str = sha256(shared.compiled_model_state.model_hash_str.encode('utf-8')).hexdigest()
        executor_parameters = {"model_hash_str": shared.compiled_model_state.model_hash_str}
        # Check if the model was fully supported and already cached
        example_inputs.reverse()
        inputs_reversed = True
        maybe_fs_cached_name = cached_model_name(shared.compiled_model_state.model_hash_str + "_fs", get_device(), example_inputs, shared.opts.openvino_cache_path)
        if os.path.isfile(maybe_fs_cached_name + ".xml") and os.path.isfile(maybe_fs_cached_name + ".bin"):
            example_inputs_reordered = []
            if os.path.isfile(maybe_fs_cached_name + ".txt"):
                # reorder the example inputs to match the shapes recorded at serialization time
                with open(maybe_fs_cached_name + ".txt", "r") as f:
                    for input_data in example_inputs:
                        shape = f.readline()
                        if str(input_data.size()) != shape:
                            for idx1, input_data1 in enumerate(example_inputs):
                                if str(input_data1.size()).strip() == str(shape).strip():
                                    example_inputs_reordered.append(example_inputs[idx1])
                example_inputs = example_inputs_reordered
            if dont_use_faketensors or shared.opts.openvino_disable_memory_cleanup:
                pass
            else:
                # Delete unused subgraphs
                subgraph = subgraph.apply(sd_models.convert_to_faketensors)
                devices.torch_gc(force=True)
            # Model is fully supported and already cached. Run the cached OV model directly.
            compiled_model = openvino_compile_cached_model(maybe_fs_cached_name, *example_inputs)

            def _call(*args):
                if (shared.compiled_model_state.cn_model != [] and str(shared.compiled_model_state.cn_model) in maybe_fs_cached_name):
                    args_reordered = []
                    if os.path.isfile(maybe_fs_cached_name + ".txt"):
                        with open(maybe_fs_cached_name + ".txt", "r") as f:
                            for input_data in args:
                                shape = f.readline()
                                if str(input_data.size()) != shape:
                                    for idx1, input_data1 in enumerate(args):
                                        if str(input_data1.size()).strip() == str(shape).strip():
                                            args_reordered.append(args[idx1])
                        args = args_reordered
                res = execute_cached(compiled_model, *args)
                shared.compiled_model_state.partition_id = shared.compiled_model_state.partition_id + 1
                return res
            return _call
    else:
        os.environ.setdefault('OPENVINO_TORCH_MODEL_CACHING', "0")
        maybe_fs_cached_name = None

    if inputs_reversed:
        example_inputs.reverse()

    model = make_fx(subgraph)(*example_inputs)
    # swap in-place aten.mul_ for its out-of-place equivalent before partitioning
    for node in model.graph.nodes:
        if node.target == torch.ops.aten.mul_.Tensor:
            node.target = torch.ops.aten.mul.Tensor
    with devices.inference_context():
        model.eval()
    partitioner = Partitioner()
    compiled_model = partitioner.make_partitions(model)

    if executor_parameters is not None and 'model_hash_str' in executor_parameters:
        # Check if the model is fully supported.
        fully_supported = partitioner.check_fully_supported(compiled_model)
        if fully_supported:
            executor_parameters["model_hash_str"] += "_fs"

    def _call(*args):
        res = execute(compiled_model, *args, executor="openvino",
                      executor_parameters=executor_parameters, file_name=maybe_fs_cached_name)
        return res
    return _call