diff --git a/.gitattributes b/.gitattributes index b3d46e01c005ca592001dd904f0d4d9470b34ee5..93c30276ecd35480666c054a187a1c531da6753c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -206,3 +206,4 @@ SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/lite/expe SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/core/kernels/libtfkernel_sobol_op.so filter=lfs diff=lfs merge=lfs -text SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/tf2xla/ops/_xla_ops.so filter=lfs diff=lfs merge=lfs -text SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/stablehlo/stablehlo_extension.so filter=lfs diff=lfs merge=lfs -text +SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_quantize_model.so filter=lfs diff=lfs merge=lfs -text diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..db2aed57a26ea9aa0ee63a08cbcb282ee56209ac Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__init__.py new file mode 100644 index 
0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..12d816e0109b6e9eece89967cbffbec1a597b1d5 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1c38d06f2f77965af6d1ce25d39530419f578727 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops_grad.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops_grad.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2209a91501d91c973f0c95570c948a8f638504bd Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/__pycache__/xla_ops_grad.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/xla_ops.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/xla_ops.py new file mode 100644 index 
0000000000000000000000000000000000000000..0b488c088f1240a636dc3cb34da3acedf5731844 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/jit/ops/xla_ops.py @@ -0,0 +1,352 @@ +"""Python wrappers around TensorFlow ops. + +This file is MACHINE GENERATED! Do not edit. +""" + +import collections + +from tensorflow.python import pywrap_tfe as pywrap_tfe +from tensorflow.python.eager import context as _context +from tensorflow.python.eager import core as _core +from tensorflow.python.eager import execute as _execute +from tensorflow.python.framework import dtypes as _dtypes +from tensorflow.security.fuzzing.py import annotation_types as _atypes + +from tensorflow.python.framework import op_def_registry as _op_def_registry +from tensorflow.python.framework import ops as _ops +from tensorflow.python.framework import op_def_library as _op_def_library +from tensorflow.python.util.deprecation import deprecated_endpoints +from tensorflow.python.util import dispatch as _dispatch +from tensorflow.python.util.tf_export import tf_export + +from typing import TypeVar, List, Any +from typing_extensions import Annotated + +TV_XlaClusterOutput_T = TypeVar("TV_XlaClusterOutput_T", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, _atypes.Float16, _atypes.Float32, _atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('xla_cluster_output') +def xla_cluster_output(input: Annotated[Any, TV_XlaClusterOutput_T], name=None) -> Annotated[Any, TV_XlaClusterOutput_T]: + r"""Operator that connects the output of an XLA computation to other 
consumer graph nodes. + + Args: + input: A `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. Has the same type as `input`. + """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "XlaClusterOutput", name, input) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_xla_cluster_output( + (input, name,), None) + if _result is not NotImplemented: + return _result + return xla_cluster_output_eager_fallback( + input, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + xla_cluster_output, (), dict(input=input, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_xla_cluster_output( + (input, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
def xla_cluster_output_eager_fallback(input: Annotated[Any, TV_XlaClusterOutput_T], name, ctx) -> Annotated[Any, TV_XlaClusterOutput_T]:
  """Runs the `XlaClusterOutput` op through `_execute.execute`.

  Args:
    input: The single op input; passed through
      `_execute.args_to_matching_eager` before execution.
    name: Forwarded to `_execute.execute`.
    ctx: The context used for argument conversion and execution.

  Returns:
    The one output produced by the op.
  """
  dtype_attr, matched = _execute.args_to_matching_eager([input], ctx, [])
  (converted_input,) = matched
  flat_inputs = [converted_input]
  op_attrs = ("T", dtype_attr)
  outputs = _execute.execute(
      b"XlaClusterOutput", 1, inputs=flat_inputs, attrs=op_attrs, ctx=ctx,
      name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient(
        "XlaClusterOutput", flat_inputs, op_attrs, outputs)
  # The op is executed with exactly one requested output; unwrap it.
  (single_output,) = outputs
  return single_output
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "XlaLaunch", name, constants, args, resources, "Tresults", + Tresults, "function", function) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_xla_launch( + (constants, args, resources, Tresults, function, name,), None) + if _result is not NotImplemented: + return _result + return xla_launch_eager_fallback( + constants, args, resources, Tresults=Tresults, function=function, + name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + xla_launch, (), dict(constants=constants, args=args, + resources=resources, Tresults=Tresults, + function=function, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_xla_launch( + (constants, args, resources, Tresults, function, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. + if not isinstance(resources, (list, tuple)): + raise TypeError( + "Expected list for 'resources' argument to " + "'xla_launch' Op, not %r." % resources) + _attr_Nresources = len(resources) + if not isinstance(Tresults, (list, tuple)): + raise TypeError( + "Expected list for 'Tresults' argument to " + "'xla_launch' Op, not %r." 
def xla_launch_eager_fallback(constants, args, resources: Annotated[List[Any], _atypes.Resource], Tresults, function, name, ctx):
  """Runs the `XlaLaunch` op through `_execute.execute`.

  Args:
    constants: Converted with `_execute.convert_to_mixed_eager_tensors`.
    args: Converted with `_execute.convert_to_mixed_eager_tensors`.
    resources: A list or tuple; converted to tensors of `_dtypes.resource`.
    Tresults: A list or tuple whose items are passed to `_execute.make_type`.
    function: Forwarded unchanged as the "function" attr.
    name: Forwarded to `_execute.execute`.
    ctx: The context used for argument conversion and execution.

  Returns:
    Whatever `_execute.execute` returns when asked for `len(Tresults)`
    outputs.

  Raises:
    TypeError: If `resources` or `Tresults` is not a list or tuple.
  """
  # `resources` is validated before `Tresults`, so that a call with both
  # arguments malformed reports `resources` first.
  if not isinstance(resources, (list, tuple)):
    raise TypeError(
        "Expected list for 'resources' argument to 'xla_launch' Op, not %r."
        % resources)
  if not isinstance(Tresults, (list, tuple)):
    raise TypeError(
        "Expected list for 'Tresults' argument to 'xla_launch' Op, not %r."
        % Tresults)
  resource_count = len(resources)
  result_types = [
      _execute.make_type(result_type, "Tresults") for result_type in Tresults
  ]

  constant_types, constant_tensors = (
      _execute.convert_to_mixed_eager_tensors(constants, ctx))
  arg_types, arg_tensors = _execute.convert_to_mixed_eager_tensors(args, ctx)
  resource_tensors = _ops.convert_n_to_tensor(resources, _dtypes.resource)

  # Inputs are flattened in op-signature order: constants, args, resources.
  flat_inputs = [*constant_tensors, *arg_tensors, *resource_tensors]
  op_attrs = (
      "Tconstants", constant_types,
      "Targs", arg_types,
      "Nresources", resource_count,
      "Tresults", result_types,
      "function", function,
  )
  outputs = _execute.execute(
      b"XlaLaunch", len(result_types), inputs=flat_inputs, attrs=op_attrs,
      ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient("XlaLaunch", flat_inputs, op_attrs, outputs)
  return outputs
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "XlaLaunchV2", name, args, "Tresults", Tresults, "constants", + constants, "resources", resources, "function", function) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_xla_launch_v2( + (args, Tresults, constants, resources, function, name,), None) + if _result is not NotImplemented: + return _result + return xla_launch_v2_eager_fallback( + args, Tresults=Tresults, constants=constants, resources=resources, + function=function, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + xla_launch_v2, (), dict(args=args, Tresults=Tresults, + constants=constants, resources=resources, + function=function, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_xla_launch_v2( + (args, Tresults, constants, resources, function, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. + if not isinstance(Tresults, (list, tuple)): + raise TypeError( + "Expected list for 'Tresults' argument to " + "'xla_launch_v2' Op, not %r." % Tresults) + Tresults = [_execute.make_type(_t, "Tresults") for _t in Tresults] + if not isinstance(constants, (list, tuple)): + raise TypeError( + "Expected list for 'constants' argument to " + "'xla_launch_v2' Op, not %r." % constants) + constants = [_execute.make_int(_i, "constants") for _i in constants] + if not isinstance(resources, (list, tuple)): + raise TypeError( + "Expected list for 'resources' argument to " + "'xla_launch_v2' Op, not %r." 
def xla_launch_v2_eager_fallback(args, Tresults, constants, resources, function, name, ctx):
  """Runs the `XlaLaunchV2` op through `_execute.execute`.

  Args:
    args: Converted with `_execute.convert_to_mixed_eager_tensors`.
    Tresults: A list or tuple whose items are passed to `_execute.make_type`.
    constants: A list or tuple whose items are passed to `_execute.make_int`.
    resources: A list or tuple whose items are passed to `_execute.make_int`.
    function: Forwarded unchanged as the "function" attr.
    name: Forwarded to `_execute.execute`.
    ctx: The context used for argument conversion and execution.

  Returns:
    Whatever `_execute.execute` returns when asked for `len(Tresults)`
    outputs.

  Raises:
    TypeError: If `Tresults`, `constants` or `resources` is not a list or
      tuple.
  """
  # Each attr is validated and converted before the next one is looked at, so
  # the first problem encountered in (Tresults, constants, resources) order is
  # the one that is reported.
  if not isinstance(Tresults, (list, tuple)):
    raise TypeError(
        "Expected list for 'Tresults' argument to 'xla_launch_v2' Op, not %r."
        % Tresults)
  result_types = [
      _execute.make_type(result_type, "Tresults") for result_type in Tresults
  ]

  if not isinstance(constants, (list, tuple)):
    raise TypeError(
        "Expected list for 'constants' argument to "
        "'xla_launch_v2' Op, not %r." % constants)
  constant_indices = [
      _execute.make_int(index, "constants") for index in constants
  ]

  if not isinstance(resources, (list, tuple)):
    raise TypeError(
        "Expected list for 'resources' argument to "
        "'xla_launch_v2' Op, not %r." % resources)
  resource_indices = [
      _execute.make_int(index, "resources") for index in resources
  ]

  arg_types, arg_tensors = _execute.convert_to_mixed_eager_tensors(args, ctx)
  flat_inputs = list(arg_tensors)
  op_attrs = (
      "Targs", arg_types,
      "Tresults", result_types,
      "constants", constant_indices,
      "resources", resource_indices,
      "function", function,
  )
  outputs = _execute.execute(
      b"XlaLaunchV2", len(result_types), inputs=flat_inputs, attrs=op_attrs,
      ctx=ctx, name=name)
  if _execute.must_record_gradient():
    _execute.record_gradient("XlaLaunchV2", flat_inputs, op_attrs, outputs)
  return outputs
@ops.RegisterGradient("XlaClusterOutput")
def _XlaClusterOutputGrad(_, grad):
  """Gradient function registered for `XlaClusterOutput`; always raises.

  Args:
    _: Unused first argument supplied by the gradient registry.
    grad: Unused incoming gradient.

  Raises:
    RuntimeError: Always, with a message telling the user to move the
      gradient computation inside `xla.compile()`.
  """
  del grad  # unused
  # The literals are joined by implicit concatenation, so each sentence must
  # carry its own trailing space.
  raise RuntimeError("Gradient computation of graph in xla.compile() is "
                     "prohibited because it can cause performance degradation. "
                     "Please move gradient computation inside xla.compile().")
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/py_function_lib.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/quantize_model.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/quantize_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..4519bc7cbb9d15ba888f615d5ac147f7ff18c06b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/quantize_model.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/representative_dataset.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/representative_dataset.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..c57fd0c58cc87cc800ec228f7b1a342096f9469b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/representative_dataset.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/save_model.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/save_model.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..47107e159d299dd75909b630ef53ac10e45bf5c4 Binary files /dev/null and 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/__pycache__/save_model.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/py_function_lib.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/py_function_lib.py new file mode 100644 index 0000000000000000000000000000000000000000..edc900ab5e35058f9df1bc48839c3358131df7e2 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/py_function_lib.py @@ -0,0 +1,770 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Defines a wrapper class for overridden python method definitions.""" + +from collections.abc import Callable, Collection, Mapping, Sequence +import functools +import traceback +from typing import Optional, TypeVar + +from absl import logging + +from tensorflow.compiler.mlir.quantization.stablehlo import quantization_config_pb2 as stablehlo_quant_config_pb2 +from tensorflow.compiler.mlir.quantization.tensorflow import exported_model_pb2 +from tensorflow.compiler.mlir.quantization.tensorflow import quantization_options_pb2 +from tensorflow.compiler.mlir.quantization.tensorflow.calibrator import calibration_algorithm +from tensorflow.compiler.mlir.quantization.tensorflow.calibrator import calibration_statistics_pb2 +from tensorflow.compiler.mlir.quantization.tensorflow.python import pywrap_function_lib +from tensorflow.compiler.mlir.quantization.tensorflow.python import representative_dataset as rd +from tensorflow.compiler.mlir.quantization.tensorflow.python import save_model +from tensorflow.core.protobuf import meta_graph_pb2 +from tensorflow.core.protobuf import saver_pb2 +from tensorflow.python.client import session +from tensorflow.python.eager import context +from tensorflow.python.eager import wrap_function +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor_conversion +from tensorflow.python.lib.io import file_io +from tensorflow.python.saved_model import load +from tensorflow.python.saved_model import loader_impl +from tensorflow.python.trackable import autotrackable +from tensorflow.python.types import core + + +# Name of the saved model assets directory. +_ASSETS_DIR = 'assets' +_ASSETS_EXTRA_DIR = 'assets.extra' + +# Type variable for a type that is not `None`. 
def _copy_assets(src_path: str, dst_path: str) -> None:
  """Copies the assets directories of the saved model.

  Clones the contents of the `assets/` and `assets.extra/` directories from
  the source saved model directory to the destination saved model directory,
  preserving the relative layout of nested subdirectories. Nothing is copied
  for an assets directory that does not exist in the source directory.

  Args:
    src_path: Source saved model directory.
    dst_path: Destination saved model directory. This directory must exist.
  """
  for assets_dir_name in [_ASSETS_DIR, _ASSETS_EXTRA_DIR]:
    src_assets_path = file_io.join(src_path, assets_dir_name)
    if not file_io.file_exists_v2(src_assets_path):
      # Do nothing if the source assets path does not exist.
      continue

    dst_assets_path = file_io.join(dst_path, assets_dir_name)
    file_io.create_dir_v2(dst_assets_path)

    for curr_dir, _, files in file_io.walk_v2(src_assets_path):
      if not files:
        continue

      # `curr_dir` always starts with `src_assets_path` (it is the walk root).
      # Swap only that leading occurrence: an unbounded replace would also
      # rewrite the same text if it reappears deeper in the path.
      curr_dst_dir = curr_dir.replace(src_assets_path, dst_assets_path, 1)

      # Nested asset subdirectories do not exist at the destination yet;
      # create them before copying files into them.
      file_io.recursive_create_dir_v2(curr_dst_dir)

      for asset_file_name in files:
        src_asset_file = file_io.join(curr_dir, asset_file_name)
        dst_asset_file = file_io.join(curr_dst_dir, asset_file_name)

        file_io.copy_v2(src_asset_file, dst_asset_file)
        logging.info(
            'Copied asset file: %s -> %s', src_asset_file, dst_asset_file
        )
def _create_sample_validator(
    expected_input_keys: Collection[str],
) -> Callable[[rd.RepresentativeSample], rd.RepresentativeSample]:
  """Creates a validator function for a representative sample.

  Args:
    expected_input_keys: Input keys (keyword argument names) that the function
      the sample will be used for is expecting to receive. Any collection is
      accepted (list, tuple, set, keys view, ...); it is normalized to a set
      once, when the validator is created.

  Returns:
    A callable that validates a `RepresentativeSample`.
  """
  # A `set` never compares equal to a list or tuple, so comparing the sample's
  # key set against the raw collection would reject every sample whenever the
  # caller passes a non-set collection. Normalize up front.
  expected_keys = set(expected_input_keys)

  def validator(
      sample: rd.RepresentativeSample,
  ) -> rd.RepresentativeSample:
    """Validates a single instance of representative sample.

    This provides a simple check for `sample` that this is a mapping of
    {input_key: input_value}.

    Args:
      sample: A `RepresentativeSample` to validate.

    Returns:
      `sample` iff it is valid.

    Raises:
      ValueError: iff the sample isn't an instance of `Mapping`.
      KeyError: iff the sample does not have the set of input keys that match
        the input keys of the function.
    """
    if not isinstance(sample, Mapping):
      raise ValueError(
          'Invalid representative sample type. Provide a mapping '
          '(usually a dict) of {input_key: input_value}. '
          f'Got type: {type(sample)} instead.'
      )

    if set(sample.keys()) != expected_keys:
      raise KeyError(
          'Invalid input keys for representative sample. The function expects '
          f'input keys of: {expected_keys}. '
          f'Got: {set(sample.keys())}. Please provide correct input keys for '
          'representative samples.'
      )

    return sample

  return validator
+ + NOTE: This is intended to be run in graph mode (TF1). + + The function is identified by the SignatureDef. + + Args: + sess: The Session object to run the function in. + signature_def: A SignatureDef that identifies a function by specifying the + inputs and outputs. + representative_dataset: The representative dataset to run through the + function. + """ + output_tensor_names = [ + output_tensor_info.name + for output_tensor_info in signature_def.outputs.values() + ] + + sample_validator = _create_sample_validator( + expected_input_keys=signature_def.inputs.keys() + ) + + for sample in map( + sample_validator, _log_sample_num_for_calibration(representative_dataset) + ): + # Create a mapping from input tensor name to the input tensor value. + # ex) "Placeholder:0" -> [0, 1, 2] + feed_dict = rd.create_feed_dict_from_input_data(sample, signature_def) + sess.run(output_tensor_names, feed_dict=feed_dict) + + +def _run_graph_for_calibration_graph_mode( + model_dir: str, + tags: Collection[str], + representative_dataset_map: rd.RepresentativeDatasetMapping, +) -> None: + """Runs the graph for calibration in graph mode. + + This function assumes _graph mode_ (used when legacy TF1 is used or when eager + mode is explicitly disabled) when running the graph. This step is used in + order to collect the statistics in CustomAggregatorOp for quantization using + the representative dataset for the actual data provided for inference. + + Args: + model_dir: Path to SavedModel directory. + tags: Collection of tags identifying the MetaGraphDef within the SavedModel. + representative_dataset_map: A map where signature keys are mapped to + corresponding representative datasets. + + Raises: + ValueError: When running the function with the representative dataset fails. + """ + # Replace tf.Tensors by numpy ndarrays in order to reuse the samples in a + # different graph when running the calibration. 
+ _replace_tensors_by_numpy_ndarrays(representative_dataset_map) + + # Run the calibration in a new graph to avoid name collision, which could + # happen when the same model is loaded multiple times in the default graph. + with ops.Graph().as_default(), session.Session() as sess: + meta_graph: meta_graph_pb2.MetaGraphDef = loader_impl.load( + sess, tags, export_dir=model_dir + ) + + for signature_key, repr_ds in representative_dataset_map.items(): + sig_def = meta_graph.signature_def[signature_key] + + try: + _run_function_for_calibration_graph_mode( + sess, signature_def=sig_def, representative_dataset=repr_ds + ) + except Exception as ex: + raise ValueError( + 'Failed to run representative dataset through the ' + f'function with the signature key: {signature_key}.' + ) from ex + + +def _convert_values_to_tf_tensors( + sample: rd.RepresentativeSample, +) -> Mapping[str, core.Tensor]: + """Converts TensorLike values of `sample` to Tensors. + + Creates a copy of `sample`, where each value is converted to Tensors + unless it is already a Tensor. + The values are not converted in-place (i.e. `sample` is not mutated). + + Args: + sample: A representative sample, which is a map of {name -> tensorlike + value}. + + Returns: + Converted map of {name -> tensor}. + """ + tensor_mapping = {} + for name, tensorlike_value in sample.items(): + if isinstance(tensorlike_value, core.Tensor): + tensor_value = tensorlike_value + else: + tensor_value = tensor_conversion.convert_to_tensor_v2_with_dispatch( + tensorlike_value + ) + + tensor_mapping[name] = tensor_value + + return tensor_mapping + + +def _run_function_for_calibration_eager_mode( + func: wrap_function.WrappedFunction, + representative_dataset: rd.RepresentativeDataset, +) -> None: + """Runs the representative dataset through a function for calibration. + + NOTE: This is intended to be run in eager mode (TF2). + + Args: + func: The function to run the representative samples through. 
+ representative_dataset: Representative dataset used for calibration. The + input keys and input values of the representative samples should match the + keyword arguments of `func`. + """ + _, keyword_args = func.structured_input_signature + sample_validator = _create_sample_validator( + expected_input_keys=keyword_args.keys() + ) + + for sample in map( + sample_validator, _log_sample_num_for_calibration(representative_dataset) + ): + # Convert any non-Tensor values from the sample to Tensors. + # This conversion is required because the model saved in `model_dir` is + # saved using TF1 SavedModelBuilder, which doesn't save the + # SavedObjectGraph. + func_kwargs = _convert_values_to_tf_tensors(sample) + func(**func_kwargs) + + +def _run_graph_for_calibration_eager_mode( + model_dir: str, + tags: Collection[str], + representative_dataset_map: rd.RepresentativeDatasetMapping, +) -> None: + """Runs the graph for calibration in eager mode. + + This function assumes _eager mode_ (enabled in TF2 by default) when running + the graph. This step is used in order to collect the statistics in + CustomAggregatorOp for quantization using the representative dataset for the + actual data provided for inference. + + Args: + model_dir: Path to SavedModel directory. + tags: Collection of tags identifying the MetaGraphDef within the SavedModel. + representative_dataset_map: A map where signature keys are mapped to + corresponding representative datasets. + + Raises: + ValueError: When running the function with the representative dataset fails. + """ + root: autotrackable.AutoTrackable = load.load(model_dir, tags) + for signature_key, repr_ds in representative_dataset_map.items(): + try: + _run_function_for_calibration_eager_mode( + func=root.signatures[signature_key], representative_dataset=repr_ds + ) + except Exception as ex: + raise ValueError( + 'Failed to run representative dataset through the ' + f'function with the signature key: {signature_key}.' 
+ ) from ex + + +def _run_graph_for_calibration( + float_model_dir: str, + signature_keys: Sequence[str], + tags: Collection[str], + representative_dataset: rd.RepresentativeDatasetOrMapping, + force_graph_mode_calibration: bool, +) -> None: + """Runs the graph for calibration using representative datasets. + + Args: + float_model_dir: Path to the model to calibrate. + signature_keys: Sequence of keys identifying SignatureDef containing inputs + and outputs. + tags: Collection of tags identifying the MetaGraphDef within the SavedModel + to analyze. + representative_dataset: An iterator that returns a dictionary of {input_key: + input_value} or a mapping from signature keys to such iterators. When + `signature_keys` contains more than one signature key, + `representative_datsaet` should be a mapping that maps each signature keys + to the corresponding representative dataset. + force_graph_mode_calibration: If set to true, it forces calibration in graph + model instead of eager mode when the context is in eager mode. + + Raises: + ValueError iff: + * The representative dataset format is invalid. + * It fails to run the functions using the representative datasets. + """ + try: + _validate_representative_dataset(representative_dataset, signature_keys) + except Exception as ex: + raise ValueError('Invalid representative dataset.') from ex + + # If `representative_dataset` is not a mapping, convert to a mapping for the + # following functions to handle representative datasets more conveniently. + representative_dataset_map = representative_dataset + if not isinstance(representative_dataset, Mapping): + # `signature_keys` is guaranteed to have only one element after the + # validation. 
+ representative_dataset_map = {signature_keys[0]: representative_dataset} + + try: + if context.executing_eagerly() and not force_graph_mode_calibration: + logging.info('Calibration step is executed in eager mode.') + _run_graph_for_calibration_eager_mode( + float_model_dir, tags, representative_dataset_map + ) + else: + logging.info('Calibration step is executed in graph mode.') + _run_graph_for_calibration_graph_mode( + float_model_dir, tags, representative_dataset_map + ) + except Exception as ex: + raise ValueError( + 'Failed to run graph for post-training quantization calibration.' + ) from ex + + logging.info('Calibration step complete.') + + +def _run_calibration( + saved_model_path: str, + signature_keys: Sequence[str], + tags: Collection[str], + force_graph_mode_calibration: bool, + representative_dataset_file_map: Mapping[ + str, quantization_options_pb2.RepresentativeDatasetFile + ], +) -> bool: + """Runs calibration and adds calibration statistics to exported model. + + Args: + saved_model_path: Path to the SavedModel to run calibration. + signature_keys: List of signature keys corresponding to SignatureDefs to run + calibration on. + tags: A set of tags that identify the MetaGraphDef. + force_graph_mode_calibration: If True, runs the calibration in graph mode. + representative_dataset_file_map: Signature key -> + `RepresentativeDatasetFile` mapping for running the calibration step. Each + dataset file stores the representative dataset for the function matching + the signature key. + + Returns: + `True` upon successfully running calibration. + """ + repr_dataset_map = rd.TfRecordRepresentativeDatasetLoader( + representative_dataset_file_map + ).load() + + # Uses the representative dataset to collect statistics for calibration. + # After this operation, min & max values are stored separately in a global + # CalibratorSingleton instance. 
+ _run_graph_for_calibration( + saved_model_path, + signature_keys, + tags, + repr_dataset_map, + force_graph_mode_calibration, + ) + + # Dummy value to indicate successful run, as `None` would indicate error. See + # comments in `NotNoneT`. + return True + + +def _call_and_return_none_on_error( + func: Callable[[], NotNoneT], error_msg: str +) -> Optional[NotNoneT]: + """Calls `func` and returns `None` on error. + + This is used to gracefully return the 'error status' represented as `None`, as + raising exceptions from `PyFunctionLibrary` methods crashes the program. + + Args: + func: The function to run. The function should be a callable returning a + non-None value. + error_msg: The error message to log upon error. Used for debugging purposes. + + Returns: + `None` if the function raises an exception. The return value of `func` + otherwise. + """ + try: + return func() + except Exception as ex: # pylint: disable=broad-exception-caught; Required for graceful failing with pybind11. + # Prints the exception traceback for debuggability. + traceback.print_exception(ex) + # Additional error log for debuggability. + logging.error(error_msg) + return None + + +def _save_model_and_copy_assets( + exported_model: exported_model_pb2.ExportedModel, + src_saved_model_path: str, + dst_saved_model_path: str, + signature_def_map: Mapping[str, meta_graph_pb2.SignatureDef], + tags: Collection[str], +) -> bool: + """Saves the model and copies the assets from the source model. + + Args: + exported_model: ExportedModel to save. + src_saved_model_path: Path to the source SavedModel. This will be used to + copy the asset files to `dst_saved_model_path`. + dst_saved_model_path: Destination path to save the exported model. + signature_def_map: Signature key -> SignatureDef mapping. + tags: Tags to attach to the saved MetaGraphDef. + + Returns: + `True` upon successfully saving the model. 
+ """ + save_model.save_model_v1( + exported_model.graph_def, + dst_saved_model_path, + signature_def_map, + tags, + init_op_name=exported_model.init_node_name, + saver_def=_get_saver_def_or_none(exported_model), + checkpoint_dir=exported_model.checkpoint_dir, + function_aliases=exported_model.function_aliases, + asset_file_defs=exported_model.asset_file_defs, + ) + + _copy_assets(src_saved_model_path, dst_saved_model_path) + + # Dummy value to indicate successful run, as `None` would indicate error. See + # comments in `NotNoneT`. + return True + + +class PyFunctionLibrary(pywrap_function_lib.PyFunctionLibrary): + """Wrapper class for overridden python method definitions. + + This class contains python methods that overrides C++ virtual functions + declared in `pywrap_function_lib.PyFunctionLibrary`. + """ + + # LINT.IfChange(save_exported_model) + def save_exported_model( + self, + dst_saved_model_path: str, + exported_model_serialized: bytes, + src_saved_model_path: str, + tags: set[str], + serialized_signature_def_map: dict[str, bytes], + ) -> Optional[bool]: + # LINT.ThenChange(py_function_lib.h:save_exported_model) + """Saves `ExportedModel` to `dst_saved_model_path` as a SavedModel. + + Args: + dst_saved_model_path: Destination path to save the exported model. + exported_model_serialized: Exported model to export as SavedModel. + src_saved_model_path: Path to the source SavedModel. This will be used to + copy the asset files to `dst_saved_model_path`. + tags: Tags to attach to the saved MetaGraphDef. + serialized_signature_def_map: Signature key -> serialized SignatureDef. + + Returns: + `True` upon successful execution. `None` when an error is raised + internally. + """ + exported_model = exported_model_pb2.ExportedModel.FromString( + exported_model_serialized + ) + + # Deserialize values in signature_def_map. 
+ signature_def_map = {} + for key, serialized_signature_def in serialized_signature_def_map.items(): + signature_def_map[key] = meta_graph_pb2.SignatureDef.FromString( + serialized_signature_def + ) + + return _call_and_return_none_on_error( + func=functools.partial( + _save_model_and_copy_assets, + exported_model, + src_saved_model_path, + dst_saved_model_path, + signature_def_map, + tags, + ), + error_msg=( + f'Failed to save model "{dst_saved_model_path}",' + f' signature_def_map: {signature_def_map}, tags: {tags}.' + ), + ) + + # TODO: b/311097139 - Extract calibration related functions into a separate + # file. + # LINT.IfChange(run_calibration) + def run_calibration( + self, + saved_model_path: str, + signature_keys: list[str], + tags: set[str], + force_graph_mode_calibration: bool, + representative_dataset_file_map_serialized: dict[str, bytes], + ) -> Optional[bool]: + # LINT.ThenChange(py_function_lib.h:run_calibration) + """Runs calibration and adds calibration statistics to exported model. + + Args: + saved_model_path: Path to the SavedModel to run calibration. + signature_keys: List of signature keys corresponding to SignatureDefs to + run calibration on. + tags: A set of tags that identify the MetaGraphDef. + force_graph_mode_calibration: If True, runs the calibration in graph mode. + representative_dataset_file_map_serialized: Signature key -> + `RepresentativeDatasetFile` mapping for running the calibration step. + Each dataset file stores the representative dataset for the function + matching the signature key. + + Returns: + The error message if the function raises and exception. `None` otherwise. + """ + # Deserialize `RepresentativeDatasetFile` values. 
+ dataset_file_map = {} + for ( + signature_key, + dataset_file_serialized, + ) in representative_dataset_file_map_serialized.items(): + dataset_file_map[signature_key] = ( + quantization_options_pb2.RepresentativeDatasetFile.FromString( + dataset_file_serialized + ) + ) + + return _call_and_return_none_on_error( + func=functools.partial( + _run_calibration, + saved_model_path, + signature_keys, + tags, + force_graph_mode_calibration, + dataset_file_map, + ), + error_msg=( + f'Failed to run calibration on model "{saved_model_path}",' + f' signature_keys: {signature_keys}, tags: {tags}.' + ), + ) + + # LINT.IfChange(get_calibration_min_max_value) + def get_calibration_min_max_value( + self, + calibration_statistics_serialized: bytes, + calibration_options_serialized: bytes, + ) -> Optional[tuple[float, float]]: + """Calculates min and max values from statistics. + + Args: + calibration_statistics_serialized: Serialized `CalibrationStatistics`. + This will be the source to calculate min and max values from. + calibration_options_serialized: Serialized `CalibrationOptions`. Specifies + how the min / max should be calculated. + + Returns: + (min_value, max_value): Min and max calculated using calib_opts. `None` + upon error. + """ + # LINT.ThenChange(py_function_lib.h:get_calibration_min_max_value) + + # Deserialize values passed from c++. + statistics = calibration_statistics_pb2.CalibrationStatistics.FromString( + calibration_statistics_serialized + ) + options = stablehlo_quant_config_pb2.CalibrationOptions.FromString( + calibration_options_serialized + ) + + return _call_and_return_none_on_error( + functools.partial( + calibration_algorithm.get_min_max_value, + statistics, + options, + ), + error_msg=( + f'Retrieving calibrated min / max failed. Options: {options}.' 
+ ), + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.pyi b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.pyi new file mode 100644 index 0000000000000000000000000000000000000000..8e4a7cee6203c7904caa4879737436e9581163af --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.pyi @@ -0,0 +1,48 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +from typing import Any, Optional + +class PyFunctionLibrary: + + # LINT.IfChange(save_exported_model) + def save_exported_model( + self, + dst_saved_model_path: str, + exported_model_serialized: bytes, + src_saved_model_path: str, + tags: set[str], + serialized_signature_def_map: dict[str, bytes], + ) -> Optional[bool]: ... + # LINT.ThenChange() + + # LINT.IfChange(run_calibration) + def run_calibration( + self, + saved_model_path: str, + signature_keys: list[str], + tags: set[str], + force_graph_mode_calibration: bool, + # Value type: RepresentativeDatasetFile. + representative_dataset_file_map_serialized: dict[str, bytes], + ) -> Optional[bool]: ... 
+ # LINT.ThenChange() + + # LINT.IfChange(get_calibration_min_max_value) + def get_calibration_min_max_value( + self, + calibration_statistics_serialized: bytes, + calibration_options_serialized: bytes, + ) -> Optional[tuple[float, float]]: ... + # LINT.ThenChange() diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.so b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.so new file mode 100644 index 0000000000000000000000000000000000000000..93614825526a429b65c76acca9f4549ae5a64f6b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_function_lib.so differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_quantize_model.so b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_quantize_model.so new file mode 100644 index 0000000000000000000000000000000000000000..780875b7d25e7584ad593dc084cabbbe7683773d --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/quantization/tensorflow/python/pywrap_quantize_model.so @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ce6434a9bb81081892f212ae21a81fc331353532280da8b44ff7f8640c61b273 +size 2026097 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..b9b601a4a53e70152f671a7ac7fbc760b3622837 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/gen_mlir_passthrough_op.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/gen_mlir_passthrough_op.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1a4dcffcbb97faf07005b908f98bcb692cf0e79e Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/__pycache__/gen_mlir_passthrough_op.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/gen_mlir_passthrough_op.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/gen_mlir_passthrough_op.py new file mode 100644 index 0000000000000000000000000000000000000000..63f02ed16b5af0796dfc14c306e532af941fba3a --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/mlir/tensorflow/gen_mlir_passthrough_op.py @@ -0,0 +1,127 @@ +"""Python wrappers around TensorFlow ops. + +This file is MACHINE GENERATED! Do not edit. 
+""" + +import collections + +from tensorflow.python import pywrap_tfe as pywrap_tfe +from tensorflow.python.eager import context as _context +from tensorflow.python.eager import core as _core +from tensorflow.python.eager import execute as _execute +from tensorflow.python.framework import dtypes as _dtypes +from tensorflow.security.fuzzing.py import annotation_types as _atypes + +from tensorflow.python.framework import op_def_registry as _op_def_registry +from tensorflow.python.framework import ops as _ops +from tensorflow.python.framework import op_def_library as _op_def_library +from tensorflow.python.util.deprecation import deprecated_endpoints +from tensorflow.python.util import dispatch as _dispatch +from tensorflow.python.util.tf_export import tf_export + +from typing import TypeVar, List, Any +from typing_extensions import Annotated + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('mlir_passthrough_op') +def mlir_passthrough_op(inputs, mlir_module: str, Toutputs, name=None): + r"""TODO: add doc. + + Args: + inputs: A list of `Tensor` objects. + mlir_module: A `string`. + Toutputs: A list of `tf.DTypes`. + name: A name for the operation (optional). + + Returns: + A list of `Tensor` objects of type `Toutputs`. 
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "MlirPassthroughOp", name, inputs, "mlir_module", mlir_module, + "Toutputs", Toutputs) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_mlir_passthrough_op( + (inputs, mlir_module, Toutputs, name,), None) + if _result is not NotImplemented: + return _result + return mlir_passthrough_op_eager_fallback( + inputs, mlir_module=mlir_module, Toutputs=Toutputs, name=name, + ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + mlir_passthrough_op, (), dict(inputs=inputs, + mlir_module=mlir_module, + Toutputs=Toutputs, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_mlir_passthrough_op( + (inputs, mlir_module, Toutputs, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. + mlir_module = _execute.make_str(mlir_module, "mlir_module") + if not isinstance(Toutputs, (list, tuple)): + raise TypeError( + "Expected list for 'Toutputs' argument to " + "'mlir_passthrough_op' Op, not %r." 
% Toutputs) + Toutputs = [_execute.make_type(_t, "Toutputs") for _t in Toutputs] + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "MlirPassthroughOp", inputs=inputs, mlir_module=mlir_module, + Toutputs=Toutputs, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + mlir_passthrough_op, (), dict(inputs=inputs, + mlir_module=mlir_module, + Toutputs=Toutputs, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("mlir_module", _op.get_attr("mlir_module"), "Tinputs", + _op.get_attr("Tinputs"), "Toutputs", _op.get_attr("Toutputs")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "MlirPassthroughOp", _inputs_flat, _attrs, _result) + return _result + +MlirPassthroughOp = tf_export("raw_ops.MlirPassthroughOp")(_ops.to_raw_op(mlir_passthrough_op)) +_dispatcher_for_mlir_passthrough_op = mlir_passthrough_op._tf_type_based_dispatcher.Dispatch + + +def mlir_passthrough_op_eager_fallback(inputs, mlir_module: str, Toutputs, name, ctx): + mlir_module = _execute.make_str(mlir_module, "mlir_module") + if not isinstance(Toutputs, (list, tuple)): + raise TypeError( + "Expected list for 'Toutputs' argument to " + "'mlir_passthrough_op' Op, not %r." 
% Toutputs) + Toutputs = [_execute.make_type(_t, "Toutputs") for _t in Toutputs] + _attr_Tinputs, inputs = _execute.convert_to_mixed_eager_tensors(inputs, ctx) + _inputs_flat = list(inputs) + _attrs = ("mlir_module", mlir_module, "Tinputs", _attr_Tinputs, "Toutputs", + Toutputs) + _result = _execute.execute(b"MlirPassthroughOp", len(Toutputs), + inputs=_inputs_flat, attrs=_attrs, ctx=ctx, + name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "MlirPassthroughOp", _inputs_flat, _attrs, _result) + return _result + diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..72621675ea9e4fd611a19413296ec90ade835043 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/xla_data_pb2.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/xla_data_pb2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..fe717f37ba3391c1b54d1a554f549528a4183f59 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/__pycache__/xla_data_pb2.cpython-310.pyc differ diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ae5a536b75c27b66d043a8304b9df6b3964e10c8 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/hlo_pb2.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/hlo_pb2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..6e705734c7ecc7d9d28ad1c5090fc5f01aa6732a Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/__pycache__/hlo_pb2.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/hlo_pb2.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/hlo_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..6599cda46cc8c2d50fbb2d34448cba944ba426ad --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/service/hlo_pb2.py @@ -0,0 +1,104 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: xla/service/hlo.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2 +from tensorflow.compiler.xla import xla_data_pb2 as xla_dot_xla__data__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x15xla/service/hlo.proto\x12\x03xla\x1a\x19google/protobuf/any.proto\x1a\x12xla/xla_data.proto\"\xd0\x17\n\x13HloInstructionProto\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06opcode\x18\x02 \x01(\t\x12\x1e\n\x05shape\x18\x03 \x01(\x0b\x32\x0f.xla.ShapeProto\x12!\n\x08metadata\x18\x07 \x01(\x0b\x32\x0f.xla.OpMetadata\x12\"\n\x07literal\x18\x08 \x01(\x0b\x32\x11.xla.LiteralProto\x12\x18\n\x10parameter_number\x18\t \x01(\x03\x12\x13\n\x0b\x66usion_kind\x18\x0b \x01(\t\x12\x13\n\x0btuple_index\x18\r \x01(\x03\x12\x12\n\ndimensions\x18\x0e \x03(\x03\x12\x1b\n\x06window\x18\x0f \x01(\x0b\x32\x0b.xla.Window\x12G\n\x1d\x63onvolution_dimension_numbers\x18\x10 \x01(\x0b\x32 .xla.ConvolutionDimensionNumbers\x12\x1b\n\x13\x66\x65\x61ture_group_count\x18\x32 \x01(\x03\x12\x19\n\x11\x62\x61tch_group_count\x18: \x01(\x03\x12\x42\n\x10slice_dimensions\x18\x11 \x03(\x0b\x32(.xla.HloInstructionProto.SliceDimensions\x12\x15\n\rexponent_bits\x18\x12 \x01(\x05\x12\x15\n\rmantissa_bits\x18\x13 \x01(\x05\x12\x1b\n\x13\x64ynamic_slice_sizes\x18\x14 \x03(\x03\x12*\n\x0epadding_config\x18\x15 \x01(\x0b\x32\x12.xla.PaddingConfig\x12\x16\n\x0eoutfeed_config\x18\x16 \x01(\x0c\x12-\n\x0c\x64istribution\x18\x17 \x01(\x0e\x32\x17.xla.RandomDistribution\x12\x0f\n\x07\x65psilon\x18\x18 \x01(\x02\x12\x15\n\rfeature_index\x18\x19 \x01(\x03\x12\x12\n\nchannel_id\x18\x1a 
\x01(\x03\x12\x15\n\rinfeed_config\x18\x1b \x01(\x0c\x12\x1a\n\x12\x63ustom_call_target\x18\x1c \x01(\t\x12&\n\routfeed_shape\x18\x1d \x01(\x0b\x32\x0f.xla.ShapeProto\x12\x37\n\x15\x64ot_dimension_numbers\x18\x1e \x01(\x0b\x32\x18.xla.DotDimensionNumbers\x12\x1e\n\x08\x66\x66t_type\x18\x1f \x01(\x0e\x32\x0c.xla.FftType\x12\x12\n\nfft_length\x18 \x03(\x03\x12\x1c\n\x14\x63omparison_direction\x18? \x01(\t\x12=\n\x18gather_dimension_numbers\x18! \x01(\x0b\x32\x1b.xla.GatherDimensionNumbers\x12\x1a\n\x12gather_slice_sizes\x18\" \x03(\x03\x12\n\n\x02id\x18# \x01(\x03\x12\x13\n\x0boperand_ids\x18$ \x03(\x03\x12\x1f\n\x17\x63ontrol_predecessor_ids\x18% \x03(\x03\x12\x1e\n\x16\x63\x61lled_computation_ids\x18& \x03(\x03\x12!\n\x08sharding\x18( \x01(\x0b\x32\x0f.xla.OpSharding\x12\x16\n\x0e\x62\x61\x63kend_config\x18+ \x01(\x0c\x12-\n\x0ereplica_groups\x18\x31 \x03(\x0b\x32\x11.xla.ReplicaGroupB\x02\x18\x01\x12\x19\n\rall_reduce_id\x18- \x01(\x03\x42\x02\x18\x01\x12\x1d\n\x15use_global_device_ids\x18G \x01(\x08\x12\x18\n\x10is_host_transfer\x18/ \x01(\x08\x12\x11\n\tis_stable\x18< \x01(\x08\x12?\n\x19scatter_dimension_numbers\x18\x30 \x01(\x0b\x32\x1c.xla.ScatterDimensionNumbers\x12.\n\x10precision_config\x18\x33 \x01(\x0b\x32\x14.xla.PrecisionConfig\x12.\n\x13source_target_pairs\x18\x34 \x03(\x0b\x32\x11.xla.SourceTarget\x12.\n\x15\x64omain_entry_sharding\x18\x36 \x01(\x0b\x32\x0f.xla.OpSharding\x12-\n\x14\x64omain_exit_sharding\x18\x37 \x01(\x0b\x32\x0f.xla.OpSharding\x12\x18\n\x10\x63onstrain_layout\x18\x38 \x01(\x08\x12\x33\n\x1aoperand_shapes_with_layout\x18\x39 \x03(\x0b\x32\x0f.xla.ShapeProto\x12=\n\x18triangular_solve_options\x18; \x01(\x0b\x32\x1b.xla.TriangularSolveOptions\x12.\n\x10\x63holesky_options\x18> \x01(\x0b\x32\x14.xla.CholeskyOptions\x12\x38\n\x15parameter_replication\x18= \x01(\x0b\x32\x19.xla.ParameterReplication\x12#\n\x1b\x63ustom_call_has_side_effect\x18\x41 \x01(\x08\x12;\n\x17output_operand_aliasing\x18J 
\x03(\x0b\x32\x1a.xla.OutputOperandAliasing\x12\x35\n\x14\x63ustom_call_schedule\x18L \x01(\x0e\x32\x17.xla.CustomCallSchedule\x12\r\n\x05\x64\x65lta\x18\x42 \x01(\x03\x12\x1a\n\x12indices_are_sorted\x18\x43 \x01(\x08\x12\x34\n\x13\x66rontend_attributes\x18\x44 \x01(\x0b\x32\x17.xla.FrontendAttributes\x12\x16\n\x0eunique_indices\x18\x45 \x01(\x08\x12+\n\rrng_algorithm\x18\x46 \x01(\x0e\x32\x14.xla.RandomAlgorithm\x12\x17\n\x0f\x63omparison_type\x18H \x01(\t\x12%\n\x19is_cross_program_prefetch\x18I \x01(\x08\x42\x02\x18\x01\x12&\n\x1c\x63ross_program_prefetch_index\x18P \x01(\x05H\x00\x12&\n\x0cpadding_type\x18K \x01(\x0e\x32\x10.xla.PaddingType\x12:\n\x17\x63ustom_call_api_version\x18M \x01(\x0e\x32\x19.xla.CustomCallApiVersion\x12\x1e\n\x16\x61sync_execution_thread\x18O \x01(\t\x12\t\n\x01k\x18Q \x01(\x03\x12\x0f\n\x07largest\x18U \x01(\x08\x12*\n\x0estatistics_viz\x18R \x01(\x0b\x32\x12.xla.StatisticsViz\x12-\n\x0c\x64ot_sparsity\x18V \x03(\x0b\x32\x17.xla.SparsityDescriptor\x12>\n\x16\x63ollective_device_list\x18W \x01(\x0b\x32\x1e.xla.CollectiveDeviceListProto\x12/\n\x0eoriginal_value\x18X \x01(\x0b\x32\x17.xla.OriginalValueProto\x12\x14\n\x0cis_composite\x18Y \x01(\x08\x1a?\n\x0fSliceDimensions\x12\r\n\x05start\x18\x01 \x01(\x03\x12\r\n\x05limit\x18\x02 \x01(\x03\x12\x0e\n\x06stride\x18\x03 \x01(\x03\x42\'\n%optional_cross_program_prefetch_indexJ\x04\x08\n\x10\x0bJ\x04\x08\x0c\x10\rJ\x04\x08\x04\x10\x05J\x04\x08\x05\x10\x06J\x04\x08\x06\x10\x07J\x04\x08,\x10-J\x04\x08\x35\x10\x36J\x04\x08.\x10/J\x04\x08)\x10*J\x04\x08*\x10+J\x04\x08@\x10\x41J\x04\x08N\x10OJ\x04\x08S\x10TJ\x04\x08T\x10UR\x0eparameter_nameR\x1e\x66used_instructions_computationR\roperand_namesR\x19\x63ontrol_predecessor_namesR\x18\x63\x61lled_computation_namesR\x11replica_group_idsR\x12\x63ustom_call_opaqueR\x12\x61ll_reduce_barrier\"\xe9\x01\n\x13HloComputationProto\x12\x0c\n\x04name\x18\x01 \x01(\t\x12.\n\x0cinstructions\x18\x02 
\x03(\x0b\x32\x18.xla.HloInstructionProto\x12-\n\rprogram_shape\x18\x04 \x01(\x0b\x32\x16.xla.ProgramShapeProto\x12\n\n\x02id\x18\x05 \x01(\x03\x12\x0f\n\x07root_id\x18\x06 \x01(\x03\x12\x1d\n\x15is_fusion_computation\x18\x07 \x01(\x08\x12\x18\n\x10\x65xecution_thread\x18\x08 \x01(\tJ\x04\x08\x03\x10\x04R\troot_name\"\xd8\x01\n\x10HloScheduleProto\x12\x37\n\tsequences\x18\x01 \x03(\x0b\x32$.xla.HloScheduleProto.SequencesEntry\x1a.\n\x13InstructionSequence\x12\x17\n\x0finstruction_ids\x18\x01 \x03(\x03\x1a[\n\x0eSequencesEntry\x12\x0b\n\x03key\x18\x01 \x01(\x03\x12\x38\n\x05value\x18\x02 \x01(\x0b\x32).xla.HloScheduleProto.InstructionSequence:\x02\x38\x01\"\xdb\x01\n\x18HloInputOutputAliasProto\x12>\n\x07\x65ntries\x18\x01 \x03(\x0b\x32-.xla.HloInputOutputAliasProto.AliasEntryProto\x1a\x7f\n\x0f\x41liasEntryProto\x12\x1a\n\x12output_shape_index\x18\x01 \x03(\x03\x12\x18\n\x10parameter_number\x18\x02 \x01(\x03\x12\x1d\n\x15parameter_shape_index\x18\x03 \x03(\x03\x12\x17\n\x04kind\x18\x04 \x01(\x0e\x32\t.xla.Kind\"\xa8\x01\n\x13HloBufferDonorProto\x12?\n\x07\x65ntries\x18\x01 \x03(\x0b\x32..xla.HloBufferDonorProto.BufferDonorEntryProto\x1aP\n\x15\x42ufferDonorEntryProto\x12\x18\n\x10parameter_number\x18\x01 \x01(\x03\x12\x1d\n\x15parameter_shape_index\x18\x02 \x03(\x03\"H\n\x14\x43rossProgramPrefetch\x12\x11\n\tparameter\x18\x01 \x01(\x03\x12\r\n\x05index\x18\x02 \x03(\x03\x12\x0e\n\x06offset\x18\x03 \x01(\x03\"\xdd\x02\n\x14StackFrameIndexProto\x12\x12\n\nfile_names\x18\x01 \x03(\t\x12\x16\n\x0e\x66unction_names\x18\x02 \x03(\t\x12>\n\x0e\x66ile_locations\x18\x03 \x03(\x0b\x32&.xla.StackFrameIndexProto.FileLocation\x12:\n\x0cstack_frames\x18\x04 \x03(\x0b\x32$.xla.StackFrameIndexProto.StackFrame\x1a\\\n\x0c\x46ileLocation\x12\x14\n\x0c\x66ile_name_id\x18\x01 \x01(\x05\x12\x18\n\x10\x66unction_name_id\x18\x02 \x01(\x05\x12\x0c\n\x04line\x18\x03 \x01(\x05\x12\x0e\n\x06\x63olumn\x18\x04 \x01(\x05\x1a?\n\nStackFrame\x12\x18\n\x10\x66ile_location_id\x18\x01 
\x01(\x05\x12\x17\n\x0fparent_frame_id\x18\x02 \x01(\x05\"\xdb\x08\n\x0eHloModuleProto\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x1e\n\x16\x65ntry_computation_name\x18\x02 \x01(\t\x12\x1c\n\x14\x65ntry_computation_id\x18\x06 \x01(\x03\x12.\n\x0c\x63omputations\x18\x03 \x03(\x0b\x32\x18.xla.HloComputationProto\x12\x32\n\x12host_program_shape\x18\x04 \x01(\x0b\x32\x16.xla.ProgramShapeProto\x12\n\n\x02id\x18\x05 \x01(\x03\x12\'\n\x08schedule\x18\x07 \x01(\x0b\x32\x15.xla.HloScheduleProto\x12\x39\n\x12input_output_alias\x18\x08 \x01(\x0b\x32\x1d.xla.HloInputOutputAliasProto\x12.\n\x0c\x62uffer_donor\x18\x12 \x01(\x0b\x32\x18.xla.HloBufferDonorProto\x12;\n\x18\x63ross_program_prefetches\x18\n \x03(\x0b\x32\x19.xla.CrossProgramPrefetch\x12\x12\n\nis_dynamic\x18\x0b \x01(\x08\x12-\n\x14spmd_output_sharding\x18\x0c \x01(\x0b\x32\x0f.xla.OpSharding\x12\x32\n\x19spmd_parameters_shardings\x18\x0e \x03(\x0b\x32\x0f.xla.OpSharding\x12\"\n\x1ause_auto_spmd_partitioning\x18\x10 \x01(\x08\x12\x35\n\x0cprofile_info\x18\r \x03(\x0b\x32\x1f.xla.HloModuleProto.ProfileInfo\x12\x35\n\x11\x64\x65vice_assignment\x18\x0f \x01(\x0b\x32\x1a.xla.DeviceAssignmentProto\x12\x34\n\x11stack_frame_index\x18\x11 \x01(\x0b\x32\x19.xla.StackFrameIndexProto\x12\x34\n\x13\x66rontend_attributes\x18\x13 \x01(\x0b\x32\x17.xla.FrontendAttributes\x1a\xd1\x01\n\x0bProfileInfo\x12\x35\n\x0cprofile_type\x18\x01 \x01(\x0e\x32\x1f.xla.HloModuleProto.ProfileType\x12\x18\n\x10relative_speedup\x18\x02 \x01(\x01\x12*\n\x0eprofile_source\x18\x03 \x01(\x0e\x32\x12.xla.ProfileSource\x12\x30\n\x11\x63ompilation_event\x18\x04 \x01(\x0e\x32\x15.xla.CompilationEvent\x12\x13\n\x0b\x66ingerprint\x18\x05 \x01(\t\"R\n\x0bProfileType\x12\x0b\n\x07INVALID\x10\x00\x12\x08\n\x04\x46LAG\x10\x01\x12\n\n\x06\x46USION\x10\x02\x12\n\n\x06LAYOUT\x10\x03\x12\x07\n\x03\x44OT\x10\x04\x12\x0b\n\x07\x46LAGNET\x10\x05J\x04\x08\t\x10\nR\x19\x64ynamic_parameter_binding\"\xd0\x01\n\x12LogicalBufferProto\x12\n\n\x02id\x18\x01 
\x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x34\n\ndefined_at\x18\x03 \x01(\x0b\x32 .xla.LogicalBufferProto.Location\x12\r\n\x05\x63olor\x18\x04 \x01(\x03\x1a[\n\x08Location\x12\x1c\n\x10instruction_name\x18\x02 \x01(\tB\x02\x18\x01\x12\x16\n\x0einstruction_id\x18\x04 \x01(\x03\x12\x13\n\x0bshape_index\x18\x03 \x03(\x03J\x04\x08\x01\x10\x02\"\xf8\x02\n\x15\x42ufferAllocationProto\x12\r\n\x05index\x18\x01 \x01(\x03\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x17\n\x0fis_thread_local\x18\x03 \x01(\x08\x12\x10\n\x08is_tuple\x18\x0b \x01(\x08\x12&\n\x1eis_entry_computation_parameter\x18\x05 \x01(\x08\x12\x13\n\x0bis_constant\x18\x0c \x01(\x08\x12\x18\n\x10parameter_number\x18\x06 \x01(\x03\x12\x1d\n\x15parameter_shape_index\x18\n \x03(\x03\x12\x16\n\x0emaybe_live_out\x18\x07 \x01(\x08\x12\r\n\x05\x63olor\x18\x08 \x01(\x03\x12\x35\n\x08\x61ssigned\x18\t \x03(\x0b\x32#.xla.BufferAllocationProto.Assigned\x1a\x43\n\x08\x41ssigned\x12\x19\n\x11logical_buffer_id\x18\x01 \x01(\x03\x12\x0e\n\x06offset\x18\x02 \x01(\x03\x12\x0c\n\x04size\x18\x03 \x01(\x03\"\xd6\x02\n\x12HeapSimulatorTrace\x12-\n\x06\x65vents\x18\x01 \x03(\x0b\x32\x1d.xla.HeapSimulatorTrace.Event\x12\x1f\n\x17whole_module_simulation\x18\x02 \x01(\x08\x12\x1f\n\x17\x62uffer_allocation_index\x18\x03 \x01(\x03\x1a\xce\x01\n\x05\x45vent\x12\x30\n\x04kind\x18\x01 \x01(\x0e\x32\".xla.HeapSimulatorTrace.Event.Kind\x12\x11\n\tbuffer_id\x18\x02 \x01(\x03\x12\x18\n\x10\x63omputation_name\x18\x03 \x01(\t\x12\x18\n\x10instruction_name\x18\x04 \x01(\t\x12\x1f\n\x17share_with_canonical_id\x18\x05 \x01(\x03\"+\n\x04Kind\x12\t\n\x05\x41LLOC\x10\x00\x12\x08\n\x04\x46REE\x10\x01\x12\x0e\n\nSHARE_WITH\x10\x02\"M\n\x13HloModuleGroupProto\x12\x0c\n\x04name\x18\x01 \x01(\t\x12(\n\x0bhlo_modules\x18\x02 \x03(\x0b\x32\x13.xla.HloModuleProto\"\xd6\x02\n\x15\x42ufferAssignmentProto\x12\x30\n\x0flogical_buffers\x18\x01 \x03(\x0b\x32\x17.xla.LogicalBufferProto\x12>\n\x0e\x62uffer_aliases\x18\x02 
\x03(\x0b\x32&.xla.BufferAssignmentProto.BufferAlias\x12\x36\n\x12\x62uffer_allocations\x18\x03 \x03(\x0b\x32\x1a.xla.BufferAllocationProto\x12\x36\n\x15heap_simulator_traces\x18\x04 \x03(\x0b\x32\x17.xla.HeapSimulatorTrace\x1a[\n\x0b\x42ufferAlias\x12\x18\n\x10source_buffer_id\x18\x01 \x01(\x03\x12\x32\n\x08location\x18\x02 \x01(\x0b\x32 .xla.LogicalBufferProto.Location\"~\n\x08HloProto\x12\'\n\nhlo_module\x18\x01 \x01(\x0b\x32\x13.xla.HloModuleProto\x12\x35\n\x11\x62uffer_assignment\x18\x03 \x01(\x0b\x32\x1a.xla.BufferAssignmentProtoJ\x04\x08\x02\x10\x03R\x0chlo_ordering\"\x8e\x01\n\x0bHloSnapshot\x12\x1a\n\x03hlo\x18\x01 \x01(\x0b\x32\r.xla.HloProto\x12$\n\targuments\x18\x02 \x03(\x0b\x32\x11.xla.LiteralProto\x12!\n\x06result\x18\x03 \x01(\x0b\x32\x11.xla.LiteralProto\x12\x1a\n\x12\x65xecution_platform\x18\x04 \x01(\t\"\xb9\x01\n\x16HloModuleMetadataProto\x12\x1b\n\x13\x63\x61nonical_module_id\x18\x01 \x01(\x03\x12\x19\n\x11module_group_name\x18\x02 \x01(\t\x12\x1a\n\x12original_module_id\x18\x03 \x01(\x03\x12\x1e\n\x16partitioned_module_ids\x18\x04 \x03(\x03\x12+\n\rpass_metadata\x18\x05 \x03(\x0b\x32\x14.xla.HloPassMetadata\"\x99\x02\n\x0fHloPassMetadata\x12\x0f\n\x07pass_id\x18\x01 \x01(\x03\x12\x11\n\tpass_name\x18\x02 \x01(\t\x12\x15\n\rpipeline_name\x18\x03 \x01(\t\x12\x16\n\x0e\x64ump_filenames\x18\x04 \x03(\t\x12\x16\n\x0emodule_changed\x18\x05 \x01(\x08\x12\x11\n\tmodule_id\x18\x06 \x01(\x03\x12\x1f\n\x17module_group_module_ids\x18\x07 \x03(\x03\x12\x1c\n\x14start_timestamp_usec\x18\x08 \x01(\x03\x12\x1a\n\x12\x65nd_timestamp_usec\x18\t \x01(\x03\x12-\n\x0f\x63ustom_metadata\x18\n \x01(\x0b\x32\x14.google.protobuf.Any*S\n\x12\x43ustomCallSchedule\x12\x11\n\rSCHEDULE_NONE\x10\x00\x12\x13\n\x0fSCHEDULE_LATEST\x10\x01\x12\x15\n\x11SCHEDULE_EARLIEST\x10\x02*\xb4\x01\n\x14\x43ustomCallApiVersion\x12\x1b\n\x17\x41PI_VERSION_UNSPECIFIED\x10\x00\x12\x18\n\x14\x41PI_VERSION_ORIGINAL\x10\x01\x12 
\n\x1c\x41PI_VERSION_STATUS_RETURNING\x10\x02\x12(\n$API_VERSION_STATUS_RETURNING_UNIFIED\x10\x03\x12\x19\n\x15\x41PI_VERSION_TYPED_FFI\x10\x04*:\n\x04Kind\x12\x13\n\x0fUNDEFINED_ALIAS\x10\x00\x12\r\n\tMAY_ALIAS\x10\x01\x12\x0e\n\nMUST_ALIAS\x10\x02\x42\x03\xf8\x01\x01\x62\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'xla.service.hlo_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\370\001\001' + _HLOINSTRUCTIONPROTO.fields_by_name['replica_groups']._options = None + _HLOINSTRUCTIONPROTO.fields_by_name['replica_groups']._serialized_options = b'\030\001' + _HLOINSTRUCTIONPROTO.fields_by_name['all_reduce_id']._options = None + _HLOINSTRUCTIONPROTO.fields_by_name['all_reduce_id']._serialized_options = b'\030\001' + _HLOINSTRUCTIONPROTO.fields_by_name['is_cross_program_prefetch']._options = None + _HLOINSTRUCTIONPROTO.fields_by_name['is_cross_program_prefetch']._serialized_options = b'\030\001' + _HLOSCHEDULEPROTO_SEQUENCESENTRY._options = None + _HLOSCHEDULEPROTO_SEQUENCESENTRY._serialized_options = b'8\001' + _LOGICALBUFFERPROTO_LOCATION.fields_by_name['instruction_name']._options = None + _LOGICALBUFFERPROTO_LOCATION.fields_by_name['instruction_name']._serialized_options = b'\030\001' + _CUSTOMCALLSCHEDULE._serialized_start=7600 + _CUSTOMCALLSCHEDULE._serialized_end=7683 + _CUSTOMCALLAPIVERSION._serialized_start=7686 + _CUSTOMCALLAPIVERSION._serialized_end=7866 + _KIND._serialized_start=7868 + _KIND._serialized_end=7926 + _HLOINSTRUCTIONPROTO._serialized_start=78 + _HLOINSTRUCTIONPROTO._serialized_end=3102 + _HLOINSTRUCTIONPROTO_SLICEDIMENSIONS._serialized_start=2739 + _HLOINSTRUCTIONPROTO_SLICEDIMENSIONS._serialized_end=2802 + _HLOCOMPUTATIONPROTO._serialized_start=3105 + _HLOCOMPUTATIONPROTO._serialized_end=3338 + _HLOSCHEDULEPROTO._serialized_start=3341 + _HLOSCHEDULEPROTO._serialized_end=3557 + 
_HLOSCHEDULEPROTO_INSTRUCTIONSEQUENCE._serialized_start=3418 + _HLOSCHEDULEPROTO_INSTRUCTIONSEQUENCE._serialized_end=3464 + _HLOSCHEDULEPROTO_SEQUENCESENTRY._serialized_start=3466 + _HLOSCHEDULEPROTO_SEQUENCESENTRY._serialized_end=3557 + _HLOINPUTOUTPUTALIASPROTO._serialized_start=3560 + _HLOINPUTOUTPUTALIASPROTO._serialized_end=3779 + _HLOINPUTOUTPUTALIASPROTO_ALIASENTRYPROTO._serialized_start=3652 + _HLOINPUTOUTPUTALIASPROTO_ALIASENTRYPROTO._serialized_end=3779 + _HLOBUFFERDONORPROTO._serialized_start=3782 + _HLOBUFFERDONORPROTO._serialized_end=3950 + _HLOBUFFERDONORPROTO_BUFFERDONORENTRYPROTO._serialized_start=3870 + _HLOBUFFERDONORPROTO_BUFFERDONORENTRYPROTO._serialized_end=3950 + _CROSSPROGRAMPREFETCH._serialized_start=3952 + _CROSSPROGRAMPREFETCH._serialized_end=4024 + _STACKFRAMEINDEXPROTO._serialized_start=4027 + _STACKFRAMEINDEXPROTO._serialized_end=4376 + _STACKFRAMEINDEXPROTO_FILELOCATION._serialized_start=4219 + _STACKFRAMEINDEXPROTO_FILELOCATION._serialized_end=4311 + _STACKFRAMEINDEXPROTO_STACKFRAME._serialized_start=4313 + _STACKFRAMEINDEXPROTO_STACKFRAME._serialized_end=4376 + _HLOMODULEPROTO._serialized_start=4379 + _HLOMODULEPROTO._serialized_end=5494 + _HLOMODULEPROTO_PROFILEINFO._serialized_start=5168 + _HLOMODULEPROTO_PROFILEINFO._serialized_end=5377 + _HLOMODULEPROTO_PROFILETYPE._serialized_start=5379 + _HLOMODULEPROTO_PROFILETYPE._serialized_end=5461 + _LOGICALBUFFERPROTO._serialized_start=5497 + _LOGICALBUFFERPROTO._serialized_end=5705 + _LOGICALBUFFERPROTO_LOCATION._serialized_start=5614 + _LOGICALBUFFERPROTO_LOCATION._serialized_end=5705 + _BUFFERALLOCATIONPROTO._serialized_start=5708 + _BUFFERALLOCATIONPROTO._serialized_end=6084 + _BUFFERALLOCATIONPROTO_ASSIGNED._serialized_start=6017 + _BUFFERALLOCATIONPROTO_ASSIGNED._serialized_end=6084 + _HEAPSIMULATORTRACE._serialized_start=6087 + _HEAPSIMULATORTRACE._serialized_end=6429 + _HEAPSIMULATORTRACE_EVENT._serialized_start=6223 + _HEAPSIMULATORTRACE_EVENT._serialized_end=6429 + 
_HEAPSIMULATORTRACE_EVENT_KIND._serialized_start=6386 + _HEAPSIMULATORTRACE_EVENT_KIND._serialized_end=6429 + _HLOMODULEGROUPPROTO._serialized_start=6431 + _HLOMODULEGROUPPROTO._serialized_end=6508 + _BUFFERASSIGNMENTPROTO._serialized_start=6511 + _BUFFERASSIGNMENTPROTO._serialized_end=6853 + _BUFFERASSIGNMENTPROTO_BUFFERALIAS._serialized_start=6762 + _BUFFERASSIGNMENTPROTO_BUFFERALIAS._serialized_end=6853 + _HLOPROTO._serialized_start=6855 + _HLOPROTO._serialized_end=6981 + _HLOSNAPSHOT._serialized_start=6984 + _HLOSNAPSHOT._serialized_end=7126 + _HLOMODULEMETADATAPROTO._serialized_start=7129 + _HLOMODULEMETADATAPROTO._serialized_end=7314 + _HLOPASSMETADATA._serialized_start=7317 + _HLOPASSMETADATA._serialized_end=7598 +# @@protoc_insertion_point(module_scope) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3d6ed6da2f62dab62f5af9cb5cef9ff130ad917d Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git 
a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..f605233db3d60a8772171b0e9dba8606e5f01871 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/bfc_memory_map_pb2.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/bfc_memory_map_pb2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3b5758dc6d20ef4ac6010f375fd721a3e734eb13 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/bfc_memory_map_pb2.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/test_log_pb2.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/test_log_pb2.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..51bcdd6d302ed3650ef11f29a998147d89f625af Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/__pycache__/test_log_pb2.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/bfc_memory_map_pb2.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/bfc_memory_map_pb2.py new file mode 100644 index 
0000000000000000000000000000000000000000..7bbb94a4cd54c1a70901ca06cd2a5f33756070c4 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/bfc_memory_map_pb2.py @@ -0,0 +1,34 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: xla/tsl/protobuf/bfc_memory_map.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n%xla/tsl/protobuf/bfc_memory_map.proto\x12\ntensorflow\"\x92\x01\n\x11MemAllocatorStats\x12\x12\n\nnum_allocs\x18\x01 \x01(\x03\x12\x14\n\x0c\x62ytes_in_use\x18\x02 \x01(\x03\x12\x19\n\x11peak_bytes_in_use\x18\x03 \x01(\x03\x12\x1a\n\x12largest_alloc_size\x18\x04 \x01(\x03\x12\x1c\n\x14\x66ragmentation_metric\x18\x05 \x01(\x02\"\xae\x01\n\x08MemChunk\x12\x0f\n\x07\x61\x64\x64ress\x18\x01 \x01(\x04\x12\x0c\n\x04size\x18\x02 \x01(\x03\x12\x16\n\x0erequested_size\x18\x03 \x01(\x03\x12\x0b\n\x03\x62in\x18\x04 \x01(\x05\x12\x0f\n\x07op_name\x18\x05 \x01(\t\x12\x16\n\x0e\x66reed_at_count\x18\x06 \x01(\x04\x12\x14\n\x0c\x61\x63tion_count\x18\x07 \x01(\x04\x12\x0e\n\x06in_use\x18\x08 \x01(\x08\x12\x0f\n\x07step_id\x18\t \x01(\x04\"\x8b\x01\n\nBinSummary\x12\x0b\n\x03\x62in\x18\x01 \x01(\x05\x12\x1a\n\x12total_bytes_in_use\x18\x02 \x01(\x03\x12\x1a\n\x12total_bytes_in_bin\x18\x03 \x01(\x03\x12\x1b\n\x13total_chunks_in_use\x18\x04 \x01(\x03\x12\x1b\n\x13total_chunks_in_bin\x18\x05 \x01(\x03\".\n\x08SnapShot\x12\x14\n\x0c\x61\x63tion_count\x18\x01 \x01(\x04\x12\x0c\n\x04size\x18\x02 \x01(\x03\"\xcd\x01\n\nMemoryDump\x12\x16\n\x0e\x61llocator_name\x18\x01 
\x01(\t\x12+\n\x0b\x62in_summary\x18\x02 \x03(\x0b\x32\x16.tensorflow.BinSummary\x12#\n\x05\x63hunk\x18\x03 \x03(\x0b\x32\x14.tensorflow.MemChunk\x12\'\n\tsnap_shot\x18\x04 \x03(\x0b\x32\x14.tensorflow.SnapShot\x12,\n\x05stats\x18\x05 \x01(\x0b\x32\x1d.tensorflow.MemAllocatorStatsB@Z>github.com/google/tsl/tsl/go/protobuf/for_core_protos_go_protob\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'xla.tsl.protobuf.bfc_memory_map_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'Z>github.com/google/tsl/tsl/go/protobuf/for_core_protos_go_proto' + _MEMALLOCATORSTATS._serialized_start=54 + _MEMALLOCATORSTATS._serialized_end=200 + _MEMCHUNK._serialized_start=203 + _MEMCHUNK._serialized_end=377 + _BINSUMMARY._serialized_start=380 + _BINSUMMARY._serialized_end=519 + _SNAPSHOT._serialized_start=521 + _SNAPSHOT._serialized_end=567 + _MEMORYDUMP._serialized_start=570 + _MEMORYDUMP._serialized_end=775 +# @@protoc_insertion_point(module_scope) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/test_log_pb2.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/test_log_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..f9dbfc3314a7d02531ba44d819eed5aa83e86cd1 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/tsl/protobuf/test_log_pb2.py @@ -0,0 +1,68 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: xla/tsl/protobuf/test_log.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + +from google.protobuf import any_pb2 as google_dot_protobuf_dot_any__pb2 +from google.protobuf import wrappers_pb2 as google_dot_protobuf_dot_wrappers__pb2 + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1fxla/tsl/protobuf/test_log.proto\x12\ntensorflow\x1a\x19google/protobuf/any.proto\x1a\x1egoogle/protobuf/wrappers.proto\"D\n\nEntryValue\x12\x16\n\x0c\x64ouble_value\x18\x01 \x01(\x01H\x00\x12\x16\n\x0cstring_value\x18\x02 \x01(\tH\x00\x42\x06\n\x04kind\"\x8c\x01\n\x0bMetricEntry\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x01\x12/\n\tmin_value\x18\x03 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\x12/\n\tmax_value\x18\x04 \x01(\x0b\x32\x1c.google.protobuf.DoubleValue\"\x8f\x02\n\x0e\x42\x65nchmarkEntry\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\r\n\x05iters\x18\x02 \x01(\x03\x12\x10\n\x08\x63pu_time\x18\x03 \x01(\x01\x12\x11\n\twall_time\x18\x04 \x01(\x01\x12\x12\n\nthroughput\x18\x05 \x01(\x01\x12\x36\n\x06\x65xtras\x18\x06 \x03(\x0b\x32&.tensorflow.BenchmarkEntry.ExtrasEntry\x12(\n\x07metrics\x18\x07 \x03(\x0b\x32\x17.tensorflow.MetricEntry\x1a\x45\n\x0b\x45xtrasEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12%\n\x05value\x18\x02 \x01(\x0b\x32\x16.tensorflow.EntryValue:\x02\x38\x01\"=\n\x10\x42\x65nchmarkEntries\x12)\n\x05\x65ntry\x18\x01 \x03(\x0b\x32\x1a.tensorflow.BenchmarkEntry\"B\n\x12\x42uildConfiguration\x12\x0c\n\x04mode\x18\x01 \x01(\t\x12\x10\n\x08\x63\x63_flags\x18\x02 \x03(\t\x12\x0c\n\x04opts\x18\x03 \x03(\t\"f\n\x08\x43ommitId\x12\x14\n\nchangelist\x18\x01 \x01(\x03H\x00\x12\x0e\n\x04hash\x18\x02 
\x01(\tH\x00\x12\x10\n\x08snapshot\x18\x03 \x01(\t\x12\x1a\n\x12pending_changelist\x18\x04 \x01(\x03\x42\x06\n\x04kind\"\xde\x01\n\x07\x43PUInfo\x12\x11\n\tnum_cores\x18\x01 \x01(\x03\x12\x19\n\x11num_cores_allowed\x18\x02 \x01(\x03\x12\x13\n\x0bmhz_per_cpu\x18\x03 \x01(\x01\x12\x10\n\x08\x63pu_info\x18\x04 \x01(\t\x12\x14\n\x0c\x63pu_governor\x18\x05 \x01(\t\x12\x36\n\ncache_size\x18\x06 \x03(\x0b\x32\".tensorflow.CPUInfo.CacheSizeEntry\x1a\x30\n\x0e\x43\x61\x63heSizeEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\x03:\x02\x38\x01\".\n\nMemoryInfo\x12\r\n\x05total\x18\x01 \x01(\x03\x12\x11\n\tavailable\x18\x02 \x01(\x03\"6\n\x07GPUInfo\x12\r\n\x05model\x18\x01 \x01(\t\x12\x0c\n\x04uuid\x18\x02 \x01(\t\x12\x0e\n\x06\x62us_id\x18\x03 \x01(\t\"p\n\x0cPlatformInfo\x12\x0c\n\x04\x62its\x18\x01 \x01(\t\x12\x0f\n\x07linkage\x18\x02 \x01(\t\x12\x0f\n\x07machine\x18\x03 \x01(\t\x12\x0f\n\x07release\x18\x04 \x01(\t\x12\x0e\n\x06system\x18\x05 \x01(\t\x12\x0f\n\x07version\x18\x06 \x01(\t\"e\n\x13\x41vailableDeviceInfo\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12\x14\n\x0cmemory_limit\x18\x03 \x01(\x03\x12\x1c\n\x14physical_description\x18\x04 \x01(\t\"\xb3\x02\n\x14MachineConfiguration\x12\x10\n\x08hostname\x18\x01 \x01(\t\x12\x19\n\x11serial_identifier\x18\x07 \x01(\t\x12/\n\rplatform_info\x18\x02 \x01(\x0b\x32\x18.tensorflow.PlatformInfo\x12%\n\x08\x63pu_info\x18\x03 \x01(\x0b\x32\x13.tensorflow.CPUInfo\x12)\n\x0b\x64\x65vice_info\x18\x04 \x03(\x0b\x32\x14.google.protobuf.Any\x12>\n\x15\x61vailable_device_info\x18\x05 \x03(\x0b\x32\x1f.tensorflow.AvailableDeviceInfo\x12+\n\x0bmemory_info\x18\x06 \x01(\x0b\x32\x16.tensorflow.MemoryInfo\"\x91\x01\n\x10RunConfiguration\x12\x10\n\x08\x61rgument\x18\x01 \x03(\t\x12;\n\x08\x65nv_vars\x18\x02 \x03(\x0b\x32).tensorflow.RunConfiguration.EnvVarsEntry\x1a.\n\x0c\x45nvVarsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 
\x01(\t:\x02\x38\x01\"\xd0\x04\n\x0bTestResults\x12\x0e\n\x06target\x18\x01 \x01(\t\x12-\n\x07\x65ntries\x18\x02 \x01(\x0b\x32\x1c.tensorflow.BenchmarkEntries\x12;\n\x13\x62uild_configuration\x18\x03 \x01(\x0b\x32\x1e.tensorflow.BuildConfiguration\x12\'\n\tcommit_id\x18\x04 \x01(\x0b\x32\x14.tensorflow.CommitId\x12\x12\n\nstart_time\x18\x05 \x01(\x03\x12\x10\n\x08run_time\x18\x06 \x01(\x01\x12?\n\x15machine_configuration\x18\x07 \x01(\x0b\x32 .tensorflow.MachineConfiguration\x12\x37\n\x11run_configuration\x18\x08 \x01(\x0b\x32\x1c.tensorflow.RunConfiguration\x12\x0c\n\x04name\x18\t \x01(\t\x12=\n\x0e\x62\x65nchmark_type\x18\n \x01(\x0e\x32%.tensorflow.TestResults.BenchmarkType\x12\x10\n\x08run_mode\x18\x0b \x01(\t\x12\x12\n\ntf_version\x18\x0c \x01(\t\"\x88\x01\n\rBenchmarkType\x12\x0b\n\x07UNKNOWN\x10\x00\x12\x16\n\x12\x43PP_MICROBENCHMARK\x10\x01\x12\x14\n\x10PYTHON_BENCHMARK\x10\x02\x12\x15\n\x11\x41NDROID_BENCHMARK\x10\x03\x12\x12\n\x0e\x45\x44GE_BENCHMARK\x10\x04\x12\x11\n\rIOS_BENCHMARK\x10\x05\x42\x31\n\x1borg.tensorflow.util.testlogB\rTestLogProtosP\x01\xf8\x01\x01\x62\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'xla.tsl.protobuf.test_log_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\n\033org.tensorflow.util.testlogB\rTestLogProtosP\001\370\001\001' + _BENCHMARKENTRY_EXTRASENTRY._options = None + _BENCHMARKENTRY_EXTRASENTRY._serialized_options = b'8\001' + _CPUINFO_CACHESIZEENTRY._options = None + _CPUINFO_CACHESIZEENTRY._serialized_options = b'8\001' + _RUNCONFIGURATION_ENVVARSENTRY._options = None + _RUNCONFIGURATION_ENVVARSENTRY._serialized_options = b'8\001' + _ENTRYVALUE._serialized_start=106 + _ENTRYVALUE._serialized_end=174 + _METRICENTRY._serialized_start=177 + _METRICENTRY._serialized_end=317 + _BENCHMARKENTRY._serialized_start=320 + _BENCHMARKENTRY._serialized_end=591 + 
_BENCHMARKENTRY_EXTRASENTRY._serialized_start=522 + _BENCHMARKENTRY_EXTRASENTRY._serialized_end=591 + _BENCHMARKENTRIES._serialized_start=593 + _BENCHMARKENTRIES._serialized_end=654 + _BUILDCONFIGURATION._serialized_start=656 + _BUILDCONFIGURATION._serialized_end=722 + _COMMITID._serialized_start=724 + _COMMITID._serialized_end=826 + _CPUINFO._serialized_start=829 + _CPUINFO._serialized_end=1051 + _CPUINFO_CACHESIZEENTRY._serialized_start=1003 + _CPUINFO_CACHESIZEENTRY._serialized_end=1051 + _MEMORYINFO._serialized_start=1053 + _MEMORYINFO._serialized_end=1099 + _GPUINFO._serialized_start=1101 + _GPUINFO._serialized_end=1155 + _PLATFORMINFO._serialized_start=1157 + _PLATFORMINFO._serialized_end=1269 + _AVAILABLEDEVICEINFO._serialized_start=1271 + _AVAILABLEDEVICEINFO._serialized_end=1372 + _MACHINECONFIGURATION._serialized_start=1375 + _MACHINECONFIGURATION._serialized_end=1682 + _RUNCONFIGURATION._serialized_start=1685 + _RUNCONFIGURATION._serialized_end=1830 + _RUNCONFIGURATION_ENVVARSENTRY._serialized_start=1784 + _RUNCONFIGURATION_ENVVARSENTRY._serialized_end=1830 + _TESTRESULTS._serialized_start=1833 + _TESTRESULTS._serialized_end=2425 + _TESTRESULTS_BENCHMARKTYPE._serialized_start=2289 + _TESTRESULTS_BENCHMARKTYPE._serialized_end=2425 +# @@protoc_insertion_point(module_scope) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/xla_data_pb2.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/xla_data_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..b8bbbcd7d66387422f7069c0025c8689d39a6f2f --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/compiler/xla/xla_data_pb2.py @@ -0,0 +1,148 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! 
+# source: xla/xla_data.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x12xla/xla_data.proto\x12\x03xla\"\xb7\x01\n\rPaddingConfig\x12=\n\ndimensions\x18\x01 \x03(\x0b\x32).xla.PaddingConfig.PaddingConfigDimension\x1ag\n\x16PaddingConfigDimension\x12\x18\n\x10\x65\x64ge_padding_low\x18\x01 \x01(\x03\x12\x19\n\x11\x65\x64ge_padding_high\x18\x02 \x01(\x03\x12\x18\n\x10interior_padding\x18\x03 \x01(\x03\"\x1f\n\tTileProto\x12\x12\n\ndimensions\x18\x01 \x03(\x03\"<\n\x10SplitConfigProto\x12\x11\n\tdimension\x18\x01 \x01(\x03\x12\x15\n\rsplit_indices\x18\x02 \x03(\x03\"\xba\x04\n\x0bLayoutProto\x12*\n\x0f\x64im_level_types\x18\t \x03(\x0e\x32\x11.xla.DimLevelType\x12\x12\n\ndim_unique\x18\r \x03(\x08\x12\x13\n\x0b\x64im_ordered\x18\x0e \x03(\x08\x12\x16\n\x0eminor_to_major\x18\x01 \x03(\x03\x12\x1d\n\x05tiles\x18\x06 \x03(\x0b\x32\x0e.xla.TileProto\x12*\n\"tail_padding_alignment_in_elements\x18\x10 \x01(\x03\x12\x1c\n\x14\x65lement_size_in_bits\x18\x07 \x01(\x03\x12\x14\n\x0cmemory_space\x18\x08 \x01(\x03\x12\x30\n\x14index_primitive_type\x18\x0b \x01(\x0e\x32\x12.xla.PrimitiveType\x12\x32\n\x16pointer_primitive_type\x18\x0c \x01(\x0e\x32\x12.xla.PrimitiveType\x12\'\n\x0ephysical_shape\x18\n \x01(\x0b\x32\x0f.xla.ShapeProto\x12+\n#dynamic_shape_metadata_prefix_bytes\x18\x0f \x01(\x03\x12,\n\rsplit_configs\x18\x11 \x03(\x0b\x32\x15.xla.SplitConfigProtoJ\x04\x08\x02\x10\x03J\x04\x08\x03\x10\x04J\x04\x08\x04\x10\x05J\x04\x08\x05\x10\x06R\x11padded_dimensionsR\rpadding_valueR\x06\x66ormatR\x13max_sparse_elements\"\xbd\x01\n\nShapeProto\x12(\n\x0c\x65lement_type\x18\x02 
\x01(\x0e\x32\x12.xla.PrimitiveType\x12\x12\n\ndimensions\x18\x03 \x03(\x03\x12%\n\x0ctuple_shapes\x18\x04 \x03(\x0b\x32\x0f.xla.ShapeProto\x12 \n\x06layout\x18\x05 \x01(\x0b\x32\x10.xla.LayoutProto\x12\x1c\n\x14is_dynamic_dimension\x18\x06 \x03(\x08J\x04\x08\x01\x10\x02R\x04rank\"r\n\x11ProgramShapeProto\x12#\n\nparameters\x18\x01 \x03(\x0b\x32\x0f.xla.ShapeProto\x12\x1f\n\x06result\x18\x02 \x01(\x0b\x32\x0f.xla.ShapeProto\x12\x17\n\x0fparameter_names\x18\x03 \x03(\t\"D\n\x10\x43omputationStats\x12\x12\n\nflop_count\x18\x01 \x01(\x01\x12\x1c\n\x14transcendental_count\x18\x02 \x01(\x01\"\xe0\x04\n\nOpMetadata\x12\x0f\n\x07op_type\x18\x01 \x01(\t\x12\x0f\n\x07op_name\x18\x02 \x01(\t\x12\x13\n\x0bsource_file\x18\x03 \x01(\t\x12\x13\n\x0bsource_line\x18\x04 \x01(\x05\x12*\n\x0cprofile_type\x18\x05 \x03(\x0e\x32\x10.xla.ProfileTypeB\x02\x18\x01\x12\'\n\x1fsize_of_generated_code_in_bytes\x18\x08 \x01(\x03\x12+\n#size_of_memory_working_set_in_bytes\x18\t \x01(\x03\x12\x31\n\x0cprofile_info\x18\n \x01(\x0b\x32\x1b.xla.OpMetadata.ProfileInfo\x12\x19\n\x11\x64\x65\x64uplicated_name\x18\x0c \x01(\t\x12\x17\n\x0fpreserve_layout\x18\r \x01(\x08\x12\x16\n\x0estack_frame_id\x18\x0f \x01(\x05\x12\x17\n\x0fscheduling_name\x18\x10 \x01(\t\x1a\xad\x01\n\x0bProfileInfo\x12&\n\x0cprofile_type\x18\x01 \x03(\x0e\x32\x10.xla.ProfileType\x12\x18\n\x10relative_speedup\x18\x02 \x01(\x01\x12*\n\x0eprofile_source\x18\x03 \x01(\x0e\x32\x12.xla.ProfileSource\x12\x30\n\x11\x63ompilation_event\x18\x04 \x01(\x0e\x32\x15.xla.CompilationEventJ\x04\x08\x06\x10\x07J\x04\x08\x07\x10\x08J\x04\x08\x0e\x10\x0fR\x10\x63reation_pass_idR\x18logical_creation_pass_id\"\x80\x02\n\x10\x45xecutionProfile\x12\x1d\n\x15\x63ompilation_cache_hit\x18\x01 \x01(\x08\x12\x17\n\x0f\x63ompile_time_ms\x18\x02 \x01(\x03\x12\x1b\n\x13\x63ompute_cycle_count\x18\x03 \x01(\x03\x12\x17\n\x0f\x63ompute_time_ns\x18\x04 \x01(\x03\x12$\n\x1c\x63ompute_and_transfer_time_ns\x18\x05 \x01(\x03\x12 
\n\x18\x65xecutable_size_in_bytes\x18\x06 \x01(\x03\x12\x19\n\x11profile_cache_hit\x18\x07 \x01(\x08\x12\x1b\n\x13warmup_run_executed\x18\x08 \x01(\x08\"!\n\x0f\x45xecutionHandle\x12\x0e\n\x06handle\x18\x01 \x01(\x03\"\"\n\x10GlobalDataHandle\x12\x0e\n\x06handle\x18\x01 \x01(\x03\"4\n\x0c\x44\x65viceHandle\x12\x0e\n\x06handle\x18\x01 \x01(\x03\x12\x14\n\x0c\x64\x65vice_count\x18\x02 \x01(\x03\"\xb4\x01\n\rChannelHandle\x12\x0e\n\x06handle\x18\x01 \x01(\x03\x12,\n\x04type\x18\x02 \x01(\x0e\x32\x1e.xla.ChannelHandle.ChannelType\"e\n\x0b\x43hannelType\x12\x18\n\x14\x43HANNEL_TYPE_INVALID\x10\x00\x12\x14\n\x10\x44\x45VICE_TO_DEVICE\x10\x01\x12\x12\n\x0e\x44\x45VICE_TO_HOST\x10\x02\x12\x12\n\x0eHOST_TO_DEVICE\x10\x03\"\xc5\x01\n\x15\x44\x65viceAssignmentProto\x12\x15\n\rreplica_count\x18\x01 \x01(\x05\x12\x19\n\x11\x63omputation_count\x18\x02 \x01(\x05\x12I\n\x13\x63omputation_devices\x18\x03 \x03(\x0b\x32,.xla.DeviceAssignmentProto.ComputationDevice\x1a/\n\x11\x43omputationDevice\x12\x1a\n\x12replica_device_ids\x18\x01 \x03(\x03\"\xde\x03\n\x0cLiteralProto\x12\x1e\n\x05shape\x18\x01 \x01(\x0b\x32\x0f.xla.ShapeProto\x12\r\n\x05preds\x18\x02 \x03(\x08\x12\x0b\n\x03s2s\x18\x1a \x01(\x0c\x12\x0b\n\x03s4s\x18\x15 \x01(\x0c\x12\x0b\n\x03s8s\x18\x0f \x01(\x0c\x12\x0b\n\x03u2s\x18\x1b \x01(\x0c\x12\x0b\n\x03u4s\x18\x16 \x01(\x0c\x12\x0b\n\x03u8s\x18\x03 \x01(\x0c\x12\x0c\n\x04s32s\x18\x04 \x03(\x05\x12\x0c\n\x04s64s\x18\x05 \x03(\x03\x12\x0c\n\x04u32s\x18\x06 \x03(\r\x12\x0c\n\x04u64s\x18\x07 \x03(\x04\x12\x0c\n\x04\x66\x33\x32s\x18\x08 \x03(\x02\x12\x0c\n\x04\x66\x36\x34s\x18\t \x03(\x01\x12\x0c\n\x04\x63\x36\x34s\x18\x0c \x03(\x02\x12\r\n\x05\x63\x31\x32\x38s\x18\x12 \x03(\x01\x12)\n\x0etuple_literals\x18\n \x03(\x0b\x32\x11.xla.LiteralProto\x12\x0c\n\x04\x66\x31\x36s\x18\x0b \x01(\x0c\x12\r\n\x05\x62\x66\x31\x36s\x18\r \x01(\x0c\x12\x0c\n\x04u16s\x18\x10 \x01(\x0c\x12\x0c\n\x04s16s\x18\x11 \x01(\x0c\x12\x0f\n\x07\x66\x38\x65\x35m2s\x18\x13 
\x01(\x0c\x12\x11\n\tf8e4m3fns\x18\x14 \x01(\x0c\x12\x16\n\x0e\x66\x38\x65\x34m3b11fnuzs\x18\x17 \x01(\x0c\x12\x13\n\x0b\x66\x38\x65\x35m2fnuzs\x18\x18 \x01(\x0c\x12\x13\n\x0b\x66\x38\x65\x34m3fnuzs\x18\x19 \x01(\x0c\x12\x16\n\x0esparse_indices\x18\x0e \x03(\x03\"\xa3\x01\n\x0fWindowDimension\x12\x0c\n\x04size\x18\x01 \x01(\x03\x12\x0e\n\x06stride\x18\x02 \x01(\x03\x12\x13\n\x0bpadding_low\x18\x03 \x01(\x03\x12\x14\n\x0cpadding_high\x18\x04 \x01(\x03\x12\x17\n\x0fwindow_dilation\x18\x05 \x01(\x03\x12\x15\n\rbase_dilation\x18\x06 \x01(\x03\x12\x17\n\x0fwindow_reversal\x18\x07 \x01(\x08\"2\n\x06Window\x12(\n\ndimensions\x18\x01 \x03(\x0b\x32\x14.xla.WindowDimension\"\xc2\x01\n\x16GatherDimensionNumbers\x12\x13\n\x0boffset_dims\x18\x01 \x03(\x03\x12\x1c\n\x14\x63ollapsed_slice_dims\x18\x02 \x03(\x03\x12\x17\n\x0fstart_index_map\x18\x03 \x03(\x03\x12\x18\n\x10index_vector_dim\x18\x04 \x01(\x03\x12\x1d\n\x15operand_batching_dims\x18\x05 \x03(\x03\x12#\n\x1bstart_indices_batching_dims\x18\x06 \x03(\x03\"\xd7\x01\n\x17ScatterDimensionNumbers\x12\x1a\n\x12update_window_dims\x18\x01 \x03(\x03\x12\x1c\n\x14inserted_window_dims\x18\x02 \x03(\x03\x12$\n\x1cscatter_dims_to_operand_dims\x18\x03 \x03(\x03\x12\x18\n\x10index_vector_dim\x18\x04 \x01(\x03\x12\x1b\n\x13input_batching_dims\x18\x05 \x03(\x03\x12%\n\x1dscatter_indices_batching_dims\x18\x06 \x03(\x03\"\xd8\x02\n\x1b\x43onvolutionDimensionNumbers\x12\x1d\n\x15input_batch_dimension\x18\x07 \x01(\x03\x12\x1f\n\x17input_feature_dimension\x18\x08 \x01(\x03\x12 \n\x18input_spatial_dimensions\x18\x0b \x03(\x03\x12&\n\x1ekernel_input_feature_dimension\x18\x03 \x01(\x03\x12\'\n\x1fkernel_output_feature_dimension\x18\x04 \x01(\x03\x12!\n\x19kernel_spatial_dimensions\x18\x06 \x03(\x03\x12\x1e\n\x16output_batch_dimension\x18\t \x01(\x03\x12 \n\x18output_feature_dimension\x18\n \x01(\x03\x12!\n\x19output_spatial_dimensions\x18\x0c \x03(\x03\"\x99\x01\n\x13\x44otDimensionNumbers\x12\"\n\x1alhs_contracting_dimensions\x18\x01 
\x03(\x03\x12\"\n\x1arhs_contracting_dimensions\x18\x02 \x03(\x03\x12\x1c\n\x14lhs_batch_dimensions\x18\x03 \x03(\x03\x12\x1c\n\x14rhs_batch_dimensions\x18\x04 \x03(\x03\"m\n\x12SparsityDescriptor\x12\x1f\n\x04type\x18\x01 \x01(\x0e\x32\x11.xla.SparsityType\x12\r\n\x05index\x18\x02 \x01(\x05\x12\x11\n\tdimension\x18\x03 \x01(\x05\x12\t\n\x01n\x18\x04 \x01(\x05\x12\t\n\x01m\x18\x05 \x01(\x05\"\xdf\x01\n\x16TriangularSolveOptions\x12\x11\n\tleft_side\x18\x01 \x01(\x08\x12\r\n\x05lower\x18\x02 \x01(\x08\x12\x15\n\runit_diagonal\x18\x03 \x01(\x08\x12:\n\x0btranspose_a\x18\x04 \x01(\x0e\x32%.xla.TriangularSolveOptions.Transpose\"P\n\tTranspose\x12\x15\n\x11TRANSPOSE_INVALID\x10\x00\x12\x10\n\x0cNO_TRANSPOSE\x10\x01\x12\r\n\tTRANSPOSE\x10\x02\x12\x0b\n\x07\x41\x44JOINT\x10\x03\" \n\x0f\x43holeskyOptions\x12\r\n\x05lower\x18\x01 \x01(\x08\"!\n\x0bSortOptions\x12\x12\n\ndescending\x18\x01 \x01(\x08\"o\n\x12\x46rontendAttributes\x12-\n\x03map\x18\x01 \x03(\x0b\x32 .xla.FrontendAttributes.MapEntry\x1a*\n\x08MapEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12\r\n\x05value\x18\x02 \x01(\t:\x02\x38\x01\"0\n\tStatistic\x12\x11\n\tstat_name\x18\x01 \x01(\t\x12\x10\n\x08stat_val\x18\x02 \x01(\x01\"T\n\rStatisticsViz\x12\x1f\n\x17stat_index_to_visualize\x18\x01 \x01(\x03\x12\"\n\nstatistics\x18\x02 \x03(\x0b\x32\x0e.xla.Statistic\"\xd3\x04\n\nOpSharding\x12\"\n\x04type\x18\x01 \x01(\x0e\x32\x14.xla.OpSharding.Type\x12#\n\ntile_shape\x18\x02 \x01(\x0b\x32\x0f.xla.ShapeProto\x12\"\n\x1atile_assignment_dimensions\x18\x03 \x03(\x03\x12\x1f\n\x17tile_assignment_devices\x18\x04 \x03(\x03\x12(\n\x0ftuple_shardings\x18\x05 \x03(\x0b\x32\x0f.xla.OpSharding\x12\"\n\x1areplicate_on_last_tile_dim\x18\x06 \x01(\x08\x12!\n\x08metadata\x18\x07 \x03(\x0b\x32\x0f.xla.OpMetadata\x12,\n\x0elast_tile_dims\x18\x08 \x03(\x0e\x32\x14.xla.OpSharding.Type\x12\x19\n\x11iota_reshape_dims\x18\t \x03(\x03\x12\x1b\n\x13iota_transpose_perm\x18\n \x03(\x05\x12\x16\n\x0eis_shard_group\x18\x0b 
\x01(\x08\x12\x16\n\x0eshard_group_id\x18\x0c \x01(\x03\x12\x38\n\x10shard_group_type\x18\r \x01(\x0e\x32\x1e.xla.OpSharding.ShardGroupType\"R\n\x04Type\x12\x0e\n\nREPLICATED\x10\x00\x12\x0b\n\x07MAXIMAL\x10\x01\x12\t\n\x05TUPLE\x10\x02\x12\t\n\x05OTHER\x10\x03\x12\n\n\x06MANUAL\x10\x04\x12\x0b\n\x07UNKNOWN\x10\x05\"\"\n\x0eShardGroupType\x12\x06\n\x02\x41S\x10\x00\x12\x08\n\x04LIKE\x10\x01\"#\n\x0cReplicaGroup\x12\x13\n\x0breplica_ids\x18\x01 \x03(\x03\"\x8e\x01\n\x19IotaReplicaGroupListProto\x12\x1a\n\x12num_replica_groups\x18\x01 \x01(\x03\x12\x1d\n\x15num_devices_per_group\x18\x02 \x01(\x03\x12\x19\n\x11iota_reshape_dims\x18\x03 \x03(\x03\x12\x1b\n\x13iota_transpose_perm\x18\x04 \x03(\x05\"\x87\x01\n\x19\x43ollectiveDeviceListProto\x12)\n\x0ereplica_groups\x18\x01 \x03(\x0b\x32\x11.xla.ReplicaGroup\x12?\n\x17iota_replica_group_list\x18\x02 \x01(\x0b\x32\x1e.xla.IotaReplicaGroupListProto\".\n\x0cSourceTarget\x12\x0e\n\x06source\x18\x01 \x01(\x03\x12\x0e\n\x06target\x18\x02 \x01(\x03\"\xb9\x04\n\x0fPrecisionConfig\x12\x39\n\x11operand_precision\x18\x01 \x03(\x0e\x32\x1e.xla.PrecisionConfig.Precision\x12\x31\n\talgorithm\x18\x02 
\x01(\x0e\x32\x1e.xla.PrecisionConfig.Algorithm\"B\n\tPrecision\x12\x0b\n\x07\x44\x45\x46\x41ULT\x10\x00\x12\x08\n\x04HIGH\x10\x01\x12\x0b\n\x07HIGHEST\x10\x02\x12\x11\n\rPACKED_NIBBLE\x10\x03\"\xf3\x02\n\tAlgorithm\x12\r\n\tALG_UNSET\x10\x00\x12\x1d\n\x19\x41LG_DOT_ANY_F8_ANY_F8_F32\x10\x01\x12(\n$ALG_DOT_ANY_F8_ANY_F8_F32_FAST_ACCUM\x10\x02\x12\x17\n\x13\x41LG_DOT_F16_F16_F16\x10\x03\x12\x17\n\x13\x41LG_DOT_F16_F16_F32\x10\x04\x12\x1a\n\x16\x41LG_DOT_BF16_BF16_BF16\x10\x05\x12\x19\n\x15\x41LG_DOT_BF16_BF16_F32\x10\x06\x12\x1c\n\x18\x41LG_DOT_BF16_BF16_F32_X3\x10\x07\x12\x1c\n\x18\x41LG_DOT_BF16_BF16_F32_X6\x10\x08\x12\x19\n\x15\x41LG_DOT_TF32_TF32_F32\x10\t\x12\x1c\n\x18\x41LG_DOT_TF32_TF32_F32_X3\x10\n\x12\x17\n\x13\x41LG_DOT_F32_F32_F32\x10\x0b\x12\x17\n\x13\x41LG_DOT_F64_F64_F64\x10\x0c\":\n\x14ParameterReplication\x12\"\n\x1areplicated_at_leaf_buffers\x18\x01 \x03(\x08\"{\n\x16WhileLoopBackendConfig\x12\x44\n\x10known_trip_count\x18\x01 \x01(\x0b\x32*.xla.WhileLoopBackendConfig.KnownTripCount\x1a\x1b\n\x0eKnownTripCount\x12\t\n\x01n\x18\x01 \x01(\x03\"g\n\x15OutputOperandAliasing\x12\x1a\n\x12output_shape_index\x18\x01 \x03(\x03\x12\x15\n\roperand_index\x18\x02 \x01(\x03\x12\x1b\n\x13operand_shape_index\x18\x03 \x03(\x03\"]\n\x12OriginalArrayProto\x12\x18\n\x10leaf_shape_index\x18\x01 \x03(\x03\x12\x18\n\x10instruction_name\x18\x02 \x01(\t\x12\x13\n\x0bshape_index\x18\x03 \x03(\x03\"=\n\x12OriginalValueProto\x12\'\n\x06leaves\x18\x01 
\x03(\x0b\x32\x17.xla.OriginalArrayProto*\xc7\x02\n\rPrimitiveType\x12\x1a\n\x16PRIMITIVE_TYPE_INVALID\x10\x00\x12\x08\n\x04PRED\x10\x01\x12\x06\n\x02S2\x10\x1a\x12\x06\n\x02S4\x10\x15\x12\x06\n\x02S8\x10\x02\x12\x07\n\x03S16\x10\x03\x12\x07\n\x03S32\x10\x04\x12\x07\n\x03S64\x10\x05\x12\x06\n\x02U2\x10\x1b\x12\x06\n\x02U4\x10\x16\x12\x06\n\x02U8\x10\x06\x12\x07\n\x03U16\x10\x07\x12\x07\n\x03U32\x10\x08\x12\x07\n\x03U64\x10\t\x12\x07\n\x03\x46\x31\x36\x10\n\x12\x07\n\x03\x46\x33\x32\x10\x0b\x12\x08\n\x04\x42\x46\x31\x36\x10\x10\x12\x07\n\x03\x46\x36\x34\x10\x0c\x12\n\n\x06\x46\x38\x45\x35M2\x10\x13\x12\x0c\n\x08\x46\x38\x45\x34M3FN\x10\x14\x12\x11\n\rF8E4M3B11FNUZ\x10\x17\x12\x0e\n\nF8E5M2FNUZ\x10\x18\x12\x0e\n\nF8E4M3FNUZ\x10\x19\x12\x07\n\x03\x43\x36\x34\x10\x0f\x12\x08\n\x04\x43\x31\x32\x38\x10\x12\x12\t\n\x05TUPLE\x10\r\x12\x0f\n\x0bOPAQUE_TYPE\x10\x0e\x12\t\n\x05TOKEN\x10\x11*^\n\x0c\x44imLevelType\x12\r\n\tDIM_DENSE\x10\x00\x12\x12\n\x0e\x44IM_COMPRESSED\x10\x01\x12\x11\n\rDIM_SINGLETON\x10\x02\x12\x18\n\x14\x44IM_LOOSE_COMPRESSED\x10\x03*=\n\x0bProfileType\x12\x0b\n\x07INVALID\x10\x00\x12\n\n\x06WINDOW\x10\x01\x12\x08\n\x04\x46LAG\x10\x02\x12\x0b\n\x07INTEGER\x10\x03*j\n\rProfileSource\x12!\n\x1dPROFILE_SOURCE_UNKNOWN_SOURCE\x10\x00\x12\x1b\n\x17PROFILE_SOURCE_EMBEDDED\x10\x01\x12\x19\n\x15PROFILE_SOURCE_REMOTE\x10\x02*\x85\x01\n\x10\x43ompilationEvent\x12#\n\x1f\x43OMPILATION_EVENT_UNKNOWN_EVENT\x10\x00\x12\'\n#COMPILATION_EVENT_FIRST_COMPILATION\x10\x01\x12#\n\x1f\x43OMPILATION_EVENT_RECOMPILATION\x10\x02*G\n\x0bPaddingType\x12\x13\n\x0fPADDING_INVALID\x10\x00\x12\x11\n\rPADDING_VALID\x10\x01\x12\x10\n\x0cPADDING_SAME\x10\x02*1\n\x07\x46\x66tType\x12\x07\n\x03\x46\x46T\x10\x00\x12\x08\n\x04IFFT\x10\x01\x12\x08\n\x04RFFT\x10\x02\x12\t\n\x05IRFFT\x10\x03*A\n\x0cSparsityType\x12\x14\n\x10SPARSITY_INVALID\x10\x00\x12\x1b\n\x17SPARSITY_STRUCTURED_N_M\x10\x01*F\n\x12RandomDistribution\x12\x0f\n\x0bRNG_INVALID\x10\x00\x12\x0f\n\x0bRNG_UNIFORM\x10\x01\x12\x0e\n\nRNG
_NORMAL\x10\x02*E\n\x0fRandomAlgorithm\x12\x0f\n\x0bRNG_DEFAULT\x10\x00\x12\x11\n\rRNG_THREE_FRY\x10\x01\x12\x0e\n\nRNG_PHILOX\x10\x02\x42\x03\xf8\x01\x01\x62\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'xla.xla_data_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + DESCRIPTOR._serialized_options = b'\370\001\001' + _OPMETADATA.fields_by_name['profile_type']._options = None + _OPMETADATA.fields_by_name['profile_type']._serialized_options = b'\030\001' + _FRONTENDATTRIBUTES_MAPENTRY._options = None + _FRONTENDATTRIBUTES_MAPENTRY._serialized_options = b'8\001' + _PRIMITIVETYPE._serialized_start=6896 + _PRIMITIVETYPE._serialized_end=7223 + _DIMLEVELTYPE._serialized_start=7225 + _DIMLEVELTYPE._serialized_end=7319 + _PROFILETYPE._serialized_start=7321 + _PROFILETYPE._serialized_end=7382 + _PROFILESOURCE._serialized_start=7384 + _PROFILESOURCE._serialized_end=7490 + _COMPILATIONEVENT._serialized_start=7493 + _COMPILATIONEVENT._serialized_end=7626 + _PADDINGTYPE._serialized_start=7628 + _PADDINGTYPE._serialized_end=7699 + _FFTTYPE._serialized_start=7701 + _FFTTYPE._serialized_end=7750 + _SPARSITYTYPE._serialized_start=7752 + _SPARSITYTYPE._serialized_end=7817 + _RANDOMDISTRIBUTION._serialized_start=7819 + _RANDOMDISTRIBUTION._serialized_end=7889 + _RANDOMALGORITHM._serialized_start=7891 + _RANDOMALGORITHM._serialized_end=7960 + _PADDINGCONFIG._serialized_start=28 + _PADDINGCONFIG._serialized_end=211 + _PADDINGCONFIG_PADDINGCONFIGDIMENSION._serialized_start=108 + _PADDINGCONFIG_PADDINGCONFIGDIMENSION._serialized_end=211 + _TILEPROTO._serialized_start=213 + _TILEPROTO._serialized_end=244 + _SPLITCONFIGPROTO._serialized_start=246 + _SPLITCONFIGPROTO._serialized_end=306 + _LAYOUTPROTO._serialized_start=309 + _LAYOUTPROTO._serialized_end=879 + _SHAPEPROTO._serialized_start=882 + _SHAPEPROTO._serialized_end=1071 + 
_PROGRAMSHAPEPROTO._serialized_start=1073 + _PROGRAMSHAPEPROTO._serialized_end=1187 + _COMPUTATIONSTATS._serialized_start=1189 + _COMPUTATIONSTATS._serialized_end=1257 + _OPMETADATA._serialized_start=1260 + _OPMETADATA._serialized_end=1868 + _OPMETADATA_PROFILEINFO._serialized_start=1633 + _OPMETADATA_PROFILEINFO._serialized_end=1806 + _EXECUTIONPROFILE._serialized_start=1871 + _EXECUTIONPROFILE._serialized_end=2127 + _EXECUTIONHANDLE._serialized_start=2129 + _EXECUTIONHANDLE._serialized_end=2162 + _GLOBALDATAHANDLE._serialized_start=2164 + _GLOBALDATAHANDLE._serialized_end=2198 + _DEVICEHANDLE._serialized_start=2200 + _DEVICEHANDLE._serialized_end=2252 + _CHANNELHANDLE._serialized_start=2255 + _CHANNELHANDLE._serialized_end=2435 + _CHANNELHANDLE_CHANNELTYPE._serialized_start=2334 + _CHANNELHANDLE_CHANNELTYPE._serialized_end=2435 + _DEVICEASSIGNMENTPROTO._serialized_start=2438 + _DEVICEASSIGNMENTPROTO._serialized_end=2635 + _DEVICEASSIGNMENTPROTO_COMPUTATIONDEVICE._serialized_start=2588 + _DEVICEASSIGNMENTPROTO_COMPUTATIONDEVICE._serialized_end=2635 + _LITERALPROTO._serialized_start=2638 + _LITERALPROTO._serialized_end=3116 + _WINDOWDIMENSION._serialized_start=3119 + _WINDOWDIMENSION._serialized_end=3282 + _WINDOW._serialized_start=3284 + _WINDOW._serialized_end=3334 + _GATHERDIMENSIONNUMBERS._serialized_start=3337 + _GATHERDIMENSIONNUMBERS._serialized_end=3531 + _SCATTERDIMENSIONNUMBERS._serialized_start=3534 + _SCATTERDIMENSIONNUMBERS._serialized_end=3749 + _CONVOLUTIONDIMENSIONNUMBERS._serialized_start=3752 + _CONVOLUTIONDIMENSIONNUMBERS._serialized_end=4096 + _DOTDIMENSIONNUMBERS._serialized_start=4099 + _DOTDIMENSIONNUMBERS._serialized_end=4252 + _SPARSITYDESCRIPTOR._serialized_start=4254 + _SPARSITYDESCRIPTOR._serialized_end=4363 + _TRIANGULARSOLVEOPTIONS._serialized_start=4366 + _TRIANGULARSOLVEOPTIONS._serialized_end=4589 + _TRIANGULARSOLVEOPTIONS_TRANSPOSE._serialized_start=4509 + _TRIANGULARSOLVEOPTIONS_TRANSPOSE._serialized_end=4589 + 
_CHOLESKYOPTIONS._serialized_start=4591 + _CHOLESKYOPTIONS._serialized_end=4623 + _SORTOPTIONS._serialized_start=4625 + _SORTOPTIONS._serialized_end=4658 + _FRONTENDATTRIBUTES._serialized_start=4660 + _FRONTENDATTRIBUTES._serialized_end=4771 + _FRONTENDATTRIBUTES_MAPENTRY._serialized_start=4729 + _FRONTENDATTRIBUTES_MAPENTRY._serialized_end=4771 + _STATISTIC._serialized_start=4773 + _STATISTIC._serialized_end=4821 + _STATISTICSVIZ._serialized_start=4823 + _STATISTICSVIZ._serialized_end=4907 + _OPSHARDING._serialized_start=4910 + _OPSHARDING._serialized_end=5505 + _OPSHARDING_TYPE._serialized_start=5387 + _OPSHARDING_TYPE._serialized_end=5469 + _OPSHARDING_SHARDGROUPTYPE._serialized_start=5471 + _OPSHARDING_SHARDGROUPTYPE._serialized_end=5505 + _REPLICAGROUP._serialized_start=5507 + _REPLICAGROUP._serialized_end=5542 + _IOTAREPLICAGROUPLISTPROTO._serialized_start=5545 + _IOTAREPLICAGROUPLISTPROTO._serialized_end=5687 + _COLLECTIVEDEVICELISTPROTO._serialized_start=5690 + _COLLECTIVEDEVICELISTPROTO._serialized_end=5825 + _SOURCETARGET._serialized_start=5827 + _SOURCETARGET._serialized_end=5873 + _PRECISIONCONFIG._serialized_start=5876 + _PRECISIONCONFIG._serialized_end=6445 + _PRECISIONCONFIG_PRECISION._serialized_start=6005 + _PRECISIONCONFIG_PRECISION._serialized_end=6071 + _PRECISIONCONFIG_ALGORITHM._serialized_start=6074 + _PRECISIONCONFIG_ALGORITHM._serialized_end=6445 + _PARAMETERREPLICATION._serialized_start=6447 + _PARAMETERREPLICATION._serialized_end=6505 + _WHILELOOPBACKENDCONFIG._serialized_start=6507 + _WHILELOOPBACKENDCONFIG._serialized_end=6630 + _WHILELOOPBACKENDCONFIG_KNOWNTRIPCOUNT._serialized_start=6603 + _WHILELOOPBACKENDCONFIG_KNOWNTRIPCOUNT._serialized_end=6630 + _OUTPUTOPERANDALIASING._serialized_start=6632 + _OUTPUTOPERANDALIASING._serialized_end=6735 + _ORIGINALARRAYPROTO._serialized_start=6737 + _ORIGINALARRAYPROTO._serialized_end=6830 + _ORIGINALVALUEPROTO._serialized_start=6832 + _ORIGINALVALUEPROTO._serialized_end=6893 +# 
@@protoc_insertion_point(module_scope) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..aa540bda4f8fa4910c27034d66ab14aa026cd25b Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..a9353d539bbaa29836c590f2995770ee54f59554 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/layout_pb2.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/layout_pb2.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..70a3114b50627ac51c6daa5cca24acb4e7aa1a90 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/__pycache__/layout_pb2.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/layout_pb2.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/layout_pb2.py new file mode 100644 index 0000000000000000000000000000000000000000..02ae56365aed4ab3ce3abfa91bef883b5c2eab59 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/proto/layout_pb2.py @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +# Generated by the protocol buffer compiler. DO NOT EDIT! +# source: tensorflow/dtensor/proto/layout.proto +"""Generated protocol buffer code.""" +from google.protobuf.internal import builder as _builder +from google.protobuf import descriptor as _descriptor +from google.protobuf import descriptor_pool as _descriptor_pool +from google.protobuf import symbol_database as _symbol_database +# @@protoc_insertion_point(imports) + +_sym_db = _symbol_database.Default() + + + + +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n%tensorflow/dtensor/proto/layout.proto\x12\x12tensorflow.dtensor\"+\n\x0cShardingSpec\x12\x15\n\rsharding_spec\x18\x02 \x01(\tJ\x04\x08\x01\x10\x02\"0\n\x12MeshDimensionProto\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0c\n\x04size\x18\x02 \x01(\x03\"\xfb\x01\n\x0bLayoutProto\x12\x38\n\x0esharding_specs\x18\x01 \x03(\x0b\x32 .tensorflow.dtensor.ShardingSpec\x12\x32\n\x0bmesh_config\x18\x02 \x01(\x0b\x32\x1d.tensorflow.dtensor.MeshProto\x12\x38\n\x04type\x18\x03 \x01(\x0e\x32*.tensorflow.dtensor.LayoutProto.LayoutType\"D\n\nLayoutType\x12\x0b\n\x07UNKNOWN\x10\x00\x12\n\n\x06STATIC\x10\x01\x12\n\n\x06PARTED\x10\x02\x12\x11\n\rSINGLE_DEVICE\x10\x03\"\xeb\x01\n\tMeshProto\x12?\n\x0fmesh_dimensions\x18\x01 
\x03(\x0b\x32&.tensorflow.dtensor.MeshDimensionProto\x12\x19\n\x11global_device_ids\x18\x02 \x03(\x03\x12\x18\n\x10local_device_ids\x18\x04 \x03(\x03\x12\x15\n\rlocal_devices\x18\x05 \x03(\t\x12\x16\n\x0eglobal_devices\x18\x06 \x03(\t\x12\x0c\n\x04name\x18\x03 \x01(\t\x12\x14\n\x0cuse_xla_spmd\x18\x07 \x01(\x08\x12\x15\n\rsingle_device\x18\x08 \x01(\tb\x06proto3') + +_builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, globals()) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'tensorflow.dtensor.proto.layout_pb2', globals()) +if _descriptor._USE_C_DESCRIPTORS == False: + + DESCRIPTOR._options = None + _SHARDINGSPEC._serialized_start=61 + _SHARDINGSPEC._serialized_end=104 + _MESHDIMENSIONPROTO._serialized_start=106 + _MESHDIMENSIONPROTO._serialized_end=154 + _LAYOUTPROTO._serialized_start=157 + _LAYOUTPROTO._serialized_end=408 + _LAYOUTPROTO_LAYOUTTYPE._serialized_start=340 + _LAYOUTPROTO_LAYOUTTYPE._serialized_end=408 + _MESHPROTO._serialized_start=411 + _MESHPROTO._serialized_end=646 +# @@protoc_insertion_point(module_scope) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..b38604828608850d6d3ae6391bb3390e78599cb1 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__init__.py @@ -0,0 +1,17 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DTensor Python API.""" + +# This file is left empty intentionally. diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_checkpoint.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_checkpoint.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..81ddc96a68552c6a19f9abccab08da570832e890 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_checkpoint.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_variable.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_variable.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9368a549b9b9dd05a5c4a7981bda88ec1d1978ae Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/d_variable.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/dtensor_device.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/dtensor_device.cpython-310.pyc new file mode 100644 index 
0000000000000000000000000000000000000000..380ef32b19e5bacd996d9d75cb6169b5bda49333 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/dtensor_device.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/gen_dtensor_ops.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/gen_dtensor_ops.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9741726a4a916965f020edcb00fe52bfc97c76a9 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/gen_dtensor_ops.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/input_util.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/input_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..dc5cfd7ce90cf29c1cb882095015db30c0f8eaf3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/input_util.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/layout.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/layout.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..231a687d32fe414744c4bce0de12650786ab063f Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/layout.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/mesh_util.cpython-310.pyc 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/mesh_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..d8b226e4d28cea32071d54a0795d9626dc3cb6f6 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/mesh_util.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/numpy_util.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/numpy_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..886544791ec7f3e016c1b32901af9cbf79e21f73 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/numpy_util.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/save_restore.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/save_restore.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..2a799e15d82bbc491a8f5b19e95ca4f4cb6690a3 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/save_restore.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/tpu_util.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/tpu_util.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..ad52d3f3cfdea74a63c84455136437ba168c741e Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/__pycache__/tpu_util.cpython-310.pyc 
differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/accelerator_util.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/accelerator_util.py new file mode 100644 index 0000000000000000000000000000000000000000..b1e96c169de4e1641ef7e5fa5d865750715462ff --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/accelerator_util.py @@ -0,0 +1,300 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Utility for working with accelerator systems.""" + +from typing import List, Optional + +from absl import logging + +from tensorflow.core.protobuf import cluster_pb2 +from tensorflow.core.protobuf import tensorflow_server_pb2 +from tensorflow.dtensor.python import config +from tensorflow.dtensor.python import tpu_util +from tensorflow.python.eager import context +from tensorflow.python.framework import config as tf_config +from tensorflow.python.platform import remote_utils +from tensorflow.python.util.tf_export import tf_export + +_INITIALIZED_ACCELERATOR_SYSTEM_TYPE = None + + +def is_initialized() -> bool: + """Returns whether accelerator system has been initialized.""" + return bool(_INITIALIZED_ACCELERATOR_SYSTEM_TYPE) + + +def set_initialized(value): + """Sets if accelerator system has been initialized.""" + global _INITIALIZED_ACCELERATOR_SYSTEM_TYPE + _INITIALIZED_ACCELERATOR_SYSTEM_TYPE = value + + +def initialize_multi_client_cluster(job_name: str, + dtensor_jobs: List[str], + client_id: int, + collective_leader: str, + port: Optional[int] = None, + gpu_use_nccl_communication: bool = False, + enable_coordination_service: bool = True): + """Initialize GRPC servers and collectives for multi-client DTensor setup. + + This function can be used to initialize a multi-client cluster and enable + collective ops. GRPC servers are necessary in the multi-client mode, even + when the number of clientis is 1. + + NOTE: this function must be called in an eager context. + + Args: + job_name: The job name used by all clients in the DTensor cluster. + dtensor_jobs: A list of the DTensor client jobs participating in the + cluster. Must be strings of the form "hostname:port". + client_id: The ID of the DTensor client this function is being called in. + collective_leader: The job/task that will be used to run collectives. + port: The port this client's GRPC server will run on. 
If omitted, use the + port from dtensor_jobs for this client. + gpu_use_nccl_communication: if True, configure TensorFlow to use NCCL by + default. + enable_coordination_service: If true, enable distributed coordination + service to make sure that workers know the devices on each other, a + prerequisite for data transfer through cross-worker rendezvous. + + Raises: + RuntimeError: If running inside a tf.function. + """ + assert context.executing_eagerly() + + if not collective_leader.startswith("/job:"): + collective_leader = "/job:" + collective_leader + + context.context().configure_collective_ops( + use_nccl_communication=gpu_use_nccl_communication, + collective_leader=collective_leader) + if enable_coordination_service: + context.context().configure_coordination_service( + service_type="standalone", service_leader=collective_leader) + + config_proto = context.get_config() + + # Construct server def from the host directly instead of relying on + # TF_CONFIG. + cluster_def = cluster_pb2.ClusterDef() + # Note that for bns addresses, we will currently rely on the sorted string + # of job name as the order of assigning task ids. This might be brittle once + # we have jobs across multiple cells. 
+ cluster_def.job.add(name=job_name, tasks=dict(enumerate(dtensor_jobs))) + server_def = tensorflow_server_pb2.ServerDef( + cluster=cluster_def, + default_session_config=config_proto, + job_name=job_name, + task_index=client_id, + protocol=remote_utils.get_default_communication_protocol(), + port=port) + server_def.default_session_config.rpc_options.num_channels_per_target = 4 + server_def.default_session_config.experimental.recv_buf_max_chunk = -1 + + logging.info("Enabling collectives with server_def: %s", server_def) + + context.context().enable_collective_ops(server_def) + + context.ensure_initialized() + + +@tf_export( + "experimental.dtensor.initialize_accelerator_system", + "experimental.dtensor.initialize_tpu_system", + "experimental.dtensor.initialize_multi_client", + v1=[]) +def initialize_accelerator_system( + device_type: Optional[str] = None, + enable_coordination_service: Optional[bool] = True, + num_logical_cpu_devices: Optional[int] = None, + experimental_reset_context: Optional[bool] = False, + experimental_enable_megcore: Optional[bool] = False, +) -> str: + """Initializes accelerators and communication fabrics for DTensor. + + DTensor configures TensorFlow to run in the local mode or multi-client mode. + - In local mode, a mesh can only use devices attached to the current process. + - In multi-client mode, a mesh can span across devices from multiple clients. + + If `DTENSOR_JOBS` is non-empty, DTensor configures TensorFlow to run in the + multi-client mode using the distributed runtime. In multi-client mode devices + on different clients can communicate with each other. + + The following environment variables controls the behavior of this function. + + - `DTENSOR_JOBS`: string, a comma separated list. Each item in the list is + of format `{hostname}:{port}`. If empty, DTensor runs in the local mode. 
+ Examples of valid `DTENSOR_JOBS` values: + - 4 clients on localhost: + `localhost:10000,localhost:10001,localhost:10002,localhost:10003` + - 2 clients on host1, 2 clients on host2 + `host1:10000,host1:10001,host2:10000,host2:10003` + If the hostnames are BNS addresses, the items must be sorted in + alphabetical order. + - `DTENSOR_CLIENT_ID`: integer, between `0` to `num_clients - 1`, to identify + the client id of the current process. The default value is `0`. + - `DTENSOR_JOB_NAME`: string, a string for the name of the TensorFlow job. + The job name controls the job name section of the TensorFlow DeviceSpecs, + e.g., `job:worker` in `/job:worker/replica:0/task:0/device:TPU:0` when + the job name is `worker`. + The default value is `localhost` in local mode, and + `worker` when in the multi-client mode. All DTensor clients within the + same multi-client cluster share the same job name. + - `DTENSOR_USE_PARALLEL_EXECUTOR`: string, with its value being `pw` to + specify that the backend is Pathways, and TensorFlow otherwise. + + Args: + device_type: Type of accelerator to use, can be CPU, GPU, or TPU. If None, + uses `tf.experimental.dtensor.preferred_device_type()`. + enable_coordination_service: If true, enable distributed coordination + service to make sure that workers know the devices on each other, when + there is more than 1 client. + num_logical_cpu_devices: the number of logical CPU devices per DTensor + client. Default to the current number of logical CPU + (`dtensor.num_local_devices("CPU")`), when `device_type` is CPU, otherwise + set automatically to match the number of local GPU/TPU devices. + experimental_reset_context: Reset the tensorflow context. Behaviors of + existing TensorFlow objects (e.g. Tensors) are undefined. Set this to True + as an escape hatch, if there is no clear way to refactor your code to call + initialize_accelerator_system() before calling TensorFlow APIs that + initialize the context. 
+ experimental_enable_megcore: Optionally enable megcore in backend. + + Returns: + device_type: the type of accelerator that was initialized. + """ + global _INITIALIZED_ACCELERATOR_SYSTEM_TYPE + assert context.executing_eagerly() + + if is_initialized(): + raise ValueError( + "Accelerator system has already been initialized. " + "Call tf.experimental.dtensor.shutdown_accelerator_system() first.") + + if experimental_reset_context: + if context.context()._initialized: # pylint: disable=protected-access + logging.warn( + "experimental_reset_context is True. " + "Resetting TensorFlow context. Existing TensorFlow objects " + "(e.g. Tensors and resources) are invalidated." + ) + context.context().ensure_uninitialized() + + if context.context()._initialized: # pylint: disable=protected-access + raise ValueError( + "TensorFlow has already been initialized. " + "tf.experimental.dtensor.initialize_accelerator_system() must be " + "called before TensorFlow is initialized.") + + context.context()._clear_caches() # pylint: disable=protected-access + + if device_type is None: + device_type = config.preferred_device_type() + + device_type = device_type.upper() + if device_type not in {"CPU", "GPU", "TPU"}: + raise ValueError(f"Unknown device_type {device_type}. " + "Allowed values are CPU, GPU, or TPU") + + if config.gpu_use_nccl_communication(): + logical_gpu_count = config.num_local_devices("GPU") + physical_gpu_count = len(tf_config.list_physical_devices("GPU")) + if logical_gpu_count > physical_gpu_count: + raise ValueError( + "DTENSOR_GPU_USE_NCCL_COMMUNICATION is set for using NCCL. " + "NCCL Collectives require one to one mapping between logical and " + "physical GPUs. " + f"The number of logical GPU ({logical_gpu_count}) " + f"is more than the number of physical GPU ({physical_gpu_count})." + ) + + # Configure logical host CPU devices for accelerators. 
+ if device_type in ("GPU", "TPU"): + num_local_devices = config.num_local_devices(device_type) + if num_logical_cpu_devices is None: + num_logical_cpu_devices = max( + config.num_local_devices("CPU"), num_local_devices + ) + else: + if num_logical_cpu_devices < num_local_devices: + raise ValueError( + "If set, `num_logical_cpu_devices`" + f" (={num_logical_cpu_devices}) must be greater than or" + f" equal to the number of local {device_type} devices" + f" (={num_local_devices})" + ) + + if num_logical_cpu_devices is not None: + tf_config.set_logical_device_configuration( + tf_config.list_physical_devices("CPU")[0], + [context.LogicalDeviceConfiguration()] + * num_logical_cpu_devices, + ) + + if not config.is_local_mode(): + initialize_multi_client_cluster( + job_name=config.job_name(), + dtensor_jobs=config.jobs(), + client_id=config.client_id(), + collective_leader=config.full_job_name(task_id=0), + gpu_use_nccl_communication=config.gpu_use_nccl_communication(), + enable_coordination_service=enable_coordination_service) + else: + if device_type == "GPU": + # Enables Nccl on local mode. + context.context( # pylint: disable=protected-access + )._collective_use_nccl_communication = config.gpu_use_nccl_communication( + ) + + if device_type == "TPU" and not config.backend_is_pw(): + tpu_util.initialize_tpu_system(use_megacore=experimental_enable_megcore) + + _INITIALIZED_ACCELERATOR_SYSTEM_TYPE = device_type + + return device_type + + +@tf_export( + "experimental.dtensor.shutdown_accelerator_system", + "experimental.dtensor.shutdown_tpu_system", + v1=[]) +def shutdown_accelerator_system() -> None: + """Shuts down the accelerator system.""" + global _INITIALIZED_ACCELERATOR_SYSTEM_TYPE + try: + context.async_wait() + finally: + if not is_initialized(): + raise ValueError( + "Accelerator system is not initialized. Call " + "tf.experimental.dtensor.initialize_accelerator_system first." 
+ ) + + device_type = _INITIALIZED_ACCELERATOR_SYSTEM_TYPE + + if not config.is_local_mode(): + raise ValueError( + "Shutting down accelerator system under multi-client mode is " + "not supported." + ) + + if device_type == "TPU" and not config.backend_is_pw(): + tpu_util.shutdown_tpu_system() + + # reset TF context to stop gRPC servers. + context._reset_context() # pylint: disable=protected-access + context.context()._clear_caches() # pylint: disable=protected-access + _INITIALIZED_ACCELERATOR_SYSTEM_TYPE = None diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/api.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/api.py new file mode 100644 index 0000000000000000000000000000000000000000..4a49cd82fa33943cd64a12eef5f52111979358e7 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/api.py @@ -0,0 +1,568 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Core DTensor Python API.""" + +import contextlib +import threading +from typing import Any, Callable, Optional, Sequence + +from tensorflow.dtensor.python import dtensor_device +from tensorflow.dtensor.python import gen_dtensor_ops +from tensorflow.dtensor.python import layout as layout_lib +from tensorflow.python.eager import context +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor as tensor_lib +from tensorflow.python.util import deprecation +from tensorflow.python.util.tf_export import tf_export + +_dtensor_singleton = None +_dtensor_singleton_lock = threading.Lock() + +# ----------------------------------------------------------------------------- +# Main methods to launch DTensor computations. + + +@tf_export("experimental.dtensor.call_with_layout", v1=[]) +def call_with_layout(fn: Callable[..., + Any], layout: Optional[layout_lib.Layout], + *args, **kwargs) -> Any: + """Calls a function in the DTensor device scope if `layout` is not None. + + If `layout` is not None, `fn` consumes DTensor(s) as input and produces a + DTensor as output; a DTensor is a tf.Tensor with layout-related attributes. + + If `layout` is None, `fn` consumes and produces regular tf.Tensors. + + Args: + fn: A supported TF API function such as tf.zeros. + layout: Optional, the layout of the output DTensor. + *args: Arguments given to `fn`. + **kwargs: Keyword arguments given to `fn`. + + Returns: + The return value of `fn` transformed to a DTensor if requested. 
+ """ + if layout is not None: + if context.executing_eagerly(): + with default_mesh(layout.mesh): + with _dtensor_device()._default_layout(layout): # pylint: disable=protected-access + return fn(*args, **kwargs) + else: + return relayout(fn(*args, **kwargs), layout) + return fn(*args, **kwargs) + + +@tf_export("experimental.dtensor.run_on", v1=[]) +@deprecation.deprecated(None, "Use `dtensor.default_mesh` scope instead.") +@contextlib.contextmanager +def run_on(mesh: layout_lib.Mesh): + """Runs enclosed functions in the DTensor device scope. + + This function returns a scope. All the ops and tf.functions in this scope will + run on the DTensor device using the mesh provided. + This is useful for wrapping any tf.function that doesn't take a DTensor as + input but would like to produce DTensor as result. The scope will also make + sure all small constants be replicated as DTensor. + + Args: + mesh: A Mesh instance to extract a default mesh from. + + Yields: + A context in which all ops and tf.functions will run on the DTensor device. + """ + with default_mesh(mesh): + yield + + +@tf_export("experimental.dtensor.default_mesh", v1=[]) +@contextlib.contextmanager +def default_mesh(mesh: layout_lib.Mesh): + """Sets the default DTensor device mesh to use for enclosed functions. + + This function returns a scope. All the ops and tf.functions in this scope will + default to this DTensor mesh if a mesh cannot be inferred from any of the + inputs + This is useful for wrapping any tf.function that doesn't take a DTensor as + input but would like to produce DTensor as result. The scope will also make + sure all small constants are replicated as DTensors. + + Args: + mesh: A Mesh instance to extract a default mesh from. + + Yields: + A context in which all ops and tf.functions will run on the given mesh. 
+ """ + if not isinstance(mesh, layout_lib.Mesh): + raise ValueError(f"Expect `mesh` to be `Mesh`, got {type(mesh)}") + + with _dtensor_device()._experimental_default_mesh(mesh): # pylint: disable=protected-access + with ops.device(device_name()): + yield + + +@tf_export("experimental.dtensor.get_default_mesh", v1=[]) +def get_default_mesh() -> Optional[layout_lib.Mesh]: + """Return the default mesh under the current dtensor device context. + + In the case that dtensor device system is not initialized, this function + will return None. + + Returns: + The current default mesh for the dtensor device context. + """ + if _dtensor_singleton is None: + return None + else: + return _dtensor_singleton._current_default_mesh # pylint: disable=protected-access + + +@tf_export("experimental.dtensor.device_name", v1=[]) +def device_name() -> str: + """Returns the singleton DTensor device's name. + + This function can be used in the following way: + + ```python + import tensorflow as tf + + with tf.device(dtensor.device_name()): + # ... + ``` + """ + return _dtensor_device().name + + +@tf_export("experimental.dtensor.is_dtensor", v1=[]) +def is_dtensor(tensor) -> bool: + """Check whether the input tensor is a DTensor. + + In Python, a DTensor has the same type as a `tf.Tensor`. This method will + let you check and handle the tensor differently if a tf.Tensor is a DTensor. + + Args: + tensor: an object to be checked. + + Returns: + bool, True if the given tensor is a DTensor. + """ + return _dtensor_device().is_dtensor(tensor) + + +# ----------------------------------------------------------------------------- +# Data transfer methods. + + +@tf_export("experimental.dtensor.copy_to_mesh", v1=[]) +def copy_to_mesh( + tensor: Any, + layout: layout_lib.Layout, + source_layout: Optional[layout_lib.Layout] = None) -> tensor_lib.Tensor: + """Copies a tf.Tensor onto the DTensor device with the given layout. + + Copies a regular tf.Tensor onto the DTensor device. 
Use the mesh attached to + `layout` as target mesh. This method currently only supports replicated + layouts, or one-to-one copies for sharded layouts. + + Args: + tensor: A regular tf.Tensor to be copied as a DTensor. + layout: Target layout (and mesh) for the result DTensor. + source_layout: Source layout of the tensor before copy. This argument + is deprecated. + + Returns: + A DTensor on the DTensor device with the given layout. + """ + del source_layout + return relayout(tensor, layout) + + +@tf_export("experimental.dtensor.pack", v1=[]) +def pack(tensors: Sequence[Any], layout: layout_lib.Layout) -> Any: + """Packs `tf.Tensor` components into a DTensor. + + Packing and unpacking are inverse operations: + + ``` + * unpack(pack(tensors)) == tensors + * pack(unpack(dtensor)) == dtensor + ``` + + 1. For any DTensor on the mesh, `unpack` returns the raw components placed on + each underlying device. + 2. Packing these raw components in the same order using `pack` returns a + DTensor which should be identical to the original DTensor--both the content + value and the layout. + + **Shape, Rank, and Scalars**: The rank of the DTensor is the same as the + rank of its raw components, i.e., rank is preserved. This leads to a + consistent interpretation for packing scalar values into a DTensor. The only + valid layout for a scalar value is fully replicated, and the individual + components must be identical scalars. + + Each input `tensors[i]` will be copied to `layout.mesh.local_device[i]` + if not already on the local device. Non-local components should not be passed + to `pack`; use `copy_to_mesh` and `relayout` to place tensors on all global + devices on a mesh. + + It is the caller's responsibility to ensure that the underlying values + for `pack` adhere to the specified layout, and that only as many values are + specified as there are local devices. Pack does not move data between clients. + See examples below for more detail about layouts. 
+ + For example, assume we have a mesh `[X(2), Y(3)]`, which has in total 6 + underlying devices. Furthermore, assume that the device location mapping is + the following: + + ``` + device_ID | location X, Y + 0 0, 0 + 1 0, 1 + 2 0, 2 + 3 1, 0 + 4 1, 1 + 5 1, 2 + ``` + + 1. For 1-D vector DTensor with shape `[128]` with layout `[mesh.X]` and value + as `range(128)`, the raw components will have shape `[64]` each, and the + raw components will be: + + ``` + device_ID | raw component + 0 range(0, 64) + 1 range(0, 64) + 2 range(0, 64) + 3 range(64, 128) + 4 range(64, 128) + 5 range(64, 128) + ``` + + This also means for a 1-D DTensor with shape `[2]` and layout `[mesh.X]`, + the raw components have shape `[1]` rather than the shape for scalar values + `[]`. + + 2. For 2-D vector DTensor with shape `[2, 3]` with layout `[mesh.X, mesh.Y]` + and value as `range(6)`, this is basically a fully-sharded DTensor. + + From global view, the content looks like + ``` + [ + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + ] + ``` + + The raw components will have shape `[1, 1]` each, and have the following + content: + + ``` + device_ID | raw component + 0 [[0.0]] + 1 [[1.0]] + 2 [[2.0]] + 3 [[3.0]] + 4 [[4.0]] + 5 [[5.0]] + ``` + + 3. For a scalar value `123.0` DTensor, it can only have one legitimate layout + `[]` (no dimension, but fully replicated). + + The raw components will have shape `[]` each, and have the following + content: + + ``` + device_ID | raw component + 0 123.0 + 1 123.0 + 2 123.0 + 3 123.0 + 4 123.0 + 5 123.0 + ``` + + Again, caller of `pack` is expected to provide 6 identical value raw + components with scalar shapes. + + 4. 
For 3-D vector DTensor with shape `[2, 2, 3]` with layout + `[X, unsharded, unsharded]` and value as `range(12)`, + + From global view, the content looks like: + ``` + [ + [ + [0.0, 1.0, 2.0], + [3.0, 4.0, 5.0], + ], + [ + [6.0, 7.0, 8.0], + [9.0, 10., 11.], + ], + ] + ``` + + The raw components will have shape `[1, 2, 3]` each, and have the following + content: + + ``` + device_ID | raw component + 0 range(6).reshape([1, 2, 3]) + 1 range(6).reshape([1, 2, 3]) + 2 range(6).reshape([1, 2, 3]) + 3 range(6, 12).reshape([1, 2, 3]) + 4 range(6, 12).reshape([1, 2, 3]) + 5 range(6, 12).reshape([1, 2, 3]) + ``` + + Args: + tensors: The list of local tensor components to pack into a DTensor. + layout: The layout of the DTensor to be created. + + Returns: + A DTensor created from the individual component tensors. + + Raises: + RuntimeError: When `pack` is not called eagerly. + """ + return _dtensor_device().pack(tensors, layout) + + +@tf_export("experimental.dtensor.unpack", v1=[]) +def unpack(tensor: Any) -> Sequence[Any]: + """Unpacks a DTensor into `tf.Tensor` components. + + Packing and unpacking are inverse operations: + + ``` + * unpack(pack(tensors)) == tensors + * pack(unpack(dtensor)) == dtensor + ``` + + 1. For any DTensor on the mesh, `unpack` returns the raw components placed on + each underlying device. + 2. Packing these raw components in the same order using `pack` returns a + DTensor which should be identical to the original DTensor--both the content + value and the layout. + + See the documentation for `pack` for more information about how packing and + unpacking works. + + Args: + tensor: The DTensor to unpack. + + Returns: + The individual component tensors of the DTensor. This will include only the + client-local components, i.e. the components placed on the local devices. + + Raises: + RuntimeError: When `unpack` is not called eagerly. 
+ """ + return _dtensor_device().unpack(tensor) + + +# ----------------------------------------------------------------------------- +# Layout-related methods. + + +@tf_export("experimental.dtensor.fetch_layout", v1=[]) +def fetch_layout(tensor: tensor_lib.Tensor) -> layout_lib.Layout: + """Fetches the layout of a DTensor. + + Args: + tensor: The DTensor whose layout is to be fetched. + + Returns: + The `Layout` of this DTensor. + + Raises: + RuntimeError: When not called eagerly. + """ + return _dtensor_device().fetch_layout(tensor) + + +@tf_export("experimental.dtensor.check_layout", v1=[]) +def check_layout(tensor: tensor_lib.Tensor, layout: layout_lib.Layout) -> None: + """Asserts that the layout of the DTensor is `layout`. + + Args: + tensor: A DTensor whose layout is to be checked. + layout: The `Layout` to compare against. + + Raises: + ValueError: If the layout of `tensor` does not match the supplied `layout`. + """ + if fetch_layout(tensor) != layout: + raise ValueError("Layout of tensor: " + str(fetch_layout(tensor)) + + ", did not match expected layout: " + str(layout)) + + +@tf_export("experimental.dtensor.relayout", v1=[]) +def relayout( + tensor: tensor_lib.Tensor, + layout: layout_lib.Layout, + name: Optional[str] = None, +) -> tensor_lib.Tensor: + """Changes the layout of `tensor`. + + Changes the layout of `tensor` to `layout`. This is used to fine-tune the + behavior of ops following/connected to `tensor`, such as choosing one SPMD + expansion pattern over another. This works by forward propagating `layout` + to connected TensorFlow computation graphs during layout propagation. + + Currently, only converting layouts from replicated to sharded or sharded to + replicated per mesh dimension is supported. That is, "x, y" -> "unsharded, y" + is supported, while "x, y" -> "z, y" is not supported. 
+ + We also support a special "match" sharding spec, which instructs the relayout + to act as an identity operation with respect to any sharding on these + mesh dimensions. + + Relayout is internally lowered to a set of Split and/or AllToAll ops. When + tensor layouts are converted from replicated to sharded, the cost is + comparatively low because we only insert Split ops and no cross-device + communication is needed. However, when tensor layouts are converted from + sharded to replicated, cross-device communication may occur, causing potential + performance impact. + + Args: + tensor: A DTensor to specify a new layout for. + layout: A Layout object specifying a new sharding spec. + name: name of the Op. + + Returns: + A DTensor output from the Relayout op. + """ + layout_str = layout.to_string() + with default_mesh(layout.mesh): + return gen_dtensor_ops.relayout(tensor, layout_str, name=name) + + +@tf_export("experimental.dtensor.relayout_like", v1=[]) +def relayout_like( + tensor: tensor_lib.Tensor, + layout_tensor: tensor_lib.Tensor, + name: Optional[str] = None, +) -> tensor_lib.Tensor: + """Changes the layout of `tensor` to the same as `layout_tensor`. + + `relayout_like` is often used inside a `tf.function`, to ensure a tensor is + placed to the same mesh and with the same layout as another tensor. + + The backward gradient of a `relayout` is a `relayout_like` operation, to + ensure the backward tensor has the same layout as the forward input tensor: + + ``` + @ops.RegisterGradient("Relayout") + def _relayout_gradient(op, grad): + return relayout_like(grad, layout_tensor=op.inputs[0]) + ``` + + Here is another illustrative example: + + ``` + @tf.function + def func(x): + z = tf.ones(x.shape) + z = dtensor.relayout_like(z, x) + return x + z + + with dtensor.default_mesh(cpu_mesh): + x = tf.ones((4, 4)) + + with dtensor.default_mesh(gpu_mesh): + y = func(x) + + # y would be on the cpu mesh, following the mesh of x. 
+ ``` + + Args: + tensor: A DTensor to specify a new layout for. + layout_tensor: A Tensor object whose layout will be used for the layout of + result. The shape and type of layout_tensor are irrelevant. + name: name of the Op. + + Returns: + A DTensor output from the RelayoutLike op. + """ + + return gen_dtensor_ops.relayout_like( + input=tensor, layout_input=layout_tensor, name=name + ) + + +@tf_export("experimental.dtensor._reset_dtensor_device", v1=[]) +def reset_dtensor_device(is_async: bool) -> None: + """Resets the Eager execution device for DTensor. + + This function is only intended for testing and diagnostics. + + Args: + is_async: If True, the device uses async execution. + """ + global _dtensor_singleton + device = dtensor_device.DTensorDevice(meshes=[], is_async=is_async) + _dtensor_singleton = device + + +def _dtensor_device() -> dtensor_device.DTensorDevice: + with _dtensor_singleton_lock: + if _dtensor_singleton is None: + reset_dtensor_device(is_async=True) + return _dtensor_singleton + + +def _reset() -> None: + global _dtensor_singleton + with _dtensor_singleton_lock: + if _dtensor_singleton is not None: + _dtensor_singleton.clear_tpu_core_ids() + _dtensor_singleton = None + + +# ---------------------------------------------------------------------------- +# Gradients + + +@ops.RegisterGradient("Relayout") +def _relayout_gradient(op, grad): + grad = gen_dtensor_ops.relayout_like(grad, layout_input=op.inputs[0]) + return grad + + +@ops.RegisterGradient("RelayoutLike") +def _relayout_grad_gradient(op, grad): + # Gradient of RelayoutGrad is relayout to the original Relayout's output. + grad = gen_dtensor_ops.relayout_like(grad, layout_input=op.inputs[0]) + # Return None for forward_input's partial gradient since it is not connected + # to the target's gradient. 
+ return grad, None + + +@ops.RegisterGradient("CopyToMesh") +def _copy_to_mesh_gradient(op, grad): + grad = gen_dtensor_ops.copy_to_mesh_grad( + grad, + forward_input=op.inputs[0], + ) + return grad + + +@ops.RegisterGradient("CopyToMeshGrad") +def _copy_to_mesh_grad_gradient(op, grad): + grad = gen_dtensor_ops.copy_to_mesh_grad( + grad, + forward_input=op.inputs[0], + ) + return grad, None diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/config.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/config.py new file mode 100644 index 0000000000000000000000000000000000000000..d03491d20bbe708bf474c5c8eeb96c19fc85ce39 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/config.py @@ -0,0 +1,219 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DTensor Configuration API.""" + +import os +from typing import List, Optional, Union + +from tensorflow.python.eager import context +from tensorflow.python.framework import config as tf_config +from tensorflow.python.framework import device as tf_device +from tensorflow.python.util.tf_export import tf_export + +_DT_CLIENT_ID = "DTENSOR_CLIENT_ID" +# DTENSOR_NUM_CLIENTS is removed, but some DTensor users still use this symbol. 
+_DT_NUM_CLIENTS = "DTENSOR_NUM_CLIENTS" +_DT_JOB_NAME = "DTENSOR_JOB_NAME" +_DT_JOBS = "DTENSOR_JOBS" +_DT_HEARTBEAT_ENABLED = "DTENSOR_ENABLE_HEARTBEAT" + + +# All functions in this file can be used before calling +# `tf.experimental.dtensor.initialize_accelerator_system`. + + +# ----------------------------------------------------------------------------- +# Distributed training-related methods. +# +# Most users should use DTensor utility methods to create a mesh. The methods +# here are only for advanced users who want to fully customize their meshes. +# Note that local_devices and num_local_devices return the actual number of +# locally attached devices. The others are set through environment variables. + + +@tf_export("experimental.dtensor.local_devices", v1=[]) +def local_devices( + device_type: str, + for_client_id: Optional[int] = None) -> List[tf_device.DeviceSpec]: + """Returns a list of device specs configured on this client.""" + if device_type.upper() not in ["CPU", "GPU", "TPU"]: + raise ValueError(f"Device type {device_type} is not CPU, GPU, or TPU.") + + if for_client_id is None: + for_client_id = client_id() + + # Return fully qualified device specs, sorted by increasing device index. + return [ + tf_device.DeviceSpec( # pylint: disable=g-complex-comprehension + job=job_name(), + replica=0, # replica is deprecated and mostly hard-coded now. + task=for_client_id, + device_type=device_type, + device_index=i) for i in range(num_local_devices(device_type)) + ] + + +@tf_export("experimental.dtensor.num_local_devices", v1=[]) +def num_local_devices(device_type: str) -> int: + """Returns the number of devices of device_type configured on this client.""" + + # Reads from config because CPU and GPU can use logical devices. 
+ if device_type.upper() in ["CPU", "GPU"]: + context_config = context.get_config() + return context_config.device_count[device_type.upper()] + + return len(tf_config.list_physical_devices(device_type)) + + +@tf_export("experimental.dtensor.num_global_devices", v1=[]) +def num_global_devices(device_type: str) -> int: + """Returns the number of devices of device_type in this DTensor cluster.""" + return num_local_devices(device_type) * num_clients() + + +@tf_export("experimental.dtensor.client_id", v1=[]) +def client_id() -> int: + """Returns this client's ID.""" + # If missing, assume running with a single client with client_id of 0. + client_id_value = int(os.environ.get(_DT_CLIENT_ID, "0")) + if client_id_value < 0: + raise ValueError(f"Environment variable {_DT_CLIENT_ID} " + f"must be >= 0, got {client_id_value}. ") + if client_id_value >= num_clients(): + raise ValueError(f"Environment variable {_DT_CLIENT_ID} " + f"must be < {num_clients()}, got {client_id_value}") + return client_id_value + + +@tf_export("experimental.dtensor.num_clients", v1=[]) +def num_clients() -> int: + """Returns the number of clients in this DTensor cluster.""" + if is_local_mode(): + return 1 + return len(jobs()) + + +@tf_export("experimental.dtensor.job_name", v1=[]) +def job_name() -> str: + """Returns the job name used by all clients in this DTensor cluster.""" + # If missing, assumes the program runs locally and use localhost as job name + # per TensorFlow convention. + return os.environ.get(_DT_JOB_NAME, + "localhost" if num_clients() == 1 else "worker") + + +@tf_export("experimental.dtensor.full_job_name", v1=[]) +def full_job_name(task_id: Optional[int] = None) -> str: + """Returns the fully qualified TF job name for this or another task.""" + # If task_id is None, use this client's ID, which is equal to its task ID. + if task_id is None: + task_id = client_id() + # In local runs and unit tests, there should be exactly one client running + # on one TF task. 
@tf_export("experimental.dtensor.jobs", v1=[])
def jobs() -> List[str]:
  """Returns a list of job names of all clients in this DTensor cluster.

  The names are read from the comma-separated `DTENSOR_JOBS` environment
  variable.

  Returns:
    The job names in the order they appear in the variable, or an empty list
    when the variable is unset or empty.

  Raises:
    ValueError: If any name starts with "/bns/" and the entries are not
      sorted by task ID.
  """
  d_jobs = os.environ.get(_DT_JOBS)
  # An empty value would otherwise split into [""], i.e. one nameless job,
  # which makes `is_local_mode()` report a non-local run.
  if not d_jobs:
    return []
  d_jobs_list = d_jobs.split(",")

  def _order_key(name):
    # `_bns_task_id` yields an int when the name parses and the name itself
    # when it does not. Python 3 cannot order int against str, so tag each
    # key with its kind: parsed IDs sort first, unparsed names after them.
    task_id = _bns_task_id(name)
    return (isinstance(task_id, str), task_id)

  # Validate ordering for BNS style job names.
  # For definition of BNS, refer to https://research.google/pubs/pub43438/.
  if any(name.startswith("/bns/") for name in d_jobs_list):
    if d_jobs_list != sorted(d_jobs_list, key=_order_key):
      raise ValueError(
          f"Unexpected DTENSOR_JOBS content {d_jobs}. Sort entries "
          "in DTENSOR_JOBS because cluster construction relies on "
          "the order.")

  return d_jobs_list
def is_gpu_present() -> bool:
  """Returns true if GPU devices are present."""
  # Any physical device that TensorFlow lists under "GPU" counts.
  gpu_devices = tf_config.list_physical_devices("GPU")
  return bool(gpu_devices)
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""DTensor Checkpoint. + +Note that this module contains deprecated functionality, and the DTensor related +checkpoint has been integrated with tf.train.Checkpoint. It can be used out of +the box to save and restore dtensors. +""" + +from typing import Dict, List, Optional +import weakref + +from tensorflow.core.protobuf import trackable_object_graph_pb2 + +from tensorflow.dtensor.python import api +from tensorflow.dtensor.python import d_variable +from tensorflow.dtensor.python import gen_dtensor_ops +from tensorflow.dtensor.python import layout +from tensorflow.dtensor.python import save_restore +from tensorflow.python.checkpoint import checkpoint as util +from tensorflow.python.checkpoint import checkpoint_options +from tensorflow.python.checkpoint import graph_view as graph_view_lib +from tensorflow.python.checkpoint import restore as restore_lib +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.trackable import base +from tensorflow.python.trackable import data_structures +from tensorflow.python.training import py_checkpoint_reader +from tensorflow.python.training.saving import saveable_object +from tensorflow.python.training.saving import saveable_object_util +from tensorflow.python.util import deprecation +from tensorflow.python.util import nest +from 
class _DSaver:  # pylint: disable=protected-access
  """A single device saver that places tensors on DTensor Device.

  Holds a mesh and a list of saveable objects. `save` hands their tensors to
  `save_restore.sharded_save`; `restore` reads them back through the DTensor
  restore op and passes the results to each saveable's `restore`.
  """

  def __init__(self, mesh: layout.Mesh,
               saveable_objects: List[saveable_object.SaveableObject]):
    """Stores the mesh and the saveables this saver operates on.

    Args:
      mesh: Mesh whose host mesh is used for replicated layouts and which is
        passed to `sharded_save`.
      saveable_objects: The `SaveableObject`s to save or restore.
    """
    self._saveable_objects = saveable_objects
    self._mesh = mesh

  def save(
      self,
      file_prefix: str,
      options: Optional[checkpoint_options.CheckpointOptions] = None
  ) -> Optional[ops.Operation]:
    """Saves the saveable objects to a checkpoint with `file_prefix`.

    Also query the generated shards from the distributed DTensor SaveV2 ops and
    do a MergeV2 on those. Each op here is backed by a global_barrier to avoid
    racing from multiple clients.

    Args:
      file_prefix: A string or scalar string Tensor containing the prefix to
        save under.
      options: Optional `CheckpointOptions` object. This is unused in DTensor.

    Returns:
      An `Operation`, or None when executing eagerly.

    Raises:
      ValueError: If `options.experimental_io_device` is set.
    """
    if options is not None and options.experimental_io_device is not None:
      raise ValueError(
          "Specified experimental_io_device in DTensor checkpoint is not supported."
      )
    del options
    # Three parallel lists, one entry per spec that carries a tensor.
    tensor_names = []
    tensors = []
    tensor_slices = []
    for saveable in self._saveable_objects:
      for spec in saveable.specs:
        tensor = spec.tensor
        # A tensor value of `None` indicates that this SaveableObject gets
        # recorded in the object graph, but that no value is saved in the
        # checkpoint.
        if tensor is not None:
          if api.device_name() != spec.device:
            # Some small tensors are placed on CPU0 from save manager and
            # broadcasted to DTensor mesh, e.g., SaveCounter.
            # One copy per local host-mesh device, packed as fully replicated.
            tensor = api.pack([tensor] *
                              self._mesh.host_mesh().num_local_devices(),
                              layout.Layout.replicated(
                                  self._mesh.host_mesh(),
                                  rank=tensor.shape.rank))
          tensor_names.append(spec.name)
          tensors.append(tensor)
          tensor_slices.append(spec.slice_spec)
    return save_restore.sharded_save(self._mesh, file_prefix, tensor_names,
                                     tensor_slices, tensors)

  def restore(
      self,
      file_prefix: str,
      options: Optional[checkpoint_options.CheckpointOptions] = None
  ) -> Dict[str, ops.Operation]:
    """Restore the saveable objects from a checkpoint with `file_prefix`.

    Args:
      file_prefix: A string or scalar string Tensor containing the prefix for
        files to read from.
      options: Optional `CheckpointOptions` object. This is unused in DTensor.

    Returns:
      A dictionary mapping from SaveableObject names to restore operations.

    Raises:
      ValueError: If `options.experimental_io_device` is set.
    """
    if options is not None and options.experimental_io_device is not None:
      raise ValueError(
          "Specified experimental_io_device in DTensor checkpoint is not "
          "supported.")
    del options
    # Flat list of (name, slice, dtype, layout, global shape), one per spec.
    restore_specs = []
    # Per-saveable lists of spec names; used below to regroup the flat list
    # of restored tensors by saveable.
    tensor_structure = []
    for saveable in self._saveable_objects:
      saveable_tensor_structure = []
      tensor_structure.append(saveable_tensor_structure)
      # DTensor change 1 : Gather shapes and layout from original saveable
      # specs.
      # Note that this relies on the fact that the variables are already
      # initialized -- which isn't the behavior we want eventually.
      # TODO(b/159035705): Handle the variable initialization in restore.
      for spec in saveable.specs:
        saveable_tensor_structure.append(spec.name)
        if isinstance(spec, d_variable.DSaveSpec):
          restore_specs.append((spec.name, spec.slice_spec, spec.dtype,
                                spec.layout, spec.global_shape))
        # Fall back to replicated layouts for non-DTensor saves that constructs
        # normal SaveSpec.
        elif isinstance(spec, saveable_object.SaveSpec):
          restore_specs.append(
              (spec.name, spec.slice_spec, spec.dtype,
               layout.Layout.replicated(self._mesh.host_mesh(),
                                        spec.tensor.shape.rank).to_string(),
               spec.tensor.shape.as_list()))
    # NOTE(review): if `restore_specs` is empty, `zip(*restore_specs)` yields
    # nothing and this five-way unpacking raises ValueError -- confirm callers
    # never pass saveables without specs.
    tensor_names, tensor_slices, tensor_dtypes, layouts, global_shapes = zip(
        *restore_specs)
    with ops.device(api.device_name()):
      # DTensor change 2 : Run on customized DTensor RestoreV2 op rather than
      # stock TF io_ops.RestoreV2.
      restored_tensors = gen_dtensor_ops.d_tensor_restore_v2(
          prefix=file_prefix,
          tensor_names=tensor_names,
          shape_and_slices=tensor_slices,
          input_shapes=global_shapes,
          input_layouts=layouts,
          dtypes=tensor_dtypes)
    structured_restored_tensors = nest.pack_sequence_as(tensor_structure,
                                                        restored_tensors)
    restore_ops = {}
    for saveable, restored_tensors in zip(self._saveable_objects,
                                          structured_restored_tensors):
      restore_ops[saveable.name] = saveable.restore(
          restored_tensors, restored_shapes=None)
    return restore_ops
+ This argument is not implemented for DTensorCheckpoint. + reader: A CheckpointReader. Creates one lazily if None. + + Returns: + When graph building, a list of restore operations, either cached or newly + created, to restore `tensor_saveables`. + """ + del registered_savers + + restore_ops = [] + # Eagerly run restorations for Python state. + if python_positions: + # Lazily create the NewCheckpointReader, since this requires file access + # and we may not have any Python saveables. + if reader is None: + reader = py_checkpoint_reader.NewCheckpointReader(self.save_path_string) + for position in python_positions: + key = position.object_proto.attributes[0].checkpoint_key + position.trackable.deserialize(reader.get_tensor(key)) + + # If we have new SaveableObjects, extract and cache restore ops. + if tensor_saveables: + validated_saveables = saveable_object_util.validate_and_slice_inputs( + tensor_saveables) + validated_names = set(saveable.name for saveable in validated_saveables) + if set(tensor_saveables.keys()) != validated_names: + raise AssertionError( + ("Saveable keys changed when validating. Got back %s, was " + "expecting %s") % (tensor_saveables.keys(), validated_names)) + # DTensor change: Use _DSaver that does restore on DTensor with + # customized DTensorRestoreV2 op. 
def _gather_saveables(self, object_graph_tensor=None):
  """Collects the flat list of saveables plus the object-graph saveable.

  Args:
    object_graph_tensor: Optional tensor for the serialized object graph. It
      is replaced by the one popped from the serialized tensors whenever the
      saveables cache is not set.

  Returns:
    A tuple `(named_saveable_objects, graph_proto, feed_additions,
    registered_savers)`.
  """
  # The base Checkpoint class does not hand back SaveableObjects, so either
  # reuse the cached ones or build them from the serialized tensors.
  gathered = self._gather_serialized_tensors(object_graph_tensor)
  serialized_tensors, feed_additions, registered_savers, graph_proto = gathered

  cache = self._saveables_cache
  if cache is None:
    # `serialized_tensors_to_saveable_cache` isn't equipped to handle the
    # object graph tensor, so take it out of `serialized_tensors` first.
    graph_entry = serialized_tensors.pop(None)
    object_graph_tensor = graph_entry[base.OBJECT_GRAPH_PROTO_KEY]
    cache = saveable_object_util.serialized_tensors_to_saveable_cache(
        serialized_tensors)

  # Flatten the two-level cache of saveable lists.
  named_saveable_objects = [
      saveable
      for saveables_by_name in cache.values()
      for saveable_group in saveables_by_name.values()
      for saveable in saveable_group
  ]
  graph_saveable = base.NoRestoreSaveable(
      tensor=object_graph_tensor, name=base.OBJECT_GRAPH_PROTO_KEY)
  named_saveable_objects.append(graph_saveable)
  return (named_saveable_objects, graph_proto, feed_additions,
          registered_savers)
+ object_graph_tensor: A `Tensor` to which the current object graph will be + fed. + options: `CheckpointOptions` object. + update_ckpt_state: Optional bool flag. Indiciate whether the internal + checkpoint state needs to be updated. This is used for async checkpoint, + which DTrackableSaver currently does not support. + TODO(chienchunh): Implement async checkpoint for DTrackableSaver. + + Returns: + A two-element tuple with a filename tensor and a feed_dict of tensors to + feed when running it (if graph building). The feed dict contains the + current object graph and any Python state to be saved in the + checkpoint. When executing eagerly only the first argument is meaningful. + """ + (named_saveable_objects, graph_proto, feed_additions, + unused_registered_savers) = self._gather_saveables( + object_graph_tensor=object_graph_tensor) + if (self._last_save_object_graph != graph_proto + # When executing eagerly, we need to re-create SaveableObjects each time + # save() is called so they pick up new Tensors passed to their + # constructors. That means the Saver needs to be copied with a new + # var_list. + or context.executing_eagerly() or ops.inside_function()): + # This is needed to avoid MultiDeviceSaver creating unnecessary MergeV2 + # ops in DTensor. It is an issue when saving TPU Variables on host CPU + # mesh given our limited expressiveness in API and hard-coded logic in + # broadcasting -- for a small constant Tensor with no extra information, + # we place it on the first registered mesh(A.K.A. default mesh). + saver = _DSaver(self._mesh, named_saveable_objects) + save_op = saver.save(file_prefix, options=options) + with ops.device("/cpu:0"): + with ops.control_dependencies([save_op]): + self._cached_save_operation = array_ops.identity(file_prefix) + self._last_save_object_graph = graph_proto + return self._cached_save_operation, feed_additions + + # TODO(b/180466245): Use proper mesh placement semantic. 
def restore(self, save_path, options=None):
  """Restore a training checkpoint with host mesh placement.

  Args:
    save_path: Path prefix of the checkpoint to read. When None, nothing is
      read and an initialization-only status is returned.
    options: Optional `CheckpointOptions`; a default instance is created when
      none is given.

  Returns:
    `util.InitializationOnlyStatus` when `save_path` is None,
    `util.NameBasedSaverStatus` when the checkpoint has no object graph
    proto, otherwise `util.CheckpointLoadStatus`.
  """
  options = options or checkpoint_options.CheckpointOptions()
  if save_path is None:
    return util.InitializationOnlyStatus(self._graph_view, ops.uid())
  reader = py_checkpoint_reader.NewCheckpointReader(save_path)
  graph_building = not context.executing_eagerly()
  if graph_building:
    dtype_map = None
  else:
    dtype_map = reader.get_variable_to_dtype_map()
  try:
    object_graph_string = reader.get_tensor(base.OBJECT_GRAPH_PROTO_KEY)
  except errors_impl.NotFoundError:
    # The object graph proto does not exist in this checkpoint. Try the
    # name-based compatibility mode.
    restore_coordinator = util._NameBasedRestoreCoordinator(  # pylint: disable=protected-access
        save_path=save_path,
        dtype_map=dtype_map)
    if not graph_building:
      for existing_trackable in self._graph_view.list_objects():
        # pylint: disable=protected-access
        existing_trackable._maybe_initialize_trackable()
        existing_trackable._name_based_restores.add(restore_coordinator)
        existing_trackable._name_based_attribute_restore(restore_coordinator)
        # pylint: enable=protected-access
    return util.NameBasedSaverStatus(
        restore_coordinator, graph_view=self._graph_view)

  if graph_building:
    if self._file_prefix_placeholder is None:
      # DTensor change: provide a hint for mesh broadcasting to put the input
      # onto the host mesh.
      self._file_prefix_placeholder = api.pack(
          [constant_op.constant("model")] * self._mesh.num_local_devices(),
          layout.Layout.replicated(self._mesh.host_mesh(), rank=0))
    file_prefix_tensor = self._file_prefix_placeholder
    file_prefix_feed_dict = {self._file_prefix_placeholder: save_path}
  else:
    # DTensor change: provide a hint for mesh broadcasting to put the input
    # onto the host mesh.
    file_prefix_tensor = api.pack(
        [constant_op.constant(save_path)] * self._mesh.num_local_devices(),
        layout.Layout.replicated(self._mesh.host_mesh(), rank=0))
    file_prefix_feed_dict = None
  object_graph_proto = (trackable_object_graph_pb2.TrackableObjectGraph())
  object_graph_proto.ParseFromString(object_graph_string)
  # DTensor Change: Hook the proper DSaver in restore.
  checkpoint = _DCheckpointRestoreCoordinator(
      mesh=self._mesh,
      object_graph_proto=object_graph_proto,
      save_path=save_path,
      save_path_tensor=file_prefix_tensor,
      reader=reader,
      restore_op_cache=self._restore_op_cache,
      graph_view=self._graph_view,
      options=options,
      saveables_cache=self._saveables_cache)
  restore_lib.CheckpointPosition(
      checkpoint=checkpoint, proto_id=0).restore(self._graph_view.root)

  # Attached dependencies are not attached to the root, so should be restored
  # separately.
  if self._graph_view.attached_dependencies:
    for ref in self._graph_view.attached_dependencies:
      if ref.name == "root":
        # Root dependency is automatically added to attached dependencies --
        # this can be ignored since it maps back to the root object.
        continue
      proto_id = None
      # Find proto ID of attached dependency (if it is in the proto).
      for proto_ref in object_graph_proto.nodes[0].children:
        if proto_ref.local_name == ref.name:
          proto_id = proto_ref.node_id
          break

      if proto_id is None:
        # The attached dependency has no node in the saved object graph, so
        # there is nothing to restore for it. Without this guard a `None`
        # node id would be handed to `CheckpointPosition`.
        continue

      if proto_id in checkpoint.object_by_proto_id:
        # Object has already been restored. This can happen when there's an
        # indirect connection from the attached object to the root.
        continue

      restore_lib.CheckpointPosition(
          checkpoint=checkpoint, proto_id=proto_id).restore(ref.ref)

  load_status = util.CheckpointLoadStatus(
      checkpoint,
      graph_view=self._graph_view,
      feed_dict=file_prefix_feed_dict,
      options=options)
  return load_status
" + "DTensor is integrated with tf.train.Checkpoint and it can be " + "used out of the box to save and restore dtensors.") +@tf_export("experimental.dtensor.DTensorCheckpoint", v1=[]) +class DTensorCheckpoint(util.Checkpoint): + """Manages saving/restoring trackable values to disk, for DTensor.""" + + def __init__(self, mesh: layout.Mesh, root=None, **kwargs): + super(DTensorCheckpoint, self).__init__(root=root, **kwargs) + self._mesh = mesh + + saver_root = self + attached_dependencies = None + self._save_counter = None # Created lazily for restore-on-create. + self._save_assign_op = None + + if root: + util._assert_trackable(root, "root") + saver_root = root + attached_dependencies = [] + + # All keyword arguments (including root itself) are set as children + # of root. + kwargs["root"] = root + root._maybe_initialize_trackable() + + self._save_counter = data_structures.NoDependency( + root._lookup_dependency("save_counter")) + self._root = data_structures.NoDependency(root) + + for k, v in sorted(kwargs.items(), key=lambda item: item[0]): + setattr(self, k, v) + + # Call getattr instead of directly using v because setattr converts + # v to a Trackable data structure when v is a list/dict/tuple. + converted_v = getattr(self, k) + util._assert_trackable(converted_v, k) + + if root: + # Make sure that root doesn't already have dependencies with these names + attached_dependencies = attached_dependencies or [] + child = root._lookup_dependency(k) + if child is None: + attached_dependencies.append(base.TrackableReference(k, converted_v)) + elif child != converted_v: + raise ValueError( + "Cannot create a Checkpoint with keyword argument {name} if " + "root.{name} already exists.".format(name=k)) + # DTensor Change: + # Override the parents saver with DTrackableSaver with _SingleDeviceSaver. 
+ self._saver = DTrackableSaver( + mesh, + graph_view_lib.ObjectGraphView( + weakref.ref(saver_root), + attached_dependencies=attached_dependencies)) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/d_variable.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/d_variable.py new file mode 100644 index 0000000000000000000000000000000000000000..b99875f03fa70ed4f942a0d70c828040a7223e64 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/d_variable.py @@ -0,0 +1,260 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
class DSaveSpec(saveable_object.SaveSpec):
  """A `SaveSpec` for DTensor that additionally keeps global shape and layout."""

  def __init__(self,
               tensor,
               slice_spec,
               name,
               global_shape,
               layout,
               dtype=None,
               device=None):
    """Initializes the base `SaveSpec` and records the DTensor extras.

    Args:
      tensor: Forwarded to `SaveSpec`.
      slice_spec: Forwarded to `SaveSpec`.
      name: Forwarded to `SaveSpec`.
      global_shape: Stored as `self.global_shape`.
      layout: Stored as `self.layout`.
      dtype: Forwarded to `SaveSpec`.
      device: Forwarded to `SaveSpec`.
    """
    base_fields = dict(
        tensor=tensor,
        slice_spec=slice_spec,
        name=name,
        dtype=dtype,
        device=device)
    super().__init__(**base_fields)
    self.layout = layout
    self.global_shape = global_shape
def should_cast(self, v):
  """Tells whether `v` should be saved as bfloat16.

  Args:
    v: Object whose `dtype` is inspected, e.g. the variable or its value.

  Returns:
    A truthy value only when the wrapped DVariable has `save_as_bf16` set
    and `v.dtype` is float32.
  """
  bf16_requested = self._dvariable.save_as_bf16
  if not bf16_requested:
    # Hand back the flag itself, as the short-circuiting `and` would.
    return bf16_requested
  return v.dtype == dtypes.float32
@tf_export('experimental.dtensor.DVariable', v1=[])
class DVariable(resource_variable_ops.ResourceVariable):
  """A replacement for tf.Variable which follows initial value placement.

  The class also handles restore/save operations in DTensor. Note that,
  DVariable may fall back to normal tf.Variable at this moment if
  `initial_value` is not a DTensor.
  """

  def __init__(self, initial_value, *args, dtype=None, **kwargs):
    """Overrides tf.Variable to fix VarHandleOp placements.

    Args:
      initial_value: A value convertible to a tensor, or a callable returning
        one. A `CheckpointInitialValueCallable` (possibly wrapped in
        `functools.partial`) is called under the given layout.
      *args: Forwarded to the `ResourceVariable` constructor.
      dtype: Used when converting `initial_value` to a tensor and forwarded
        to the base constructor.
      **kwargs: Forwarded to the base constructor after `layout` is removed;
        `shape` is read but left in place.

    Raises:
      ValueError: If `initial_value` is a `CheckpointInitialValueCallable` and
        `shape` or `layout` is falsy.
      InvalidArgumentError: In eager mode, if the given layout conflicts with
        the layout of a DTensor initial value, or if neither a layout nor a
        DTensor initial value is provided.
    """
    # Variables by default use the current device scope for placement. This
    # wrapper has them follow the initial value's placement instead (which will
    # be the DTensor device if the initial value has a layout).

    # Pop layout from kwargs since keras make_variable may pass a 'layout'
    # keyword argument. We need to pop it because we are passing kwargs to
    # super class constructor.
    layout = kwargs.pop('layout', None)
    shape = kwargs.get('shape', None)

    if callable(initial_value):
      unwrapped = initial_value
      if issubclass(type(initial_value), functools.partial):
        unwrapped = initial_value.func

      # If wrapped is a CheckpointInitialValueCallable, this means that
      # we are creating a Variable during a checkpoint restore.
      # Thus the restore will happen now through this callable
      # and we will create the DVariable with the restored dtensor.
      if issubclass(type(unwrapped), trackable.CheckpointInitialValueCallable):
        # NOTE(review): this is a truthiness test, so an empty list/tuple
        # shape (a scalar) is rejected even though the message only talks
        # about None -- confirm whether `is None` was intended.
        if not shape or not layout:
          raise ValueError('Expected shape and layout to be not None.')

        # CheckpointInitialValueCallable will call an eager tf.RestoreV2,
        # which does not have any shape information or layout information
        # attached. Thus we will do two things to have them correctly specified:
        #
        # The default layout scope allows us to correctly specify the output
        # layout of the tf.RestoreV2 that will be called
        #
        # Passing shard_info with the correct shape allows the tf.RestoreV2
        # ShapeInference to extract the shape.
        initial_value = api.call_with_layout(
            initial_value,
            layout,
            shard_info=trackable.ShardInfo(
                shape=shape, offset=[0] * len(shape)))
      else:
        initial_value = initial_value()

    # When the initial value came from a Checkpoint restoration, fetch tensor.
    if isinstance(initial_value, trackable.CheckpointInitialValue):
      initial_value = initial_value.wrapped_value

    initial_value = ops.convert_to_tensor(initial_value, dtype=dtype)
    variable_device = initial_value.device
    # Starts disabled; changed through the `save_as_bf16` property setter.
    self._save_as_bf16 = False
    # TODO(b/159035705): The following code enables variable creation inside
    # a tf.function. However, it requires a global dtensor device.
    # if not variable_device and not tf.executing_eagerly():
    #   try:
    #     initial_value.op.get_attr("_layout")
    #   except ValueError:
    #     pass
    #   else:
    #     # The initial value is a DTensor, but because the DTensor device is
    #     # only active during eager execution at the moment we need to
    #     # translate that into a placement for the eager VarHandleOp.
    #     variable_device = _dtensor_device().name
    with ops.device(variable_device):
      # If initial tensor assigned to DVariable is DTensor, record the layout of
      # the resource so that this can be queried.
      if context.executing_eagerly():
        if api.is_dtensor(initial_value):
          value_layout = api.fetch_layout(initial_value)
          if layout is not None and layout != value_layout:
            raise errors_impl.InvalidArgumentError(
                None,
                None,
                'Conflicting layout are provided for initial '
                f'value layout ({value_layout}) and variable ({layout}).',
            )
          layout = value_layout
        elif layout is not None:
          initial_value = api.relayout(initial_value, layout)
        else:
          raise errors_impl.InvalidArgumentError(
              None,
              None,
              'Neither layout nor DTensor initial value are provided.',
          )
        # `self.layout` is assigned only on this eager path.
        self.layout = layout
        with api.default_mesh(layout.mesh):
          super(DVariable, self).__init__(
              initial_value, *args, dtype=dtype, **kwargs
          )
      else:
        # FIXME(175928457): Record value layout in graph mode.
        if layout is not None:
          initial_value = api.relayout(initial_value, layout)
        super(DVariable, self).__init__(
            initial_value, *args, dtype=dtype, **kwargs)

  @property
  def save_as_bf16(self):
    """Whether this variable is flagged to be saved as bfloat16."""
    return self._save_as_bf16

  @save_as_bf16.setter
  def save_as_bf16(self, save_as_bf16):
    """Enables saving float32 as bfloat16."""
    # The flag only sticks for float32 variables; for any other dtype the
    # stored value is falsy.
    self._save_as_bf16 = save_as_bf16 and self.dtype == dtypes.float32

  def _gather_saveables_for_checkpoint(self):
    """Maps the variable-value key to a factory for `_DVariableSaveable`."""
    return {
        trackable.VARIABLE_VALUE_KEY:
            functools.partial(_DVariableSaveable, self)
    }
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================
"""Propagates information about tensor layouts across operations."""

import contextlib
import logging
import threading
from typing import Any, List, Sequence, Set

import numpy as np

from tensorflow.core.framework import attr_value_pb2
from tensorflow.dtensor.python import config
from tensorflow.dtensor.python import layout as layout_lib
from tensorflow.python import _pywrap_dtensor_device
from tensorflow.python.eager import context
from tensorflow.python.eager import core
from tensorflow.python.framework import device as tf_device
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_util
from tensorflow.python.util import _pywrap_utils


# TODO(allenl): Allow something other than "CUSTOM" so we don't need device
# numbering hacks to avoid collisions between parallel devices and dtensor
# devices.
_next_device_number = 0
_next_device_number_lock = threading.Lock()


class DTensorDevice(object):
  """Wraps a custom device which attempts to propagate tensor layouts."""

  def __init__(self,
               meshes: List[layout_lib.Mesh],
               is_async=True,
               in_flight_nodes_limit=8):
    """Create a new DTensorDevice which executes ops on `underlying_device`.

    Args:
      meshes: A list of `Mesh` objects indicating groups of devices to execute
        on. These may also be registered lazily.
      is_async: Indicates whether DTensor operations on this client will return
        immediately (with "non-ready" handles) or block until executed. This is
        on by default and is exposed as an option for ease of debugging.
      in_flight_nodes_limit: Indicates the limit of in-flight nodes before
        enqueueing of async operations to DTensorDevice is blocked. This limit
        is per mesh. 0 for no limits from DTensor. Default is 8.

    Raises:
      TypeError: If any element of `meshes` is not a `Mesh`.
    """
    if any(not isinstance(mesh, layout_lib.Mesh) for mesh in meshes):
      raise TypeError(
          "Expected a flat list of Mesh objects, got {}".format(meshes))
    global _next_device_number
    ctx = context.context()
    # The lock covers both the counter bump and the registration so two
    # devices created concurrently never share a CUSTOM device number.
    with _next_device_number_lock:
      self.name = "{}/device:CUSTOM:{}".format(ctx.host_address_space(),
                                               _next_device_number)
      _next_device_number += 1
      device, device_info = _pywrap_dtensor_device.Allocate(
          self.name, is_async, in_flight_nodes_limit
      )
    context.register_custom_device(device, self.name, device_info)

    self._device_info = device_info
    self._current_output_layout = None
    self._current_default_mesh = None
    self._meshes = set()
    self._mesh_lock = threading.Lock()
    for mesh in meshes:
      self._register_mesh(mesh)

  def _create_host_array(self, shape, host_id):
    """Returns ID and device lists that can be used to create a host mesh."""
    num_global_devices = np.prod(shape)
    global_device_ids = np.arange(num_global_devices).reshape(shape)
    local_device_list = [
        tf_device.DeviceSpec(
            job=config.full_job_name(), device_type="CPU", device_index=0)
    ]
    num_local_devices = len(local_device_list)
    local_device_ids = [
        x + host_id * num_local_devices for x in range(num_local_devices)
    ]
    return global_device_ids, local_device_ids, local_device_list

  def _register_mesh(self, mesh: layout_lib.Mesh):
    """Idempotently register `mesh` with the dtensor device."""
    with self._mesh_lock:
      if mesh not in self._meshes:
        _pywrap_dtensor_device.AddMesh(
            self._device_info, mesh.to_string(), False
        )
        self._meshes.add(mesh)
        if mesh.device_type().upper() == "TPU":
          # A TPU mesh also gets its companion host mesh registered.
          logging.info(
              "Registering virtual 1:1 mapped host mesh %s for mesh %s",
              mesh.host_mesh().to_string(), mesh.to_string())
          _pywrap_dtensor_device.AddMesh(
              self._device_info, mesh.host_mesh().to_string(), True
          )
          self._meshes.add(mesh.host_mesh())

  @property
  def meshes(self) -> Set[layout_lib.Mesh]:
    """The set of meshes registered with this device so far."""
    return self._meshes

  def pack(self, tensors: Sequence[Any], layout: layout_lib.Layout) -> Any:
    """Packs tensors into a DTensor handle on this DTensor device.

    Packing and unpacking are inverse operations:

    ```
    * unpack(pack(tensors)) == tensors
    * pack(unpack(dtensor)) == dtensor
    ```

    Refer to `dtensor.pack` for more information.

    Args:
      tensors: The list of tensors to pack into a DTensor.
      layout: The layout of the DTensor to be created.

    Returns:
      A DTensor created from the individual component tensors.

    Raises:
      RuntimeError: When not called eagerly.
      TypeError: When SparseTensors are mixed with dense tensors, or the
        SparseTensors do not all have the same shape.
    """
    if not context.executing_eagerly():
      raise RuntimeError("`pack` must be called eagerly.")
    self._register_mesh(layout.mesh)
    with ops.device(self.name):
      if all(isinstance(t, sparse_tensor.SparseTensor) for t in tensors):
        if not all(t.shape == tensors[0].shape for t in tensors):
          raise TypeError("All input SparseTensors to Pack must be same shape.")
        is_sparse = True
        # Flatten to [all indices..., all values..., all dense shapes...];
        # `unpack` relies on this three-run ordering.
        tensors = [t.indices for t in tensors] + [t.values for t in tensors] + [
            ops.convert_to_tensor(t.shape, dtype=dtypes.int64) for t in tensors
        ]
      elif any(isinstance(t, sparse_tensor.SparseTensor) for t in tensors):
        raise TypeError("Cannot Pack SparseTensors with Tensors.")
      else:
        is_sparse = False
      try:
        return _pywrap_dtensor_device.Pack(
            context.context()._handle,  # pylint: disable=protected-access
            tensors,
            layout.to_string(),
            self._device_info,
            is_sparse)
      except core._NotOkStatusException as e:  # pylint: disable=protected-access
        raise core._status_to_exception(e) from None  # pylint: disable=protected-access

  def unpack(self, dtensor: Any) -> Sequence[Any]:
    """Unpacks a DTensor handle on this DTensor device.

    Packing and unpacking are inverse operations:

    ```
    * unpack(pack(tensors)) == tensors
    * pack(unpack(dtensor)) == dtensor
    ```

    Refer to `dtensor.unpack` for more information.

    Args:
      dtensor: The DTensor to unpack.

    Returns:
      The raw underlying tensor components of the DTensor.

    Raises:
      RuntimeError: When not called eagerly.
    """
    if not context.executing_eagerly():
      raise RuntimeError("`unpack` must be called eagerly.")
    try:
      tensors = _pywrap_dtensor_device.Unpack(
          context.context()._handle,  # pylint: disable=protected-access
          dtensor,
          self._device_info)
    except core._NotOkStatusException as e:  # pylint: disable=protected-access
      raise core._status_to_exception(e) from None  # pylint: disable=protected-access

    is_sparse = _pywrap_dtensor_device.IsSparseDTensor(
        context.context()._handle,  # pylint: disable=protected-access.
        dtensor,
        self._device_info)
    if not is_sparse:
      return tensors

    # Sparse components come back as three equal-length runs (indices, values,
    # dense shapes), mirroring the ordering built in `pack`.
    num_sparse = len(tensors) // 3
    return [
        sparse_tensor.SparseTensor(
            tensors[i], tensors[i + num_sparse], tensors[i + 2 * num_sparse])
        for i in range(num_sparse)
    ]

  def fetch_layout(self, dtensor: Any) -> layout_lib.Layout:
    """Fetches the layout of the DTensor.

    Args:
      dtensor: The DTensor whose layout is to be fetched.

    Returns:
      The `Layout` of this DTensor, or `None` when the native call reports no
      layout string for it.

    Raises:
      RuntimeError: When not called eagerly.
    """
    if not context.executing_eagerly():
      raise RuntimeError("`fetch_layout` must be called eagerly.")
    if _pywrap_utils.IsVariable(dtensor):
      dtensor = dtensor.read_value()
    try:
      layout_string = _pywrap_dtensor_device.FetchLayout(
          context.context()._handle,  # pylint: disable=protected-access
          dtensor,
          self._device_info)
    except core._NotOkStatusException as e:  # pylint: disable=protected-access
      raise core._status_to_exception(e) from None  # pylint: disable=protected-access

    if layout_string is None:
      return None
    return layout_lib.Layout.from_string(layout_string)

  def is_dtensor(self, tensor: Any) -> bool:
    """Check whether the input tensor is a DTensor.

    In Python, a DTensor has the same type as a `tf.Tensor`. This method will
    let you check and handle the tensor differently if a tf.Tensor is a DTensor.

    Args:
      tensor: an object to be checked.

    Returns:
      bool, True if the given tensor is a DTensor.

    Raises:
      RuntimeError: When not called eagerly.
    """
    if not context.executing_eagerly():
      raise RuntimeError("`is_dtensor` must be called eagerly.")
    if not tensor_util.is_tensor(tensor):
      return False
    if _pywrap_utils.IsVariable(tensor):
      # For variables the check is made on the underlying resource handle.
      tensor = tensor._handle  # pylint: disable=protected-access
    return _pywrap_dtensor_device.IsDTensor(
        context.context()._handle,  # pylint: disable=protected-access
        tensor,
        self._device_info,
    )

  def set_tpu_core_ids(self, mesh_name, tpu_core_ids):
    """Sets the singleton global device ID-to-physical core ID map.

    Args:
      mesh_name: The name of a mesh. If empty, set the default mapping.
      tpu_core_ids: TPU core IDs sorted by TF task/device ordinal.
    """
    _pywrap_dtensor_device.SetTPUCoreIDs(self._device_info, mesh_name,
                                         tpu_core_ids)

  def clear_tpu_core_ids(self):
    """Clears the TPU core ID mapping held by the native device."""
    _pywrap_dtensor_device.ClearTPUCoreIDs(self._device_info)

  def tpu_core_ids_to_locations(self, tpu_core_ids):
    """Translates TPU core IDs to TPU core locations.

    Args:
      tpu_core_ids: A list of TPU core IDs. Each one is an unsigned integer.

    Returns:
      A list of corresponding TPU core locations.
    """
    return _pywrap_dtensor_device.TPUCoreIDsToLocations(
        context.context()._handle,  # pylint: disable=protected-access
        self._device_info,
        tpu_core_ids)

  def tpu_core_locations_to_ids(self, tpu_core_locations):
    """Translates TPU core locations to TPU core IDs.

    Args:
      tpu_core_locations: A list of TPU core locations. Each one is a list of
        four unsigned integers, [x, y, z, core].

    Returns:
      A list of corresponding TPU core IDs.
    """
    return _pywrap_dtensor_device.TPUCoreLocationsToIDs(
        context.context()._handle,  # pylint: disable=protected-access
        self._device_info,
        tpu_core_locations)

  def _get_stats(self):
    """Returns function-compilation cache statistics.

    Returns:
      A dictionary.
        'miss': number of cache misses;
        'hit': number of cache hits; and
        'size': size of cache.
    """
    return _pywrap_dtensor_device.GetStats(
        context.context()._handle,  # pylint: disable=protected-access,
        self._device_info,
    )

  def set_iterator_element_layouts(self, iterator_resource_dtensor,
                                   layouts: List[layout_lib.Layout]):
    """Sets the element layouts on an iterator resource tensor.

    Args:
      iterator_resource_dtensor: a DTensor created by packing the individual
        iterator resource tensors.
      layouts: the flattened list of layouts to be applied to the elements
        emitted by the iterator resource DTensor.
    """
    _pywrap_dtensor_device.SetIteratorElementLayouts(
        context.context()._handle,  # pylint: disable=protected-access
        iterator_resource_dtensor,
        [layout.to_string() for layout in layouts],
        self._device_info)

  @contextlib.contextmanager
  def _experimental_default_mesh(self, mesh: layout_lib.Mesh):
    """Sets a default mesh for all ops in the scope.

    Note: This is an internal helper method, which is not user facing api.

    Useful for requesting a specific mesh for ops which would have no inferred
    layout, e.g. tf.zeros.

    The previous default mesh is restored when the scope exits, including when
    the body of the `with` statement raises.

    Args:
      mesh: A Mesh to be used for ops without Mesh.

    Yields:
      Nothing.
    """
    previous_default = self._current_default_mesh
    self._register_mesh(mesh)
    _pywrap_dtensor_device.ExperimentalSetDefaultMesh(
        self._device_info,
        mesh.to_string().encode("utf-8"))
    self._current_default_mesh = mesh
    try:
      yield
    finally:
      # Without the finally, an exception raised inside the scope would leave
      # `mesh` installed as the default for every later op on this device.
      _pywrap_dtensor_device.ExperimentalClearDefaultMesh(self._device_info)
      if previous_default:
        _pywrap_dtensor_device.ExperimentalSetDefaultMesh(
            self._device_info,
            previous_default.to_string().encode("utf-8"))
      self._current_default_mesh = previous_default

  @contextlib.contextmanager
  def _default_layout(self, layout: layout_lib.Layout):
    """Sets a default output layout for all ops in the scope.

    Note: This is an internal helper method, which is not user facing api.

    Useful for requesting a specific layout for ops which would have no inferred
    layout, e.g. tf.zeros.

    Caveats:

    - Currently only affects the first output of an op. For Op with multiple
      outputs, this does not support yet.

    - All Ops in the scope will be attached with the same layout. This might not
      be valid as the rank is different. The current suggestion is: Try to wrap
      the raw op whenever possible.

    Args:
      layout: A Layout for the outputs of all operations in this scope.

    Yields:
      Nothing.
    """
    previous_default = None
    previous_graph_size = None
    graph = None

    self._register_mesh(layout.mesh)
    try:
      previous_default = self._current_output_layout
      self._current_output_layout = layout.to_string().encode("utf-8")
      _pywrap_dtensor_device.ExperimentalSetDefaultLayout(
          self._device_info, self._current_output_layout)
      if context.executing_eagerly():
        with ops.device(self.name):
          yield
      else:
        # Custom devices currently don't affect graph building, so we need a
        # separate way to indicate layouts.
        #
        # TODO(allenl): Remove this case once the DTensor device is active
        # during tracing.
        graph = ops.get_default_graph()
        previous_graph_size = len(graph.get_operations())
        yield
    finally:
      if graph is not None:
        # Tag operations added under this scope
        for operation in graph.get_operations()[previous_graph_size:]:
          # Set layout directly on the Op itself.
          operation._set_attr(  # pylint: disable=protected-access
              "_layout",
              attr_value_pb2.AttrValue(
                  list=attr_value_pb2.AttrValue.ListValue(
                      s=[self._current_output_layout])))
          operation._set_attr(  # pylint: disable=protected-access
              "_mesh",
              attr_value_pb2.AttrValue(
                  s=layout.mesh.to_string().encode("utf-8")))

      # Restore whatever layout (if any) was active before this scope.
      self._current_output_layout = previous_default
      if self._current_output_layout is None:
        _pywrap_dtensor_device.ExperimentalClearDefaultLayout(self._device_info)
      else:
        _pywrap_dtensor_device.ExperimentalSetDefaultLayout(
            self._device_info, self._current_output_layout.decode("utf-8"))
+""" + +import collections + +from tensorflow.python import pywrap_tfe as pywrap_tfe +from tensorflow.python.eager import context as _context +from tensorflow.python.eager import core as _core +from tensorflow.python.eager import execute as _execute +from tensorflow.python.framework import dtypes as _dtypes +from tensorflow.security.fuzzing.py import annotation_types as _atypes + +from tensorflow.python.framework import op_def_registry as _op_def_registry +from tensorflow.python.framework import ops as _ops +from tensorflow.python.framework import op_def_library as _op_def_library +from tensorflow.python.util.deprecation import deprecated_endpoints +from tensorflow.python.util import dispatch as _dispatch +from tensorflow.python.util.tf_export import tf_export + +from typing import TypeVar, List, Any +from typing_extensions import Annotated + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('configure_and_initialize_global_tpu') +def configure_and_initialize_global_tpu(use_tfrt_host_runtime:bool=True, name=None) -> Annotated[Any, _atypes.Int32]: + r"""TODO: add doc. + + Args: + use_tfrt_host_runtime: An optional `bool`. Defaults to `True`. + name: A name for the operation (optional). + + Returns: + A `Tensor` of type `int32`. 
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "ConfigureAndInitializeGlobalTPU", name, + "use_tfrt_host_runtime", use_tfrt_host_runtime) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_configure_and_initialize_global_tpu( + (use_tfrt_host_runtime, name,), None) + if _result is not NotImplemented: + return _result + return configure_and_initialize_global_tpu_eager_fallback( + use_tfrt_host_runtime=use_tfrt_host_runtime, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + configure_and_initialize_global_tpu, (), dict(use_tfrt_host_runtime=use_tfrt_host_runtime, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_configure_and_initialize_global_tpu( + (use_tfrt_host_runtime, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ if use_tfrt_host_runtime is None: + use_tfrt_host_runtime = True + use_tfrt_host_runtime = _execute.make_bool(use_tfrt_host_runtime, "use_tfrt_host_runtime") + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "ConfigureAndInitializeGlobalTPU", use_tfrt_host_runtime=use_tfrt_host_runtime, + name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + configure_and_initialize_global_tpu, (), dict(use_tfrt_host_runtime=use_tfrt_host_runtime, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("use_tfrt_host_runtime", + _op._get_attr_bool("use_tfrt_host_runtime")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "ConfigureAndInitializeGlobalTPU", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +ConfigureAndInitializeGlobalTPU = tf_export("raw_ops.ConfigureAndInitializeGlobalTPU")(_ops.to_raw_op(configure_and_initialize_global_tpu)) +_dispatcher_for_configure_and_initialize_global_tpu = configure_and_initialize_global_tpu._tf_type_based_dispatcher.Dispatch + + +def configure_and_initialize_global_tpu_eager_fallback(use_tfrt_host_runtime: bool, name, ctx) -> Annotated[Any, _atypes.Int32]: + if use_tfrt_host_runtime is None: + use_tfrt_host_runtime = True + use_tfrt_host_runtime = _execute.make_bool(use_tfrt_host_runtime, "use_tfrt_host_runtime") + _inputs_flat = [] + _attrs = ("use_tfrt_host_runtime", use_tfrt_host_runtime) + _result = _execute.execute(b"ConfigureAndInitializeGlobalTPU", 1, + inputs=_inputs_flat, attrs=_attrs, ctx=ctx, + name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "ConfigureAndInitializeGlobalTPU", _inputs_flat, _attrs, _result) + _result, = _result + return _result + + +TV_CopyToMesh_T = TypeVar("TV_CopyToMesh_T", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, _atypes.Float16, _atypes.Float32, 
_atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('copy_to_mesh') +def copy_to_mesh(input: Annotated[Any, TV_CopyToMesh_T], mesh: str, name=None) -> Annotated[Any, TV_CopyToMesh_T]: + r"""TODO: add doc. + + Args: + input: A `Tensor`. + mesh: A `string`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. Has the same type as `input`. + """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "CopyToMesh", name, input, "mesh", mesh) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_copy_to_mesh( + (input, mesh, name,), None) + if _result is not NotImplemented: + return _result + return copy_to_mesh_eager_fallback( + input, mesh=mesh, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + copy_to_mesh, (), dict(input=input, mesh=mesh, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_copy_to_mesh( + (input, mesh, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ mesh = _execute.make_str(mesh, "mesh") + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "CopyToMesh", input=input, mesh=mesh, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + copy_to_mesh, (), dict(input=input, mesh=mesh, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("mesh", _op.get_attr("mesh"), "T", _op._get_attr_type("T")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "CopyToMesh", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +CopyToMesh = tf_export("raw_ops.CopyToMesh")(_ops.to_raw_op(copy_to_mesh)) +_dispatcher_for_copy_to_mesh = copy_to_mesh._tf_type_based_dispatcher.Dispatch + + +def copy_to_mesh_eager_fallback(input: Annotated[Any, TV_CopyToMesh_T], mesh: str, name, ctx) -> Annotated[Any, TV_CopyToMesh_T]: + mesh = _execute.make_str(mesh, "mesh") + _attr_T, (input,) = _execute.args_to_matching_eager([input], ctx, []) + _inputs_flat = [input] + _attrs = ("mesh", mesh, "T", _attr_T) + _result = _execute.execute(b"CopyToMesh", 1, inputs=_inputs_flat, + attrs=_attrs, ctx=ctx, name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "CopyToMesh", _inputs_flat, _attrs, _result) + _result, = _result + return _result + + +TV_CopyToMeshGrad_T = TypeVar("TV_CopyToMeshGrad_T", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, _atypes.Float16, _atypes.Float32, _atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher 
+@tf_export('copy_to_mesh_grad') +def copy_to_mesh_grad(input: Annotated[Any, TV_CopyToMeshGrad_T], forward_input: Annotated[Any, TV_CopyToMeshGrad_T], name=None) -> Annotated[Any, TV_CopyToMeshGrad_T]: + r"""TODO: add doc. + + Args: + input: A `Tensor`. + forward_input: A `Tensor`. Must have the same type as `input`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. Has the same type as `input`. + """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "CopyToMeshGrad", name, input, forward_input) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_copy_to_mesh_grad( + (input, forward_input, name,), None) + if _result is not NotImplemented: + return _result + return copy_to_mesh_grad_eager_fallback( + input, forward_input, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + copy_to_mesh_grad, (), dict(input=input, + forward_input=forward_input, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_copy_to_mesh_grad( + (input, forward_input, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "CopyToMeshGrad", input=input, forward_input=forward_input, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + copy_to_mesh_grad, (), dict(input=input, + forward_input=forward_input, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("T", _op._get_attr_type("T")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "CopyToMeshGrad", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +CopyToMeshGrad = tf_export("raw_ops.CopyToMeshGrad")(_ops.to_raw_op(copy_to_mesh_grad)) +_dispatcher_for_copy_to_mesh_grad = copy_to_mesh_grad._tf_type_based_dispatcher.Dispatch + + +def copy_to_mesh_grad_eager_fallback(input: Annotated[Any, TV_CopyToMeshGrad_T], forward_input: Annotated[Any, TV_CopyToMeshGrad_T], name, ctx) -> Annotated[Any, TV_CopyToMeshGrad_T]: + _attr_T, _inputs_T = _execute.args_to_matching_eager([input, forward_input], ctx, []) + (input, forward_input) = _inputs_T + _inputs_flat = [input, forward_input] + _attrs = ("T", _attr_T) + _result = _execute.execute(b"CopyToMeshGrad", 1, inputs=_inputs_flat, + attrs=_attrs, ctx=ctx, name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "CopyToMeshGrad", _inputs_flat, _attrs, _result) + _result, = _result + return _result + + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('d_tensor_restore_v2') +def d_tensor_restore_v2(prefix: Annotated[Any, _atypes.String], tensor_names: Annotated[Any, _atypes.String], shape_and_slices: Annotated[Any, _atypes.String], input_shapes, input_layouts, dtypes, name=None): + r"""TODO: add doc. + + Args: + prefix: A `Tensor` of type `string`. + tensor_names: A `Tensor` of type `string`. + shape_and_slices: A `Tensor` of type `string`. 
+ input_shapes: A list of shapes (each a `tf.TensorShape` or list of `ints`). + input_layouts: A list of `strings`. + dtypes: A list of `tf.DTypes` that has length `>= 1`. + name: A name for the operation (optional). + + Returns: + A list of `Tensor` objects of type `dtypes`. + """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "DTensorRestoreV2", name, prefix, tensor_names, + shape_and_slices, "input_shapes", input_shapes, "input_layouts", + input_layouts, "dtypes", dtypes) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_d_tensor_restore_v2( + (prefix, tensor_names, shape_and_slices, input_shapes, + input_layouts, dtypes, name,), None) + if _result is not NotImplemented: + return _result + return d_tensor_restore_v2_eager_fallback( + prefix, tensor_names, shape_and_slices, input_shapes=input_shapes, + input_layouts=input_layouts, dtypes=dtypes, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + d_tensor_restore_v2, (), dict(prefix=prefix, + tensor_names=tensor_names, + shape_and_slices=shape_and_slices, + input_shapes=input_shapes, + input_layouts=input_layouts, + dtypes=dtypes, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_d_tensor_restore_v2( + (prefix, tensor_names, shape_and_slices, input_shapes, input_layouts, + dtypes, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. + if not isinstance(input_shapes, (list, tuple)): + raise TypeError( + "Expected list for 'input_shapes' argument to " + "'d_tensor_restore_v2' Op, not %r." 
% input_shapes) + input_shapes = [_execute.make_shape(_s, "input_shapes") for _s in input_shapes] + if not isinstance(input_layouts, (list, tuple)): + raise TypeError( + "Expected list for 'input_layouts' argument to " + "'d_tensor_restore_v2' Op, not %r." % input_layouts) + input_layouts = [_execute.make_str(_s, "input_layouts") for _s in input_layouts] + if not isinstance(dtypes, (list, tuple)): + raise TypeError( + "Expected list for 'dtypes' argument to " + "'d_tensor_restore_v2' Op, not %r." % dtypes) + dtypes = [_execute.make_type(_t, "dtypes") for _t in dtypes] + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "DTensorRestoreV2", prefix=prefix, tensor_names=tensor_names, + shape_and_slices=shape_and_slices, + input_shapes=input_shapes, + input_layouts=input_layouts, dtypes=dtypes, + name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + d_tensor_restore_v2, (), dict(prefix=prefix, + tensor_names=tensor_names, + shape_and_slices=shape_and_slices, + input_shapes=input_shapes, + input_layouts=input_layouts, + dtypes=dtypes, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if not _result: + return _op + if _execute.must_record_gradient(): + _attrs = ("input_shapes", _op.get_attr("input_shapes"), "input_layouts", + _op.get_attr("input_layouts"), "dtypes", _op.get_attr("dtypes")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "DTensorRestoreV2", _inputs_flat, _attrs, _result) + return _result + +DTensorRestoreV2 = tf_export("raw_ops.DTensorRestoreV2")(_ops.to_raw_op(d_tensor_restore_v2)) +_dispatcher_for_d_tensor_restore_v2 = d_tensor_restore_v2._tf_type_based_dispatcher.Dispatch + + +def d_tensor_restore_v2_eager_fallback(prefix: Annotated[Any, _atypes.String], tensor_names: Annotated[Any, _atypes.String], shape_and_slices: Annotated[Any, _atypes.String], input_shapes, input_layouts, dtypes, name, ctx): + if not isinstance(input_shapes, 
(list, tuple)): + raise TypeError( + "Expected list for 'input_shapes' argument to " + "'d_tensor_restore_v2' Op, not %r." % input_shapes) + input_shapes = [_execute.make_shape(_s, "input_shapes") for _s in input_shapes] + if not isinstance(input_layouts, (list, tuple)): + raise TypeError( + "Expected list for 'input_layouts' argument to " + "'d_tensor_restore_v2' Op, not %r." % input_layouts) + input_layouts = [_execute.make_str(_s, "input_layouts") for _s in input_layouts] + if not isinstance(dtypes, (list, tuple)): + raise TypeError( + "Expected list for 'dtypes' argument to " + "'d_tensor_restore_v2' Op, not %r." % dtypes) + dtypes = [_execute.make_type(_t, "dtypes") for _t in dtypes] + prefix = _ops.convert_to_tensor(prefix, _dtypes.string) + tensor_names = _ops.convert_to_tensor(tensor_names, _dtypes.string) + shape_and_slices = _ops.convert_to_tensor(shape_and_slices, _dtypes.string) + _inputs_flat = [prefix, tensor_names, shape_and_slices] + _attrs = ("input_shapes", input_shapes, "input_layouts", input_layouts, + "dtypes", dtypes) + _result = _execute.execute(b"DTensorRestoreV2", len(dtypes), + inputs=_inputs_flat, attrs=_attrs, ctx=ctx, + name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "DTensorRestoreV2", _inputs_flat, _attrs, _result) + return _result + + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('d_tensor_set_global_tpu_array') +def d_tensor_set_global_tpu_array(topology: Annotated[Any, _atypes.String], name=None): + r"""TODO: add doc. + + Args: + topology: A `Tensor` of type `string`. + name: A name for the operation (optional). + + Returns: + The created Operation. 
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "DTensorSetGlobalTPUArray", name, topology) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_d_tensor_set_global_tpu_array( + (topology, name,), None) + if _result is not NotImplemented: + return _result + return d_tensor_set_global_tpu_array_eager_fallback( + topology, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + d_tensor_set_global_tpu_array, (), dict(topology=topology, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_d_tensor_set_global_tpu_array( + (topology, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "DTensorSetGlobalTPUArray", topology=topology, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + d_tensor_set_global_tpu_array, (), dict(topology=topology, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + return _op +DTensorSetGlobalTPUArray = tf_export("raw_ops.DTensorSetGlobalTPUArray")(_ops.to_raw_op(d_tensor_set_global_tpu_array)) +_dispatcher_for_d_tensor_set_global_tpu_array = d_tensor_set_global_tpu_array._tf_type_based_dispatcher.Dispatch + + +def d_tensor_set_global_tpu_array_eager_fallback(topology: Annotated[Any, _atypes.String], name, ctx): + topology = _ops.convert_to_tensor(topology, _dtypes.string) + _inputs_flat = [topology] + _attrs = None + _result = _execute.execute(b"DTensorSetGlobalTPUArray", 0, + inputs=_inputs_flat, attrs=_attrs, ctx=ctx, + name=name) + _result = None + return _result + + +TV_Relayout_T = TypeVar("TV_Relayout_T", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, _atypes.Float16, _atypes.Float32, _atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('relayout') +def relayout(input: Annotated[Any, TV_Relayout_T], layout: str, name=None) -> Annotated[Any, TV_Relayout_T]: + r"""TODO: add doc. + + Args: + input: A `Tensor`. + layout: A `string`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. Has the same type as `input`. 
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "Relayout", name, input, "layout", layout) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_relayout( + (input, layout, name,), None) + if _result is not NotImplemented: + return _result + return relayout_eager_fallback( + input, layout=layout, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + relayout, (), dict(input=input, layout=layout, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_relayout( + (input, layout, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ layout = _execute.make_str(layout, "layout") + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "Relayout", input=input, layout=layout, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + relayout, (), dict(input=input, layout=layout, name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("layout", _op.get_attr("layout"), "T", _op._get_attr_type("T")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "Relayout", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +Relayout = tf_export("raw_ops.Relayout")(_ops.to_raw_op(relayout)) +_dispatcher_for_relayout = relayout._tf_type_based_dispatcher.Dispatch + + +def relayout_eager_fallback(input: Annotated[Any, TV_Relayout_T], layout: str, name, ctx) -> Annotated[Any, TV_Relayout_T]: + layout = _execute.make_str(layout, "layout") + _attr_T, (input,) = _execute.args_to_matching_eager([input], ctx, []) + _inputs_flat = [input] + _attrs = ("layout", layout, "T", _attr_T) + _result = _execute.execute(b"Relayout", 1, inputs=_inputs_flat, + attrs=_attrs, ctx=ctx, name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "Relayout", _inputs_flat, _attrs, _result) + _result, = _result + return _result + + +TV_RelayoutLike_T = TypeVar("TV_RelayoutLike_T", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, _atypes.Float16, _atypes.Float32, _atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) +TV_RelayoutLike_U = TypeVar("TV_RelayoutLike_U", _atypes.BFloat16, _atypes.Bool, _atypes.Complex128, _atypes.Complex64, 
_atypes.Float16, _atypes.Float32, _atypes.Float64, _atypes.Float8e4m3fn, _atypes.Float8e5m2, _atypes.Half, _atypes.Int16, _atypes.Int32, _atypes.Int4, _atypes.Int64, _atypes.Int8, _atypes.QInt16, _atypes.QInt32, _atypes.QInt8, _atypes.QUInt16, _atypes.QUInt8, _atypes.Resource, _atypes.String, _atypes.UInt16, _atypes.UInt32, _atypes.UInt4, _atypes.UInt64, _atypes.UInt8, _atypes.Variant) + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('relayout_like') +def relayout_like(input: Annotated[Any, TV_RelayoutLike_T], layout_input: Annotated[Any, TV_RelayoutLike_U], name=None) -> Annotated[Any, TV_RelayoutLike_T]: + r"""TODO: add doc. + + Args: + input: A `Tensor`. + layout_input: A `Tensor`. + name: A name for the operation (optional). + + Returns: + A `Tensor`. Has the same type as `input`. + """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "RelayoutLike", name, input, layout_input) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_relayout_like( + (input, layout_input, name,), None) + if _result is not NotImplemented: + return _result + return relayout_like_eager_fallback( + input, layout_input, name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + relayout_like, (), dict(input=input, layout_input=layout_input, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_relayout_like( + (input, layout_input, name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. 
+ try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "RelayoutLike", input=input, layout_input=layout_input, name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + relayout_like, (), dict(input=input, layout_input=layout_input, + name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = ("T", _op._get_attr_type("T"), "U", _op._get_attr_type("U")) + _inputs_flat = _op.inputs + _execute.record_gradient( + "RelayoutLike", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +RelayoutLike = tf_export("raw_ops.RelayoutLike")(_ops.to_raw_op(relayout_like)) +_dispatcher_for_relayout_like = relayout_like._tf_type_based_dispatcher.Dispatch + + +def relayout_like_eager_fallback(input: Annotated[Any, TV_RelayoutLike_T], layout_input: Annotated[Any, TV_RelayoutLike_U], name, ctx) -> Annotated[Any, TV_RelayoutLike_T]: + _attr_T, (input,) = _execute.args_to_matching_eager([input], ctx, []) + _attr_U, (layout_input,) = _execute.args_to_matching_eager([layout_input], ctx, []) + _inputs_flat = [input, layout_input] + _attrs = ("T", _attr_T, "U", _attr_U) + _result = _execute.execute(b"RelayoutLike", 1, inputs=_inputs_flat, + attrs=_attrs, ctx=ctx, name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + "RelayoutLike", _inputs_flat, _attrs, _result) + _result, = _result + return _result + + +@_dispatch.add_fallback_dispatch_list +@_dispatch.add_type_based_api_dispatcher +@tf_export('shutdown_tpu_system') +def shutdown_tpu_system(name=None) -> Annotated[Any, _atypes.Bool]: + r"""TODO: add doc. + + Args: + name: A name for the operation (optional). + + Returns: + A `Tensor` of type `bool`. 
+ """ + _ctx = _context._context or _context.context() + tld = _ctx._thread_local_data + if tld.is_eager: + try: + _result = pywrap_tfe.TFE_Py_FastPathExecute( + _ctx, "ShutdownTPUSystem", name) + return _result + except _core._NotOkStatusException as e: + _ops.raise_from_not_ok_status(e, name) + except _core._FallbackException: + pass + try: + _result = _dispatcher_for_shutdown_tpu_system( + (name,), None) + if _result is not NotImplemented: + return _result + return shutdown_tpu_system_eager_fallback( + name=name, ctx=_ctx) + except _core._SymbolicException: + pass # Add nodes to the TensorFlow graph. + except (TypeError, ValueError): + _result = _dispatch.dispatch( + shutdown_tpu_system, (), dict(name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + else: + _result = _dispatcher_for_shutdown_tpu_system( + (name,), None) + if _result is not NotImplemented: + return _result + # Add nodes to the TensorFlow graph. + try: + _, _, _op, _outputs = _op_def_library._apply_op_helper( + "ShutdownTPUSystem", name=name) + except (TypeError, ValueError): + _result = _dispatch.dispatch( + shutdown_tpu_system, (), dict(name=name) + ) + if _result is not _dispatch.OpDispatcher.NOT_SUPPORTED: + return _result + raise + _result = _outputs[:] + if _execute.must_record_gradient(): + _attrs = () + _inputs_flat = _op.inputs + _execute.record_gradient( + "ShutdownTPUSystem", _inputs_flat, _attrs, _result) + _result, = _result + return _result + +ShutdownTPUSystem = tf_export("raw_ops.ShutdownTPUSystem")(_ops.to_raw_op(shutdown_tpu_system)) +_dispatcher_for_shutdown_tpu_system = shutdown_tpu_system._tf_type_based_dispatcher.Dispatch + + +def shutdown_tpu_system_eager_fallback(name, ctx) -> Annotated[Any, _atypes.Bool]: + _inputs_flat = [] + _attrs = None + _result = _execute.execute(b"ShutdownTPUSystem", 1, inputs=_inputs_flat, + attrs=_attrs, ctx=ctx, name=name) + if _execute.must_record_gradient(): + _execute.record_gradient( + 
"ShutdownTPUSystem", _inputs_flat, _attrs, _result) + _result, = _result + return _result + diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/input_util.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/input_util.py new file mode 100644 index 0000000000000000000000000000000000000000..280fcdabe7d08132ca1ff20c59937dd9d1cdaef1 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/input_util.py @@ -0,0 +1,666 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""APIs to deal with input datasets efficiently in DTensor. + +When using tf.data with DTensor, the `DTensorDataset` API can be used to +efficiently handle loading the input data and correctly packing it to the +corresponding devices. This API is intended to work with unbatched data and can +be used for both data and model parallel setups. + +Example usage: + +>>> # 1-D mesh with 4 devices +>>> mesh = dtensor.Mesh(dim_names=['batch'], ...) +>>> layout = dtensor.Layout.batch_sharded(mesh, 'batch', rank=1) +>>> dataset = tf.data.Dataset.range(256) +>>> d_dataset = dtensor.DTensorDataset( +... dataset=dataset, +... global_batch_size=16, +... mesh=mesh, +... layouts=layout, +... 
batch_dim='batch') +>>> d_iter = iter(d_dataset) +>>> # Each batch is a length 16 tensor sharded across 4 devices +>>> batch_0_dtensor = next(d_iter) +>>> batch_0_dtensor + +>>> batch_1_dtensor = next(d_iter) +>>> batch_1_dtensor + + +For multi-client setups, `DTensorDataset` interacts with tf.data service to +correctly distribute the dataset among the participating clients. DTensor works +with tf.data service in co-located mode where each worker is running alongside +the DTensor client (the Tensorflow Python process). The `TFDataServiceConfig` +dataclass can be filled with information about the tf.data service cluster, and +passed to `DTensorDataset` to enable distribution. +""" + +import dataclasses +import operator + +from typing import Any, List, Optional, Sequence, Tuple + +from tensorflow.dtensor.python import api +from tensorflow.dtensor.python import config +from tensorflow.dtensor.python import layout as layout_lib +from tensorflow.python.data.experimental.ops import data_service_ops +from tensorflow.python.data.experimental.ops import distribute +from tensorflow.python.data.ops import dataset_ops +from tensorflow.python.data.ops import iterator_ops +from tensorflow.python.eager import context +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import dtypes +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor +from tensorflow.python.framework import tensor_shape +from tensorflow.python.framework import tensor_spec +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.types import data as data_types +from tensorflow.python.util import nest +from tensorflow.python.util.tf_export import tf_export + + +@dataclasses.dataclass +class TFDataServiceConfig: + """Specifies the tf.data service configuration to use. 
+ + Attributes: + dispatcher_address: a string specifying the address of the tf.data service + dispatcher server. + job_name: a non-empty string identifying the shared job that will be created + on tf.data service to process this dataset. + """ + dispatcher_address: str + job_name: str + + +# TODO(b/223275517): Add support for get_next_as_optional(). +class _DTensorIterator(iterator_ops.OwnedIterator): + """An iterator for a tf.data.Dataset distributed using DTensor. + + DTensorIterator encapsulates multiple underlying dataset iterators. It handles + retrieving the tensors to be placed on each underlying device and then uses + the 'pack' operation to create and return a DTensor. Thus users need only + interact with a single DTensorIterator to automatically distribute dataset + tensors onto devices. + """ + + def __init__( + self, + dtensor_components: Tuple[tensor.Tensor], + global_element_spec: tensor_spec.TensorSpec, + layouts: Any): + """Initializes a distributed iterator for DTensor datasets. + + This iterator encapsulates tf.data iterators for the underlying devices, and + treats it as a packed DTensor of iterator resource tensors. + + Args: + dtensor_components: a tuple containing the underlying iterator resources + packed into a DTensor. This is expected to be a tuple with a single + element. + global_element_spec: the underlying dataset's element spec from a global + view. + layouts: a structure of DTensor layouts to be applied to the elements + returned by the underlying iterators. This can be a single layout or + (possibly nested) tuples or dictionaries of layouts, and the structure + must match the structure of the iterator elements. + """ + # dtensor_components is expected to be a single-element tuple. 
+ [self._iterator_resource_dtensor] = dtensor_components + self._global_element_spec = global_element_spec + self._layouts = layouts + self._layouts_str = nest.map_structure( + lambda layout: layout.to_string(), layouts) + + super().__init__( + components=dtensor_components, element_spec=global_element_spec) + + def __next__(self): + try: + # IteratorGetNext will return a DTensor on the host, so move it to the + # device mesh. If the dataset layouts are on the host mesh itself, this + # is handled by DTensor as a no-op. + host_elem = self._next_internal() + context.async_wait() + device_elem = nest.map_structure( + api.copy_to_mesh, host_elem, self._layouts) + context.async_wait() + return device_elem + except errors.OutOfRangeError as e: + # Match TF2 eager executor behavior by raising StopIteration when iterator + # is out of range. + if context.executing_eagerly(): + raise StopIteration from e + else: + raise e + + @property + def _type_spec(self): + return _DTensorIteratorSpec(self._global_element_spec, self._layouts_str) + + +class _DTensorIteratorSpec(iterator_ops.IteratorSpec): + """Type specification for `_DTensorIterator`.""" + + __slots__ = ['_global_element_spec', '_layouts_str'] + + def __init__( + self, global_element_spec: tensor_spec.TensorSpec, layouts_str: Any): + super().__init__(global_element_spec) + self._global_element_spec = global_element_spec + self._layouts_str = layouts_str + + @property + def value_type(self): + return _DTensorIterator + + def _serialize(self): + return (self._global_element_spec, self._layouts_str) + + @property + def _component_specs(self): + return (tensor_spec.TensorSpec([], dtypes.resource),) + + def _to_components(self, value): + return (value._iterator_resource_dtensor,) # pylint: disable=protected-access + + def _from_components(self, components): + layouts = nest.map_structure( + layout_lib.Layout.from_string, self._layouts_str) + return _DTensorIterator( + dtensor_components=components, + 
global_element_spec=self._global_element_spec, + layouts=layouts) + + @classmethod + def from_value(cls, value): + return cls(value._global_element_spec, value._layouts_str) # pylint: disable=protected-access + + +def _validate_input(flattened_layouts: Sequence[layout_lib.Layout], + flattened_elem_spec: Sequence[tensor_spec.TensorSpec], + dataset_already_batched: bool): + """Checks that the dataset's layouts and element specs are compatible. + + Args: + flattened_layouts: the flattened list of layouts used to distribute the + dataset. + flattened_elem_spec: the flattened list of element specs used in the + dataset's components. + dataset_already_batched: whether the dataset to be validated is already + batched. + + Raises: + ValueError: if the dataset's inputs are incompatible. + """ + if not flattened_elem_spec: + raise ValueError( + 'Expected input element spec of at least one element, was empty.') + + first_elem_shape = flattened_elem_spec[0].shape + + for layout, elem_spec in zip(flattened_layouts, flattened_elem_spec): + if elem_spec.shape.rank is None: + raise ValueError( + 'Dataset element shape must have a valid rank, got spec %s.' % + elem_spec) + + # Check that layout's rank matches the element's rank. If dataset is not yet + # batched, then the layout's rank must be one greater than the element's + # rank. + expected_rank = elem_spec.shape.rank + if not dataset_already_batched: + expected_rank += 1 + if layout.rank != expected_rank: + raise ValueError( + ('Expected layout with rank %d for element spec %s, got layout %s. ' + 'Check that the dataset is not batched before passing to ' + 'DTensorDataset.') % + (expected_rank, elem_spec, layout.sharding_specs)) + + if dataset_already_batched: + # Check that the batch dimension size of all dataset elements match. + batch_dim_size = first_elem_shape.as_list()[0] + if batch_dim_size is None: + raise ValueError( + ('Size of batch dimension of element spec %s is None. 
Ensure ' + 'drop_remainder=True when batching the dataset.') % elem_spec) + + if elem_spec.shape.as_list()[0] != batch_dim_size: + raise ValueError( + ('Size of batch dimension of element spec %s does not match ' + 'expected size %d.') % (elem_spec, batch_dim_size)) + + +def _shard_counts(layout: layout_lib.Layout, + batch_dim: Optional[str] = None) -> List[int]: + """Computes a list of the number of shards in each dimension of the layout. + + The shard counts are used to slice each dataset element. The batch dimension's + count is overridden to 1 since we only consider how many shards to make + locally (within each local replica). Sharding across clients is handled by + either tf.data.Dataset's shard transformation (in the single-client case) or + tf.data service's distribute function (in the multi-client case). + + Args: + layout: the layout to compute the shard counts for. + batch_dim: the name of the batch dimension of the layout, if present. + + Returns: + A list of shard counts, one element per dimension of the layout. + """ + shard_counts = [] + for spec in layout.sharding_specs: + if spec in (batch_dim, layout_lib.UNSHARDED): + shard_counts.append(1) + else: + shard_counts.append(layout.mesh.dim_size(spec)) + return shard_counts + + +def _index_matrix(layout: layout_lib.Layout, + elem_spec: tensor_spec.TensorSpec) -> tensor.Tensor: + """Computes a utility matrix to derive device-based slice offsets. + + This function builds a matrix of shape `[mesh.rank, layout.rank]` for each + dataset element. This matrix can be used to slice the DTensor components + returned by the iterator according to the local device that component is to be + placed on. This can be done by multiplying the device offsets of shape + `[1, mesh.rank]` with this index matrix to get a `[1, layout.rank]` shape + tensor containing the slice offsets. 
+ + Note: the index on the batch dim is always 0 since sharding on the batch + dimension is handled by either tf.data.Dataset's shard transformation (in the + single-client case) or tf.data service's distribute function (in the + multi-client case). If there is no sharding on the batch dimension (or any + other dimension), the slice index remains 0. + + Args: + layout: the layout of the dataset element. + elem_spec: the spec of the dataset element. + + Returns: + The index matrix as a tensor. + """ + matrix = [] + for dim in layout.mesh.dim_names: + row = [0] + for layout_idx, spec in enumerate(layout.sharding_specs[1:]): + if spec == layout_lib.UNSHARDED or spec != dim: + row.append(0) + else: + row.append(elem_spec.shape[layout_idx] // layout.mesh.dim_size(dim)) + matrix.append(row) + + return constant_op.constant(matrix, dtype=dtypes.int32) + + +def _pack_iterator_resource_dtensor( + datasets: List[Tuple[int, data_types.DatasetV2]], + layouts: Any, + mesh: layout_lib.Mesh, + num_local_devices_per_replica: int): + """Creates a DTensor iterator resource for the per-replica datasets. + + Given a list of replica ID to tf.data.Dataset mappings, this function creates + iterators for each device and then packs the underlying iterator resource + tensors into a single DTensor. This resource tensor is used by the + IteratorGetNext op to retrieve the next element in the dataset. + + Args: + datasets: a list of tuples of each unique local replica ID to the dataset + object whose elements will be placed on the devices corresponding to that + replica. + layouts: a structure of DTensor layouts to be applied to the elements + returned by the underlying iterators. This can be a single layout or + (possibly nested) tuples or dictionaries of layouts, and the structure + must match the structure of the iterator elements. + mesh: the DTensor mesh to place the iterator batches on. + num_local_devices_per_replica: the number of devices in each data-parallel + replica. 
+ + Returns: + A DTensor of the underlying iterator resource tensors. + """ + host_mesh_devices = mesh.host_mesh().local_devices() + device_idx = 0 + + iterators = [] + for _, dataset in datasets: + for idx in range(num_local_devices_per_replica): + with ops.device_v2(host_mesh_devices[device_idx]): + device_dataset = dataset.shard( + num_shards=num_local_devices_per_replica, index=idx) + iterators.append(iter(device_dataset)) + device_idx += 1 + + if device_idx != len(host_mesh_devices): + raise ValueError( + 'The `datasets` argument does not have the correct number of' + f' underlying datasets, found {device_idx} but expected' + f' {len(host_mesh_devices)}.') + + host_layouts = nest.map_structure( + lambda l: layout_lib.Layout(l.sharding_specs, mesh.host_mesh()), layouts) + + # Pack the iterator resource tensors into a replicated 0-dimensional DTensor + # and set the element layouts. + iterator_resources = [it._iterator_resource for it in iterators] # pylint: disable=protected-access + d_iterator_resource = api.pack( + iterator_resources, + layout_lib.Layout.replicated(mesh=mesh.host_mesh(), rank=0)) + api._dtensor_device().set_iterator_element_layouts( # pylint: disable=protected-access + d_iterator_resource, nest.flatten(host_layouts)) + + return d_iterator_resource + + +@tf_export('experimental.dtensor.DTensorDataset', v1=[]) +class DTensorDataset(dataset_ops.UnaryUnchangedStructureDataset): + """A dataset of DTensors. + + DTensorDataset encapsulates a `tf.data.Dataset` whose elements are + automatically packed and returned as DTensors based on a given mesh and + layouts. + """ + + def __init__(self, + dataset: data_types.DatasetV2, + *, + mesh: layout_lib.Mesh, + layouts: Any, + global_batch_size: int, + dataset_already_batched: bool = False, + batch_dim: Optional[str] = None, + prefetch: Optional[int] = None, + tf_data_service_config: Optional[TFDataServiceConfig] = None): + """Creates a DTensorDataset. 
+ + DTensorDataset automatically handles distribution of the dataset elements to + each client's devices. It can be used to create an iterator that returns + DTensors of the input data on each iteration. + + DTensorDataset works best with unbatched datasets. It takes the mesh and the + provided layouts to automatically calculate how to batch the input locally + for each replica. + + If the provided dataset is already batched according to the per-replica + batch size, then `dataset_already_batched` must be set and DTensorDataset + will check that the batch size is consistent with the intended + `global_batch_size` using the layout information. Each replica receives a + separate slice of the global batch, thus the per-replica batch size can be + computed as the global batch size divided by the number of model replicas. + For a DTensor mesh, the number of replicas is equal to the size of the + mesh's batch dimension. + + Note: `tf.experimental.dtensor.DTensorDataset` instances do *not* implement + the full interface of `tf.data.Dataset`. It only supports two usages we will + mention below: iteration and `element_spec`. We don't support any other APIs + to transform or inspect the dataset. + + TODO(b/223275517): add support for input datasets that are already batched + to the global batch size. + + Args: + dataset: a `tf.data.Dataset` object. + mesh: the DTensor mesh to place the dataset batches on. + layouts: a structure of DTensor layouts to be applied to the input dataset + values. This can be a single layout or (possibly nested) tuples or + dictionaries of layouts, and the structure must match the structure of + the dataset. Either all or none of the layouts should be sharded on the + batch dimension; having only a subset of layouts batch sharded will not + work and raises a ValueError. + global_batch_size: the desired global batch size. + dataset_already_batched: must be set only if the dataset is already + batched to the per-replica batch size. 
The batched dataset must have + `drop_remainder=True` set since DTensor requires static shapes for + slicing the input tensors. + batch_dim: the mesh dimension on which the input's batch dimension is + sharded. Set to None if the input layouts do not shard on the batch + dimension. + prefetch: number of batches to prefetch using Dataset.prefetch. + tf_data_service_config: if operating in multi-client mode, this config + specifies the tf.data service configuration to use. + + Raises: + ValueError: on any of the following situations, + 1. if the structures and ranks of layouts and the dataset do not match. + 2. if the shapes in the dataset's spec are not fully defined. + 3. if batch_dim is specified and all layouts are not batch-sharded. + 4. if per_replica_batch_size is specified for an already batched Dataset + but it does not match the expected per-replica size based on the + provided mesh. + TypeError: if type of structures of layouts and the dataset do not match. + """ + super().__init__(dataset, dataset_ops.to_variant(dataset)) + + # TODO(b/271162918): fix multi-client use case. + if tf_data_service_config is not None: + raise NotImplementedError( + 'Multi-client DTensorDataset is currently not supported.' 
+ ' Check b/271162918.') + + self._mesh = mesh + self._layouts = layouts + self._batch_dim = batch_dim + self._prefetch = prefetch + self._tf_data_service_config = tf_data_service_config + + nest.assert_same_structure(dataset.element_spec, layouts) + flattened_layouts = nest.flatten(layouts) + flattened_elem_spec = nest.flatten(dataset.element_spec) + + if batch_dim: + self.num_global_replicas = mesh.dim_size(batch_dim) + self._local_replica_ids = list( + dict.fromkeys( + [loc[batch_dim] for loc in mesh.local_device_locations()])) + + for layout in flattened_layouts: + if batch_dim != layout.sharding_specs[0]: + raise ValueError( + ('batch_dim %s was specified but at least one layout did not ' + 'contain it: %s') % (batch_dim, layout)) + else: + # Only one replica since there is no sharding on the batch dimension. + self.num_global_replicas = 1 + self._local_replica_ids = [0] + + # Validate layout and element spec compatibility, and raise ValueError if + # invalid. + _validate_input( + flattened_layouts, + flattened_elem_spec, + dataset_already_batched=dataset_already_batched) + + expected_batch_size = global_batch_size // self.num_global_replicas + if not dataset_already_batched: + self._batched_dataset = dataset.batch( + expected_batch_size, drop_remainder=True) + else: + per_replica_batch_size = flattened_elem_spec[0].shape.as_list()[0] + if per_replica_batch_size != expected_batch_size: + raise ValueError( + ('per_replica_batch_size does not matched expected size based on ' + 'the mesh, got %d but expected %d.') % + (per_replica_batch_size, expected_batch_size)) + self._batched_dataset = dataset + + # Construct a global element spec of the dataset. 
+ flattened_global_elem_spec = [] + batch_tensor_shape = tensor_shape.as_shape([global_batch_size]) + for elem_spec in nest.flatten(self._batched_dataset.element_spec): + new_elem_spec = tensor_spec.TensorSpec( + shape=operator.concat(batch_tensor_shape, elem_spec.shape[1:]), + dtype=elem_spec.dtype, + name=elem_spec.name) + flattened_global_elem_spec.append(new_elem_spec) + self._global_element_spec = nest.pack_sequence_as( + dataset.element_spec, flattened_global_elem_spec) + + num_global_devices_per_replica = config.num_global_devices( + mesh.device_type()) // self.num_global_replicas + self._num_local_replicas = len(self._local_replica_ids) + self._num_local_devices_per_replica = mesh.num_local_devices( + ) // self._num_local_replicas + # The number of clients each replica is split over. + self._num_clients_per_replica = ( + num_global_devices_per_replica // self._num_local_devices_per_replica) + # In the case where a replica is split across multiple clients, an offset + # needs to be added to the index used by the partitioning logic such that + # the local devices on that client can be correctly matched to slices of the + # input tensor(s). If replicas are wholly contained within a client, then + # this offset is always 0. + self._partition_offset = (config.client_id() % self._num_clients_per_replica + ) * self._num_local_devices_per_replica + + # Helper data structures used in partitioning the dataset tensors. + self._all_shard_counts = [ + _shard_counts(layout, batch_dim) for layout in flattened_layouts + ] + self._index_matrices = [ + _index_matrix(layout, elem_spec) + for layout, elem_spec in zip(flattened_layouts, flattened_elem_spec) + ] + + def __iter__(self): + datasets: List[Tuple[int, data_types.DatasetV2]] = [] + + # Start with the batched the dataset. 
+ local_dataset = self._batched_dataset + + if self._batch_dim is not None: + if self._num_clients_per_replica > 1: + # If a replica is split over multiple clients then each batch needs to + # be repeated before distribution as many times as there are clients + # corresponding to that replica. + local_dataset = self._repeat_batch(local_dataset, + self._num_clients_per_replica) + sharding_policy = data_service_ops.ShardingPolicy.DATA + else: + # Replicas are unique to each client, so FILE based sharding can be used + # which is more performant since each worker does not need to read the + # entire dataset. + sharding_policy = data_service_ops.ShardingPolicy.FILE + else: + # No batch dimension sharding specified so disable dataset sharding during + # the distribute step. + sharding_policy = data_service_ops.ShardingPolicy.OFF + + # Apply distribution here (if specified) so all remaining transformations + # are executed locally. + if self._tf_data_service_config is not None: + local_dataset = local_dataset.apply( + data_service_ops.distribute( + processing_mode=sharding_policy, + service=self._tf_data_service_config.dispatcher_address, + job_name=f'{self._tf_data_service_config.job_name}_{config.client_id()}', + target_workers='LOCAL')) + + for local_replica_idx, replica_id in enumerate(self._local_replica_ids): + # Select the shard for the corresponding replica. + dataset = distribute._AutoShardDataset( + local_dataset, + num_workers=self._num_local_replicas, + index=local_replica_idx, + num_replicas=self.num_global_replicas) + + # Repeat each batch for each local device in the replica. + dataset = self._repeat_batch(dataset, self._num_local_devices_per_replica) + + # Slice each shard further for all non-batch dim shards. If there is no + # non-batch dim sharding, this slice is essentially a no-op. + dataset = self._partition(dataset) + + # Apply prefetch as the last step. 
Since each batch is repeated, the + # number of elements to prefetch has to be scaled by the same size. + if self._prefetch is not None: + dataset = dataset.prefetch( + self._prefetch * self._num_local_devices_per_replica) + + datasets.append((replica_id, dataset)) + + # Convert the datasets into iterators placed on the host. + d_iterator_resource = _pack_iterator_resource_dtensor( + datasets=datasets, + layouts=self._layouts, + mesh=self._mesh, + num_local_devices_per_replica=self._num_local_devices_per_replica) + + return _DTensorIterator( + dtensor_components=(d_iterator_resource,), + global_element_spec=self._global_element_spec, + layouts=self._layouts) + + def _repeat_batch(self, dataset, repeats): + if repeats == 1: + # Remove this shortcut if tf.data can optimize this away. + return dataset + + def repeat(*x): + return dataset_ops.DatasetV2.from_tensors(x).repeat(repeats) + + return dataset.flat_map(repeat) + + def _partition(self, dataset): + """Slices each dataset element on any sharded non-batch dimension.""" + if self._num_local_devices_per_replica == 1 and self._partition_offset == 0: + # Remove this shortcut if tf.data can optimize this away. + return dataset + + # TODO(b/223275517): decouple from self and make testable. 
+ def slice_batch(index, batch): + flattened_batch = nest.flatten(batch) + flattened_output = [] + + norm_index = math_ops.cast( + index % self._num_local_devices_per_replica, dtype=dtypes.int32) + norm_index += self._partition_offset + coords = self._mesh.coords(norm_index) + coords = array_ops.reshape(coords, (1, -1)) + + for element, shard_counts, idx_matrix in zip(flattened_batch, + self._all_shard_counts, + self._index_matrices): + indexes = math_ops.matmul(coords, idx_matrix) + start = array_ops.reshape(indexes, (-1,)) + size = array_ops.shape_v2( + element, out_type=dtypes.int32) // shard_counts + flattened_output.append( + array_ops.slice(element, begin=start, size=size)) + + return nest.pack_sequence_as(batch, flattened_output) + + enumerated_dataset = dataset.enumerate() + partitioned_dataset = enumerated_dataset.map(slice_batch) + return partitioned_dataset + + @property + def element_spec(self): + return self._global_element_spec diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/layout.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/layout.py new file mode 100644 index 0000000000000000000000000000000000000000..e55039757f692d8d8f346ce73eb7e08e253af8a2 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/layout.py @@ -0,0 +1,550 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
# UNSHARDED indicates a tensor dimension is not sharded over any mesh dimension.
UNSHARDED = 'unsharded'
# MATCH is the second special sharding spec; `Layout.__init__` skips the mesh
# dimension checks for both UNSHARDED and MATCH.
MATCH = 'match'
# Default for the `use_xla_spmd` argument of `Mesh.__init__`.
USE_XLA_SPMD = False

tf_export(
    'experimental.dtensor.UNSHARDED',
    v1=[]).export_constant(__name__, 'UNSHARDED')
tf_export(
    'experimental.dtensor.MATCH', v1=[]).export_constant(__name__, 'MATCH')

# (name, size) pair returned by `Mesh.__getitem__`.
MeshDimension = collections.namedtuple('MeshDimension', ['name', 'size'])


def _compute_mesh_strides(shape: List[int]) -> List[int]:
  """Returns the row-major strides for a mesh of the given `shape`.

  For a shape `[a, b, c, d]` the result is `[b*c*d, c*d, d, 1]`. An empty
  shape yields `[1]`.

  Args:
    shape: Sizes of the mesh dimensions, outermost first.

  Returns:
    A list of integer strides, outermost first.
  """
  strides = [1]
  # Build the strides innermost-first, then flip them into mesh order.
  for idx, dim_size in enumerate(reversed(shape[1:])):
    strides.append(strides[idx] * dim_size)
  strides.reverse()
  return strides


@tf_export('experimental.dtensor.Mesh', v1=[])
class Mesh(_pywrap_dtensor_device.Mesh):
  """Represents a Mesh configuration over a certain list of Mesh Dimensions.

  A mesh consists of named dimensions with sizes, which describe how a set of
  devices are arranged. Defining tensor layouts in terms of mesh dimensions
  allows us to efficiently determine the communication required when computing
  an operation with tensors of different layouts.

  A mesh provides information not only about the placement of the tensors but
  also the topology of the underlying devices. For example, we can group 8 TPUs
  as a 1-D array for data parallelism or a `2x4` grid for (2-way) data
  parallelism and (4-way) model parallelism.

  Refer to [DTensor Concepts](https://www.tensorflow.org/guide/dtensor_overview)
  for in depth discussion and examples.

  Note: the utilities `dtensor.create_mesh` and
  `dtensor.create_distributed_mesh` provide a simpler API to create meshes for
  single- or multi-client use cases.
  """

  def __init__(
      self,
      dim_names: List[str],
      global_device_ids: np.ndarray,
      local_device_ids: List[int],
      local_devices: List[Union[tf_device.DeviceSpec, str]],
      mesh_name: str = '',
      global_devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
      use_xla_spmd: bool = USE_XLA_SPMD,
  ):
    """Builds a Mesh.

    The `dim_names` and `global_device_ids` arguments describe the dimension
    names and shape for the mesh.

    For example,

    ```python
      dim_names = ('x', 'y'),
      global_device_ids = [[0, 1],
                           [2, 3],
                           [4, 5]]
    ```

    defines a 2D mesh of shape 3x2. A reduction over the 'x' dimension will
    reduce across columns (0, 2, 4) and (1, 3, 5), and a reduction over the 'y'
    dimension reduces across rows.

    Note: the utilities `dtensor.create_mesh` and
    `dtensor.create_distributed_mesh` provide a simpler API to create meshes for
    single- or multi-client use cases.

    Args:
      dim_names: A list of strings indicating dimension names.
      global_device_ids: An ndarray of global device IDs is used to compose
        DeviceSpecs describing the mesh. The shape of this array determines the
        size of each mesh dimension. Values in this array should increment
        sequentially from 0. This argument is the same for every DTensor client.
      local_device_ids: A list of local device IDs equal to a subset of values
        in global_device_ids. They indicate the position of local devices in the
        global mesh. Different DTensor clients must contain distinct
        local_device_ids contents. All local_device_ids from all DTensor clients
        must cover every element in global_device_ids.
      local_devices: The list of devices hosted locally. The elements correspond
        1:1 to those of local_device_ids.
      mesh_name: The name of the mesh. Currently, this is rarely used, and is
        mostly used to indicate whether it is a CPU, GPU, or TPU-based mesh.
      global_devices (optional): The list of global devices. Set when multiple
        device meshes are in use.
      use_xla_spmd (optional): Boolean when True, will use XLA SPMD instead of
        DTensor SPMD.

    Raises:
      ValueError: If any argument has the wrong container type, if
        `global_device_ids` is empty or not consecutive, if the number of
        dimension names does not match its rank, if no devices are given, if
        local devices are duplicated, if `local_device_ids` and `local_devices`
        differ in length, if the devices have missing or mixed device types, or
        if `use_xla_spmd` is set for a non-TPU mesh.
    """
    # Check if input args are valid.
    if not isinstance(global_device_ids, np.ndarray):
      raise ValueError('Variable global_device_ids must be an ndarray.')
    if global_device_ids.size == 0:
      raise ValueError('Variable global_device_ids must be non-empty.')
    flat_global_device_ids = global_device_ids.flatten()
    # global_device_ids are expected to be consecutive numbers.
    # Only consecutiveness relative to the first ID is enforced here; the check
    # itself does not require the first ID to be 0.
    # LINT.IfChange
    distance = flat_global_device_ids[0]
    if any(
        (gid - i != distance) for i, gid in enumerate(flat_global_device_ids)):
      raise ValueError('global_device_ids must sequentially increase: %s' %
                       global_device_ids)
    # LINT.ThenChange(//tensorflow/dtensor/cc/dtensor_device.cc)

    # TODO(b/242201545): This class is only for args type transformation for
    # exported C++ Mesh class after the unification is complete. Any other
    # logics should reside in the C++ layer, including validation checks, shall
    # go to C++.

    if len(dim_names) != global_device_ids.ndim:
      raise ValueError(
          'Number of mesh dimensions does not match number of dimension names.')

    if not isinstance(local_device_ids, list):
      raise ValueError('Variable local_device_ids must be a list of integers.')

    if not isinstance(local_devices, list):
      raise ValueError('Variable local_devices must be a list of DeviceSpecs.')

    if global_devices and not isinstance(global_devices, list):
      raise ValueError('Variable global_devices must be a list of DeviceSpecs.')

    if not local_devices and not global_devices:
      raise ValueError('Empty list of devices not allowed.')

    # Transform args format for C++ Mesh constructor
    global_device_ids_flatten = global_device_ids.flatten()
    global_device_ids_shape = global_device_ids.shape

    # Devices may be given as DeviceSpec objects or as strings; both forms are
    # needed below (strings for the base constructor, specs for validation).
    def to_str(d) -> str:
      if isinstance(d, tf_device.DeviceSpec):
        return d.to_string()
      return d

    def to_spec(d) -> tf_device.DeviceSpec:
      if not isinstance(d, tf_device.DeviceSpec):
        return tf_device.DeviceSpec.from_string(d)
      return d

    local_devices_str = [to_str(d) for d in local_devices]
    local_devices_spec = [to_spec(d) for d in local_devices]
    if not global_devices:
      global_devices = []
    global_devices_str = [to_str(d) for d in global_devices]
    global_devices_spec = [to_spec(d) for d in global_devices]

    # Duplicate local devices are rejected, except when every local entry is
    # the same single CPU device.
    local_devices_set = set(local_devices_spec)
    local_device_only_contains_host_cpu = (
        len(local_devices_set) == 1 and
        list(local_devices_set)[0].device_type == 'CPU')
    if not local_device_only_contains_host_cpu and len(local_devices) != len(
        local_devices_set):
      raise ValueError('Duplicate devices found in mesh specification %s.' %
                       [d for d in local_devices if local_devices.count(d) > 1])

    if len(local_device_ids) != len(local_devices):
      raise ValueError(
          'Variable local_device_ids does not have same size as local_devices.')

    if len(local_device_ids) > len(np.ravel(global_device_ids)):
      raise ValueError('Cannot have more local than gobal device IDs.')

    # The device type comes from the local devices, falling back to the global
    # devices when there are no local ones. It must be present and unique.
    device_types = set([device.device_type for device in local_devices_spec])
    if not device_types:
      device_types = set([device.device_type for device in global_devices_spec])
    if None in device_types:
      raise ValueError('device_type is required')
    if len(device_types) > 1:
      raise ValueError('Devices containing multiple device_types : %s' %
                       device_types)
    device_type = device_types.pop()
    if use_xla_spmd and device_type != 'TPU':
      raise ValueError('XLA SPMD is not currently not supported for %s mesh.' %
                       device_type)

    super().__init__(
        mesh_name,
        dim_names,
        global_device_ids_shape,
        global_device_ids_flatten,
        global_devices_str,
        local_device_ids,
        local_devices_str,
        use_xla_spmd,
    )

  @classmethod
  def _new_object(cls, *args, **kwargs):
    """Creates an instance by forwarding the arguments to the base `__init__`."""
    # Need to explicitly invoke the base class __init__ because
    # Mesh.__init__ overrode it with a different signature.
    self = _pywrap_dtensor_device.Mesh.__new__(cls)
    super().__init__(self, *args, **kwargs)
    return self

  def global_device_ids(self) -> np.ndarray:
    """Returns a global device list as an array."""
    return np.array(super().global_device_ids(), dtype=np.int64).reshape(
        self.shape()
    )

  def __getitem__(self, dim_name: str) -> MeshDimension:
    """Returns the `MeshDimension` (name and size) for `dim_name`."""
    return MeshDimension(name=dim_name, size=self.dim_size(dim_name))

  def __hash__(self):
    """Hashes the deterministically serialized mesh proto."""
    return hash(self.as_proto().SerializeToString(deterministic=True))

  def __repr__(self) -> str:
    return f'Mesh.from_string({self.to_string()})'

  # TODO(panzf): change to pybind11 pickle implementation in the last step
  def __reduce__(self):
    """Pickles the mesh as a `Mesh.from_string` call on its string form."""
    return Mesh.from_string, (self.to_string(),)

  # TODO(b/242201545): implement this in Mesh C++ class
  def coords(self, device_idx: int) -> tensor.Tensor:
    """Converts the device index into a tensor of mesh coordinates."""
    strides = ops.convert_to_tensor(self.strides)
    shape = ops.convert_to_tensor(self.shape())
    # Per dimension: divide by that dimension's stride, then wrap by its size.
    return (device_idx // strides) % shape

  @classmethod
  def from_proto(cls, proto: layout_pb2.MeshProto) -> 'Mesh':
    """Construct a mesh instance from input `proto`."""
    return cls._new_object(mesh_proto=proto)

  @classmethod
  def from_string(cls, mesh_str: str) -> 'Mesh':
    """Constructs a mesh instance from the string `mesh_str`."""
    return cls._new_object(mesh_str=mesh_str)

  @classmethod
  def from_device(cls, device: str) -> 'Mesh':
    """Constructs a single device mesh from a device string."""
    return cls._new_object(single_device=device)

  @classmethod
  def _from_mesh(cls, mesh: _pywrap_dtensor_device.Mesh):
    """Creates a copy from an existing pywrap mesh object."""
    return cls._new_object(mesh=mesh)

  @functools.cached_property
  def _host_mesh(self) -> 'Mesh':
    # Computed once per instance; wraps the base-class result in this class.
    return Mesh._from_mesh(super().host_mesh())

  def host_mesh(self) -> 'Mesh':
    """Returns a host mesh."""
    # TODO(b/242201545): Find a way to get the super class to return correct
    # typed objects.
    return self._host_mesh

  # TODO(b/242201545): implement this in Mesh C++ class
  def local_device_locations(self) -> List[Dict[str, int]]:
    """Returns a list of local device locations.

    A device location is a dictionary from dimension names to indices on those
    dimensions.
    """
    mapping = self.unravel_index()
    return [mapping[device_id] for device_id in self.local_device_ids()]

  # TODO(b/242201545): implement this in Mesh C++ class
  @property
  def strides(self) -> List[int]:
    """Returns the strides for this mesh as a list of integers.

    If the mesh shape is `[a, b, c, d]`, then the strides array can be computed
    as `[b*c*d, c*d, d, 1]`. This array can be useful in computing local device
    offsets given a device ID. Using the same example, the device coordinates of
    the mesh can be computed as:

    ```
    [(device_id / (b*c*d)) % a,
     (device_id / (c*d))   % b,
     (device_id / (d))     % c,
     (device_id)           % d]
    ```

    This is the same as `(device_id // mesh.strides) % mesh.shape`.

    Returns:
      The mesh strides as a list of integers.
    """
    return _compute_mesh_strides(self.shape())

  # TODO(b/242201545): implement this in Mesh C++ class
  def unravel_index(self):
    """Returns a dictionary from device ID to {dim_name: dim_index}.

    The keys are the 0-based row-major positions of the mesh cells.

    For example, for a 3x2 mesh, return this:

    ```
      { 0: {'x': 0, 'y': 0},
        1: {'x': 0, 'y': 1},
        2: {'x': 1, 'y': 0},
        3: {'x': 1, 'y': 1},
        4: {'x': 2, 'y': 0},
        5: {'x': 2, 'y': 1} }
    ```
    """
    idx_ranges = [range(self.dim_size(dim_name)) for dim_name in self.dim_names]
    # itertools.product enumerates positions with the last dimension fastest.
    mesh_pos = itertools.product(*idx_ranges)
    mapping = {}
    for device_id, device_pos in enumerate(mesh_pos):
      device_loc = {}
      for dim_name, dim_index in zip(self.dim_names, device_pos):
        device_loc[dim_name] = dim_index
      mapping[device_id] = device_loc
    return mapping


# Re-exported so `Layout.__init__` can pass `LayoutType.STATIC` to its base.
LayoutType = _pywrap_dtensor_device.LayoutType


# TODO(hthu): Consider making this class immutable.
@tf_export('experimental.dtensor.Layout', v1=[])
class Layout(_pywrap_dtensor_device.Layout):
  """Represents the layout information of a DTensor.

  A layout describes how a distributed tensor is partitioned across a mesh (and
  thus across devices). For each axis of the tensor, the corresponding
  sharding spec indicates which dimension of the mesh it is sharded over. A
  special sharding spec `UNSHARDED` indicates that axis is replicated on
  all the devices of that mesh.

  Refer to [DTensor Concepts](https://www.tensorflow.org/guide/dtensor_overview)
  for in depth discussion and examples.

  For example, let's consider a 1-D mesh:

  ```
  Mesh(["TPU:0", "TPU:1", "TPU:2", "TPU:3", "TPU:4", "TPU:5"], [("x", 6)])
  ```

  This mesh arranges 6 TPU devices into a 1-D array. `Layout([UNSHARDED], mesh)`
  is a layout for rank-1 tensor which is replicated on the 6 devices.

  For another example, let's consider a 2-D mesh:

  ```
  Mesh(["TPU:0", "TPU:1", "TPU:2", "TPU:3", "TPU:4", "TPU:5"],
       [("x", 3), ("y", 2)])
  ```

  This mesh arranges 6 TPU devices into a `3x2` 2-D array.
  `Layout(["x", UNSHARDED], mesh)` is a layout for rank-2 tensor whose first
  axis is sharded on mesh dimension "x" and the second axis is replicated. If we
  place `np.arange(6).reshape((3, 2))` using this layout, the individual
  components tensors would look like:

  ```
  Device  |  Component
   TPU:0     [[0, 1]]
   TPU:1     [[0, 1]]
   TPU:2     [[2, 3]]
   TPU:3     [[2, 3]]
   TPU:4     [[4, 5]]
   TPU:5     [[4, 5]]
  ```
  """

  def __init__(self, sharding_specs: List[str], mesh: Mesh):
    """Builds a Layout from a list of dimension names and a Mesh.

    Args:
      sharding_specs: List of sharding specifications, each corresponding to a
        tensor axis. Each specification (dim_sharding) can either be a mesh
        dimension or the special value UNSHARDED.
      mesh: A mesh configuration for the Tensor.

    Raises:
      ValueError: If `mesh` is not a `Mesh`, if a mesh dimension appears more
        than once in `sharding_specs`, or if a spec is neither a dimension of
        `mesh` nor one of the special values.
    """
    # Validate mesh
    if not isinstance(mesh, Mesh):
      raise ValueError('mesh is not a valid Mesh object.')

    # Validate sharding spec
    for _, dim_sharding in enumerate(sharding_specs):
      # If special value no need to check for uniqueness, just skip.
      if dim_sharding == UNSHARDED or dim_sharding == MATCH:
        continue
      # Check dim_sharding is unique.
      if sharding_specs.count(dim_sharding) > 1:
        raise ValueError(
            ('Mesh dimension {mesh_dim} was repeated in sharding ' +
             'specification {sharding_specs}. Mesh dimensions must be unique ' +
             'in a layout.').format(
                 mesh_dim=dim_sharding, sharding_specs=sharding_specs))
      # Check dim_sharding is mesh dimension.
      if dim_sharding not in mesh:
        raise ValueError(
            ('{dim_sharding}: A dimension sharding must either be a ' +
             'valid mesh dimension or UNSHARDED.').format(
                 dim_sharding=dim_sharding))

    super().__init__(
        type=LayoutType.STATIC, sharding_specs=sharding_specs, mesh=mesh
    )

  @classmethod
  def _new_object(cls, *args, **kwargs):
    """Creates an instance by forwarding the arguments to the base `__init__`."""
    # Need to explicitly invoke the base class __init__ because
    # Layout.__init__ overrode it with a different signature.
    self = _pywrap_dtensor_device.Layout.__new__(cls)
    super().__init__(self, *args, **kwargs)
    return self

  def __repr__(self) -> str:
    return f'Layout.from_string({self.to_string()})'

  def __hash__(self):
    """Hashes the deterministically serialized layout proto."""
    return hash(self.as_proto().SerializeToString(deterministic=True))

  # TODO(panzf): change to pybind11 pickle implementation in the last step
  def __reduce__(self):
    """Pickles the layout as a `Layout.from_string` call on its string form."""
    return Layout.from_string, (self.to_string(),)

  @property
  def mesh(self):
    """The mesh of this layout, wrapped as a Python `Mesh`."""
    return Mesh._from_mesh(mesh=super().mesh)  # pylint: disable=protected-access

  @property
  def shape(self):
    """The shape of this layout's mesh."""
    return self.mesh.shape()

  @classmethod
  def batch_sharded(
      cls, mesh: Mesh, batch_dim: str, rank: int, axis: int = 0
  ) -> 'Layout':
    """Returns a layout sharded on batch dimension."""
    return cls._new_object(
        # Watchout for the different ordering.
        mesh=mesh,
        rank=rank,
        batch_dim=batch_dim,
        axis=axis,
    )

  # TODO(b/242201545): Move this to C++ / find the corresponding function there.
  def delete(self, dims: List[int]) -> 'Layout':
    """Returns the layout with the given dimensions deleted.

    Args:
      dims: Axis indices to drop; a single non-list value is treated as a
        one-element list.
    """
    if not isinstance(dims, list):
      dims = [dims]
    new_specs = [
        spec for i, spec in enumerate(self.sharding_specs) if i not in dims
    ]
    return Layout(new_specs, self.mesh)

  @classmethod
  def from_proto(cls, layout_proto: layout_pb2.LayoutProto) -> 'Layout':
    """Creates an instance from a LayoutProto."""
    return cls._new_object(layout_proto=layout_proto)

  @classmethod
  def from_string(cls, layout_str: str) -> 'Layout':
    """Creates an instance from a human-readable string."""
    return cls._new_object(layout_str=layout_str)

  def to_parted(self) -> 'Layout':
    """Returns a "parted" layout from a static layout.

    A parted layout contains axes that are treated as independent by most of
    SPMD expanders.

    FIXME(b/285905569): The exact semantics is still being investigated.
    """
    return Layout._new_object(layout=super().to_parted())

  @classmethod
  def inner_sharded(cls, mesh: Mesh, inner_dim: str, rank: int) -> 'Layout':
    """Returns a layout sharded on inner dimension."""
    # Same as batch_sharded, but applied to the last tensor axis.
    return cls.batch_sharded(mesh, inner_dim, rank, axis=rank - 1)

  @classmethod
  def from_single_device_mesh(cls, mesh: Mesh) -> 'Layout':
    """Constructs a single device layout from a single device mesh."""
    return cls._new_object(mesh=mesh)

  @classmethod
  def from_device(cls, device: str) -> 'Layout':
    """Constructs a single device layout from a device string."""
    return cls.from_single_device_mesh(Mesh.from_device(device))

  # TODO(b/242201545): Move this to C++ / find the corresponding function there.
  def offset_to_shard(self):
    """Mapping from offset in a flattened list to shard index.

    Returns:
      A list indexed by mesh offset. Each entry is a tuple with one shard index
      per tensor axis: 0 for UNSHARDED axes, otherwise the device's index on
      the mesh dimension that axis is sharded over.
    """
    unravel_index = self.mesh.unravel_index()
    locations = [None] * self.mesh.size
    for offset, mesh_loc in unravel_index.items():
      loc = []
      for dim_sharding in self.sharding_specs:
        if dim_sharding == UNSHARDED:
          loc.append(0)
        else:
          # NOTE(review): any spec other than UNSHARDED is looked up as a mesh
          # dimension name here, including MATCH -- confirm that is intended.
          loc.append(mesh_loc[dim_sharding])
      locations[offset] = tuple(loc)

    return locations

  # TODO(b/242201545): Move this to C++ / find the corresponding function there.
  def offset_tuple_to_global_index(self, offset_tuple):
    """Mapping from offset to index in global tensor.

    Linearizes `offset_tuple` row-major: each entry is weighted by the product
    of `num_shards` over all later axes.
    """
    index = 0
    for i, o in enumerate(offset_tuple):
      m = 1
      for x in range(i + 1, self.rank):
        m = m * self.num_shards(x)
      index = index + m * o
    return index

  @classmethod
  def replicated(cls, mesh: Mesh, rank: int) -> 'Layout':
    """Returns a replicated layout of rank `rank`."""
    return cls._new_object(mesh=mesh, rank=rank)
def _print_context(num_global_devices: int, num_clients: int, client_id: int,
                   device_type: str, mesh: layout.Mesh) -> None:
  """Logs the client topology and the device assignment of `mesh`.

  Args:
    num_global_devices: Total number of devices across all clients.
    num_clients: Number of DTensor clients.
    client_id: ID of the current client.
    device_type: Device type name; logged in upper case.
    mesh: The mesh whose global/local device IDs and local devices are logged.
  """
  logging.info('This is client %d of %d clients', client_id, num_clients)
  logging.info('Number of global %s devices: %d', device_type.upper(),
               num_global_devices)
  # pylint: disable=protected-access
  logging.info('Global device IDs: %s', mesh.global_device_ids())
  logging.info('Local device IDs: %s', mesh.local_device_ids())
  logging.info('Local devices: %s', mesh.local_devices())
  # pylint: enable=protected-access


def _make_device_specs(
    devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None
) -> Tuple[List[tf_device.DeviceSpec], str]:
  """Makes device specs for all local devices or from a provided list.

  Args:
    devices: Devices to use, as `DeviceSpec` objects and/or device strings.
      When None, all local devices of `device_type` are used.
    device_type: Expected device type. Defaults to 'CPU' when `devices` is
      None, otherwise to the type of the first device.

  Returns:
    A tuple of (list of device specs, device type).

  Raises:
    ValueError: If there are no devices to build specs from, or if
      `device_type` conflicts with the type of the first device.
  """
  if devices is None:
    if device_type is None:
      device_type = 'CPU'
    devices = config.local_devices(device_type)
  else:
    # Convert entry by entry so that a list mixing strings and DeviceSpecs is
    # handled too, not only lists whose first element is a string.
    devices = [
        d if isinstance(d, tf_device.DeviceSpec)
        else tf_device.DeviceSpec.from_string(d)
        for d in devices
    ]

  # Fail with a clear message instead of an IndexError on `devices[0]` below.
  if not devices:
    raise ValueError(
        f'No devices available to create a mesh (device_type={device_type}).')

  if device_type is None:
    device_type = devices[0].device_type

  if device_type.upper() != devices[0].device_type.upper():
    raise ValueError(
        f'Conflicting devices {str(devices)} and device_type {device_type}'
    )

  return devices, device_type


@tf_export('experimental.dtensor.create_mesh', v1=[])
def create_mesh(
    mesh_dims: Optional[Union[List[Tuple[str, int]], Dict[str, int]]] = None,
    mesh_name: str = '',
    devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None,
    use_xla_spmd: bool = layout.USE_XLA_SPMD,
) -> layout.Mesh:
  """Creates a single-client mesh.

  If both `mesh_dims` and `devices` are specified, they must match each other.
  As a special case, when all arguments are missing, this creates a 1D CPU mesh
  with an empty name, assigning all available devices to that dimension.

  Args:
    mesh_dims: A dict of dim_name: dim_size, or a list of (dim_name, dim_size)
      tuples. Defaults to a single batch-parallel dimension called 'x' using
      all devices. As a special case, a single-element mesh_dims whose dim_size
      is -1 also uses all devices. e.g. `{'x' : 4, 'y' : 1}` or `[('x', 4),
      ('y', 1)]`. The argument is never modified.
    mesh_name: Name of the created mesh. Defaults to ''.
    devices: String representations of devices to use. This is the device part
      of tf.DeviceSpec, e.g. 'CPU:0'. Defaults to all available logical devices.
    device_type: If `devices` is missing, the type of devices to use. Defaults
      to 'CPU'.
    use_xla_spmd: Boolean when True, will use XLA SPMD instead of DTensor SPMD.

  Returns:
    A single-client mesh created from specified or default arguments.

  Raises:
    ValueError: If the total mesh size differs from the number of devices.
  """
  device_specs, device_type = _make_device_specs(devices, device_type)

  local_spec = tf_device.DeviceSpec(job=config.job_name(), replica=0, task=0)
  device_specs = [local_spec.make_merged_spec(d) for d in device_specs]

  if mesh_dims is None:
    mesh_dims = [('x', len(device_specs))]
  elif isinstance(mesh_dims, dict):
    mesh_dims = list(mesh_dims.items())
  else:
    # Work on a copy: the -1 substitution below must not leak into the
    # caller's list.
    mesh_dims = list(mesh_dims)

  if len(mesh_dims) == 1 and mesh_dims[0][1] == -1:
    # Replace a -1 dim_size in a 1D mesh with the number of all devices.
    mesh_dims[0] = (mesh_dims[0][0], len(device_specs))

  dim_names = [d[0] for d in mesh_dims]
  shape = [d[1] for d in mesh_dims]

  if np.prod(shape) != len(device_specs):
    raise ValueError(f'length of devices ({len(device_specs)}) must be '
                     f'equal to total size of the mesh of shape {shape}')

  global_device_ids = np.arange(len(device_specs)).reshape(shape)
  # Single client: every global device is also a local device.
  local_device_ids = np.ravel(global_device_ids).tolist()
  mesh = layout.Mesh(
      dim_names=dim_names,
      global_device_ids=global_device_ids,
      local_device_ids=local_device_ids,
      local_devices=device_specs,
      mesh_name=mesh_name,
      use_xla_spmd=use_xla_spmd)
  _print_context(
      num_global_devices=len(device_specs),
      num_clients=1,
      client_id=0,
      device_type=device_type,
      mesh=mesh)
  return mesh


@tf_export('experimental.dtensor.create_distributed_mesh', v1=[])
def create_distributed_mesh(
    mesh_dims: Union[List[Tuple[str, int]], Dict[str, int]],
    mesh_name: str = '',
    local_devices: Optional[List[Union[tf_device.DeviceSpec, str]]] = None,
    device_type: Optional[str] = None,
    use_xla_spmd: bool = layout.USE_XLA_SPMD,
) -> layout.Mesh:
  """Creates a distributed mesh.

  This is similar to `create_mesh`, but with a different set of arguments to
  create a mesh that spans evenly across a multi-client DTensor cluster.

  For CPU and GPU meshes, users can choose to use fewer local devices than what
  is available by passing `local_devices`.

  For TPU, only meshes that use all TPU cores are supported by the DTensor
  runtime.

  Args:
    mesh_dims: A dict of dim_name: dim_size, or a list of (dim_name, dim_size)
      tuples. e.g. `{'x' : 4, 'y' : 1}` or `[('x', 4), ('y', 1)]`.
    mesh_name: Name of the created mesh. Defaults to ''.
    local_devices: String representations of devices to use. This is the device
      part of tf.DeviceSpec, e.g. 'CPU:0'. Defaults to all available local
      logical devices.
    device_type: Type of device to build the mesh for. Defaults to 'CPU'.
      Supported values are 'CPU', 'GPU', 'TPU'.
    use_xla_spmd: Boolean when True, will use XLA SPMD instead of DTensor SPMD.

  Returns:
    A mesh that spans evenly across all DTensor clients in the cluster.

  Raises:
    ValueError: If accelerators are not initialized, if `local_devices` is
      given for a TPU mesh, if the mesh size does not match the global number
      of devices, or if the device type is not CPU, GPU or TPU.
  """
  if isinstance(mesh_dims, dict):
    mesh_dims = list(mesh_dims.items())
  dim_names, shape = zip(*mesh_dims)

  if not accelerator_util.is_initialized():
    raise ValueError('Accelerators are uninitialized, please run '
                     'dtensor.initialize_accelerator_system() first.')

  if device_type and device_type.upper() == 'TPU':
    # TODO(b/185940495): Allow multi-mesh and partial on TPU.
    # TPU meshes can only be configured through environment variables that
    # reflect the actual TPU topology. Do not let users specify custom args.
    if local_devices is not None:
      raise ValueError(
          f'Do not specify devices for {device_type.upper()} meshes. '
          f'Using a partial list of devices for {device_type.upper()} '
          f'is not supported.')

  device_specs, device_type = _make_device_specs(local_devices, device_type)

  if device_type.upper() in ['CPU', 'GPU']:
    # For CPU and GPU meshes, user-specified args take precedence over env vars.
    # This is particularly useful on single clients when users want to create
    # meshes that use fewer logical devices than what's available.

    local_spec = tf_device.DeviceSpec(
        job=config.job_name(), replica=0, task=config.client_id())
    device_specs = [local_spec.make_merged_spec(d) for d in device_specs]

    # Assumes identical number of local devices per client.
    num_global_devices = len(device_specs) * config.num_clients()

    if np.prod(shape) != num_global_devices:
      raise ValueError(
          f'Global number of devices '
          f'({len(device_specs)} per client * {config.num_clients()} clients '
          f'= {num_global_devices}) must be '
          f'equal to total size of the mesh of shape {shape}')

    global_device_ids = np.arange(num_global_devices).reshape(shape)
    flattened = np.ravel(global_device_ids).tolist()
    # Each client owns one contiguous run of the flattened global IDs.
    start_idx = len(device_specs) * config.client_id()
    local_device_ids = flattened[start_idx:start_idx + len(device_specs)]

    mesh = layout.Mesh(
        dim_names=dim_names,
        global_device_ids=global_device_ids,
        local_device_ids=local_device_ids,
        local_devices=device_specs,
        mesh_name=mesh_name,
        use_xla_spmd=use_xla_spmd)
    _print_context(num_global_devices, config.num_clients(), config.client_id(),
                   device_type, mesh)
    return mesh

  if device_type.upper() == 'TPU':
    mesh = tpu_util.create_tpu_mesh(
        mesh_dim_names=dim_names,
        mesh_shape=shape,
        mesh_name=mesh_name,
        use_xla_spmd=use_xla_spmd)
    _print_context(
        config.num_global_devices(device_type), config.num_clients(),
        config.client_id(), device_type, mesh)
    return mesh

  raise ValueError(f'Device type {device_type} is not CPU, GPU or TPU')
+ + Example: + + A barrier can be used before application exit to ensure completion of pending + ops. + + ```python + + x = [1, 2, 3] + x = dtensor.relayout(x, dtensor.Layout.batch_sharded(mesh, 'batch', 1)) + dtensor.barrier(mesh) + + # At this point all devices on all clients in the mesh have completed + # operations before the barrier. Therefore it is OK to tear down the clients. + sys.exit() + ``` + + Args: + mesh: The mesh to run the barrier on. + barrier_name: The name of the barrier. Mainly used for logging purpose. + timeout_in_ms: The timeout of the barrier in ms. If omitted, blocks + indefinitely till the barrier is reached from all clients. + """ + if barrier_name is None: + barrier_name = '(barrier)' + + logging.info('entering barrier before op: %s', barrier_name) + + # Make sure all ops are consumed before running the sync. + context.async_wait() + + # Reduction on a fully sharded tensor requires all devices to participate + # and serves as a barrier on the mesh. + component = array_ops.reshape(1.0, [1] * len(mesh.shape())) + ones = api.pack([component] * mesh.num_local_devices(), + layout.Layout(mesh.dim_names, mesh)) + + mesh_size = math_ops.reduce_sum(ones) + if mesh_size != mesh.size: + raise ValueError( + 'Global barrier produced wrong mesh size : {0} while mesh has actual' + 'size : {1}'.format(mesh_size, mesh.size)) + + # TODO(hthu): This isn't strictly needed but might cause confusing behaviors + # from users. Consider dropping this if there is a `big` performance hit. + context.async_wait() + + if context.context().coordination_service: + if timeout_in_ms is None: + timeout_in_ms = 24 * 60 * 60 * 1000 # 24 hours to stand in for infinite. 
+ + num_calls = _BARRIER_DICT.setdefault(barrier_name, 0) + _BARRIER_DICT[barrier_name] = num_calls + 1 + + barrier_id = f'{barrier_name}:{num_calls}' + context.context().wait_at_barrier(barrier_id, timeout_in_ms) + + logging.info('finished running barrier across all clients after ' + 'op: %s', barrier_name) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/numpy_util.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/numpy_util.py new file mode 100644 index 0000000000000000000000000000000000000000..8cab7a592d1bbba14fa1c43fd2f79ea363e02a5f --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/numpy_util.py @@ -0,0 +1,116 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+# ============================================================================== +"""Utilities to convert data buffers to/from DTensor tensors.""" +from typing import List + +import numpy as np + +from tensorflow.dtensor.python import api +from tensorflow.dtensor.python import layout as layout_lib +from tensorflow.python.eager.polymorphic_function import polymorphic_function +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import array_ops_stack +from tensorflow.python.ops import sparse_ops +from tensorflow.python.ops import stateless_random_ops +from tensorflow.python.types.core import Tensor, TensorLike # pylint: disable=g-multiple-import + +# FIXME(b/262894693): Functions in this file are buggy. +# They do not distinguish between the client-local data and the global view. + + +def _split(value, splits, axis=0, split_fn=np.split, stack_fn=np.stack): + """Split `value` into a sharded nparray/tf tensor based on the number of splits. + """ + children = split_fn(value, splits[0], axis=axis) + if len(splits) > 1: + splits = splits[1:] + children = [_split(child, splits, axis + 1) for child in children] + return stack_fn(children) + + +def to_numpy(tensor: TensorLike) -> np.ndarray: + """Copy `input` DTensor to an equivalent local numpy array.""" + layout = api.fetch_layout(tensor) + if layout.mesh.is_remote(): + return np.array([None]) + + unpacked = [tensor.numpy() for tensor in api.unpack(tensor)] + return unpacked_to_numpy(unpacked, layout) + + +def unpacked_to_numpy(unpacked: List[TensorLike], + layout: layout_lib.Layout) -> np.ndarray: + """Heals local Tensor components to a numpy array.""" + if len(unpacked) != len(layout.offset_to_shard()): + raise ValueError('Wrong number of component Tensors.') + + unravelled = np.ndarray([layout.num_shards(i) for i in range(layout.rank)], + dtype=object) + + for offset, loc in enumerate(layout.offset_to_shard()): + unravelled[loc] = unpacked[offset] + + concat_tensor = np.block(unravelled.tolist()) + 
+ # np.block can introduce empty initial dimensions, peel these off until + # the output matches the rank of the input tensors. + while concat_tensor.ndim > unpacked[0].ndim: + concat_tensor = np.squeeze(concat_tensor, axis=0) + return concat_tensor + + +# TODO(feyu): rename to slice. +def unpack(t: TensorLike, + layout: layout_lib.Layout, + split_fn=np.split, + stack_fn=np.stack) -> List[TensorLike]: + """Slice `t` into a flattened list of tensors suitable for `pack`.""" + if not layout.rank: + return [t] * layout.mesh.size + sharded_tensor = _split( + t, [layout.num_shards(i) for i in range(layout.rank)], + split_fn=split_fn, + stack_fn=stack_fn) + flattened = [np.ndarray([])] * layout.mesh.size + for offset, shard in enumerate(layout.offset_to_shard()): + flattened[offset] = sharded_tensor[tuple(shard)] + return flattened + + +def pack_numpy(value: np.ndarray, + layout: layout_lib.Layout, + make_sparse: bool = False) -> Tensor: + assert value is not None + unpacked = unpack(value, layout) + if make_sparse: + return api.pack([sparse_ops.from_dense(t) for t in unpacked], layout) + return api.pack(unpacked, layout) + + +def pack_tf_tensor(value: Tensor, layout: layout_lib.Layout) -> Tensor: + if value is None: + raise ValueError('pack requires values to be passed in') + unpacked = unpack( + value, layout, split_fn=array_ops.split, stack_fn=array_ops_stack.stack) + return api.pack(unpacked, layout) + + +@polymorphic_function.function +def stateless_random_uniform(shape, seed, layout): + """Creates uniform random tensor with the given layout.""" + return api.relayout( + stateless_random_ops.stateless_random_uniform(shape=shape, seed=seed), + layout=layout, + ) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/save_restore.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/save_restore.py new file mode 100644 index 
0000000000000000000000000000000000000000..25bd78cdf00a991c13181c33218e060c999577b0 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/save_restore.py @@ -0,0 +1,222 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Contains functionality for Checkpoint/SavedModel in DTensor.""" + +import collections +from typing import Dict, List, Union + +from tensorflow.dtensor.python import api +from tensorflow.dtensor.python import d_variable +from tensorflow.dtensor.python import gen_dtensor_ops +from tensorflow.dtensor.python import layout as layout_lib +from tensorflow.dtensor.python import mesh_util +from tensorflow.python.eager import context +from tensorflow.python.framework import errors_impl +from tensorflow.python.framework import ops +from tensorflow.python.framework import tensor as tensor_lib +from tensorflow.python.ops import io_ops +from tensorflow.python.ops import variables as tf_variables +from tensorflow.python.util.tf_export import tf_export + + +@tf_export('experimental.dtensor.sharded_save', v1=[]) +def sharded_save( + mesh: layout_lib.Mesh, + file_prefix: Union[str, tensor_lib.Tensor], + tensor_names: Union[List[str], tensor_lib.Tensor], + shape_and_slices: Union[List[str], tensor_lib.Tensor], + tensors: List[Union[tensor_lib.Tensor, tf_variables.Variable]], 
+): + """Saves given named tensor slices in a sharded, multi-client safe fashion. + + The method makes sure the checkpoint directory state is correct in a sharded + multi-client save. Namely, we place a barrier after SaveV2 to make sure + every client has finished writing the files. And another one after + MergeV2Checkpoints to make sure all Metadata is properly merged. + + Upon exiting, the checkpoint is complete and all directory operations + are done. + + Args: + mesh: The Mesh that contains the Tensors to save. + file_prefix: The prefix of checkpoint. + tensor_names: a list of tensor names used in save op. + shape_and_slices: a list of shape and slice specification used in save op. + The only supported value is "" as we don't support distributed saving with + slices yet. + tensors: a list of tensors used in save op. The order should match + tensor_names. + + Returns: + A MergeV2Checkpoints op that merged all Metadata. + """ + with ops.device(api.device_name()): + io_ops.save_v2(file_prefix, tensor_names, shape_and_slices, tensors) + + # Make sure all clients have written the files + mesh_util.barrier(mesh.host_mesh(), 'SaveV2') # pylint: disable=protected-access + + with api.default_mesh(mesh.host_mesh()): + merge_op = io_ops.MergeV2Checkpoints( + checkpoint_prefixes=[file_prefix], + destination_prefix=file_prefix, + delete_old_dirs=True) + + # Make sure first device in first host has finished merge. + mesh_util.barrier(mesh.host_mesh(), 'MergeV2Checkpoints') + + return merge_op + + +@tf_export('experimental.dtensor.enable_save_as_bf16', v1=[]) +def enable_save_as_bf16(variables: List[tf_variables.Variable]): + """Allows float32 DVariables to be checkpointed and restored as bfloat16. + + The method only affects the DVariable part inside the model and leaves + non-DTensor Variables/Tensors untouched. + + Args: + variables: A list of tf.Variable to be enabled with bfloat16 save/restore. 
+ Only has effect on DTensor Variables as they go through d_variables with + DTensor-specific logic. + """ + for v in variables: + if isinstance(v, d_variable.DVariable): + v.save_as_bf16 = True + + +@tf_export('experimental.dtensor.name_based_restore', v1=[]) +def name_based_restore( + mesh: layout_lib.Mesh, + checkpoint_prefix: str, + name_tensor_dict: Dict[ + str, Union[tensor_lib.Tensor, tf_variables.Variable]], +): + """Restores from checkpoint_prefix to name based DTensors. + + It is required to have already-initialized DTensor variables that have the + same shape/dtype as the tensors being restored. + + Also, we currently only support a name based restore on a single mesh. + + Args: + mesh: The single mesh that all Tensors would be restored to. + checkpoint_prefix : The prefix of checkpoint to be restored. + name_tensor_dict: An ordered dictionary of tensor_names to a DTensor. The + DTensor shape/dtype must match the tensors being saved/restored for now. + + Returns: + A dictionary of name to its restored DTensor value. + """ + if not context.executing_eagerly(): + raise ValueError('name based restore must run eagerly.') + + ordered_name_tensor_dict = name_tensor_dict + if not isinstance(name_tensor_dict, collections.OrderedDict): + ordered_name_tensor_dict = collections.OrderedDict(name_tensor_dict) + + # Make sure that all tensors are on CPU mesh for now. + # This might not be a hard limitation in the future. + for name, tensor in ordered_name_tensor_dict.items(): + try: + if api.fetch_layout(tensor).mesh.device_type().upper() != 'CPU': + raise ValueError( + 'Restoring a non CPU Tensor is not supported currently. Offending ' + 'tensor name : {tensor_name}'.format(tensor_name=name)) + except errors_impl.OpError as op_error: + raise ValueError( + 'Saving/Restoring tensor must be a DTensor') from op_error + + # Now that we have all tensors on CPU mesh, do a DTensorRestoreV2. 
+ checkpoint_prefix = api.pack( + [checkpoint_prefix] * mesh.num_local_devices(), + layout_lib.Layout.replicated(mesh.host_mesh(), rank=0)) + # Explicitly pack to mesh to avoid implicit small constant extraction, which + # does not work for larger restores that have lots of names. + tensor_names = api.pack( + [list(ordered_name_tensor_dict.keys())] * mesh.num_local_devices(), + layout_lib.Layout.replicated(mesh.host_mesh(), rank=1)) + shape_and_slices = api.pack( + [[''] * len(ordered_name_tensor_dict)] * mesh.num_local_devices(), + layout_lib.Layout.replicated(mesh.host_mesh(), rank=1)) + # A list of TensorShape representing all shapes for the input tensors. + input_shapes = [tensor.shape for tensor in ordered_name_tensor_dict.values()] + input_layouts = [ + api.fetch_layout(tensor).to_string() + for tensor in ordered_name_tensor_dict.values() + ] + + with ops.device(api.device_name()): + restored_cpu_tensors = gen_dtensor_ops.d_tensor_restore_v2( + prefix=checkpoint_prefix, + tensor_names=tensor_names, + shape_and_slices=shape_and_slices, + input_shapes=input_shapes, + input_layouts=input_layouts, + dtypes=[tensor.dtype for tensor in ordered_name_tensor_dict.values()], + ) + + return collections.OrderedDict( + zip(ordered_name_tensor_dict.keys(), restored_cpu_tensors) + ) + + +@tf_export('experimental.dtensor.name_based_save', v1=[]) +def name_based_save( + mesh: layout_lib.Mesh, + checkpoint_prefix: Union[str, tensor_lib.Tensor], + name_tensor_dict: Dict[ + str, Union[tensor_lib.Tensor, tf_variables.Variable]], +): + """Saves name based Tensor into a Checkpoint. + + The function prepares the input dictionary to the format of a `sharded_save`, + so that it can take advantage of DTensor SPMD based distributed save. + + Same as restore, the function only supports saving on the single mesh. + + Args: + mesh: The single mesh that contains all Tensors to be saved. + checkpoint_prefix : The prefix of checkpoint to be saved. 
+ name_tensor_dict: A ordered dictionary of tensor_names to a DTensor. The + DTensor shape/dtype must match the tensors being saved/restored for now. + """ + if not context.executing_eagerly(): + raise ValueError('name based save must run eagerly.') + + ordered_name_tensor_dict = name_tensor_dict + if not isinstance(name_tensor_dict, collections.OrderedDict): + ordered_name_tensor_dict = collections.OrderedDict(name_tensor_dict) + + # Current _dtensor_device() in api.py is the correct way of specifying + # DTensor device singletons. The API itself will be eventually be moved to + # a public API and provides global singleton in DTensor context. + # For now, we just use the current `internal` API and aim at migrating in + # one shot later. + # TODO(hthu): Provide _dtensor_device() singleton as a public API. + # pylint: disable=protected-access + checkpoint_prefix = api.pack([checkpoint_prefix] * mesh.num_local_devices(), + layout_lib.Layout.replicated( + mesh.host_mesh(), rank=0)) + tensor_names = api.pack( + [list(ordered_name_tensor_dict.keys())] * mesh.num_local_devices(), + layout_lib.Layout.replicated(mesh.host_mesh(), rank=1)) + + sharded_save( + mesh, + file_prefix=checkpoint_prefix, + tensor_names=tensor_names, + shape_and_slices=[''] * len(ordered_name_tensor_dict), + tensors=list(ordered_name_tensor_dict.values())) diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/tpu_util.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/tpu_util.py new file mode 100644 index 0000000000000000000000000000000000000000..5c746a53b5db9272fc6e1ec88e959841c78777a9 --- /dev/null +++ b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/dtensor/python/tpu_util.py @@ -0,0 +1,812 @@ +# Copyright 2022 The TensorFlow Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""TPU-specific utilities for DTensor.""" + +import functools +import time +from typing import List, Optional, Dict + +import numpy as np + +from tensorflow.dtensor.python import config +from tensorflow.dtensor.python import dtensor_device +from tensorflow.dtensor.python import gen_dtensor_ops +from tensorflow.dtensor.python import layout as layout_lib +from tensorflow.python.eager import context +from tensorflow.python.eager import def_function +from tensorflow.python.framework import constant_op +from tensorflow.python.framework import errors +from tensorflow.python.framework import ops +from tensorflow.python.ops import array_ops +from tensorflow.python.ops import math_ops +from tensorflow.python.platform import tf_logging as logging +from tensorflow.python.tpu import topology +from tensorflow.python.util import numpy_compat +from tensorflow.python.util.tf_export import tf_export + + +_MESH_DIM_X = "x" +_TPU_DEVICE_TYPE = "TPU" + +# A dedicated, hidden device used to make C++ API calls. +_dtensor_device = None + +# `_topology._mesh_shape` contains the TPU hardware slice size. +# `_topology.device_coordinates` maps TF task-device ordinals to TPU core IDs. +_tpu_topology = None + +# Cache core ID <-> location mappings so we need not make repeated C++ calls. +# Both are indexed by TF task-device ordinals. 
+_all_core_ids = None +_all_core_locations = None + + +class _CoreLocation: + """Represents a TPU core's location in the mesh.""" + + def __init__(self, x: int = 0, y: int = 0, z: int = 0, core: int = 0): + self.x = x + self.y = y + self.z = z + self.core = core + + def __eq__(self, other): + if not isinstance(other, _CoreLocation): + return False + return self.x == other.x and self.y == other.y and self.z == other.z and self.core == other.core + + def __ne__(self, other): + if not isinstance(other, _CoreLocation): + return True + return not self == other + + def __hash__(self): + return hash((self.x, self.y, self.z, self.core)) + + def __repr__(self): + return f"{type(self).__name__}(x={self.x}, y={self.y}, z={self.z}, core={self.core})" + + def to_list(self): + return [self.x, self.y, self.z, self.core] + + +def _create_device_array(shape, device_type, host_id, local_device_ids=None): + """Returns ID and device lists that can be used to create a mesh.""" + num_global_devices = config.num_global_devices(device_type) + global_device_ids = np.arange(num_global_devices).reshape(shape) + local_device_list = config.local_devices(device_type) + + # User can specify local_device_ids or use default list for multi host. + num_local_devices = len(local_device_list) + local_device_ids = [ + x + host_id * num_local_devices for x in range(num_local_devices) + ] if not local_device_ids else local_device_ids + + return global_device_ids, local_device_ids, local_device_list + + +def _create_tpu_topology(core_locations: List[_CoreLocation], num_tasks: int, + num_devices_per_task: int) -> topology.Topology: + """Returns a Topology object build from a _CoreLocation list. + + Args: + core_locations: A list of _CoreLocation objects sorted first by TF task ID + and then by per-task device ordinals. + num_tasks: The number of TF tasks in the cluster. + num_devices_per_task: The number of TPU devices local to each task. 
+ """ + + assert min([l.x for l in core_locations]) == 0 + assert min([l.y for l in core_locations]) == 0 + assert min([l.z for l in core_locations]) == 0 + assert min([l.core for l in core_locations]) == 0 + x_max = max([l.x for l in core_locations]) + y_max = max([l.y for l in core_locations]) + z_max = max([l.z for l in core_locations]) + core_max = max([l.core for l in core_locations]) + mesh_shape = [x_max + 1, y_max + 1, z_max + 1, core_max + 1] + + device_coordinates = [[l.x, l.y, l.z, l.core] for l in core_locations] + device_coordinates = numpy_compat.np_asarray(device_coordinates).reshape( + num_tasks, num_devices_per_task, 4) + + return topology.Topology( + mesh_shape=mesh_shape, device_coordinates=device_coordinates) + + +def shutdown_tpu_system(): + """Shuts down the TPU system.""" + + @def_function.function + def _shutdown_tpu_system(): + return gen_dtensor_ops.shutdown_tpu_system() + + success = _shutdown_tpu_system() if context.is_tfrt_enabled() else True + if success: + logging.info("TPU system shut down.") + else: + logging.warning("TPU system fails to shut down.") + + +def tpu_system_init_helper(task_id, + num_tasks, + num_devices, + use_tfrt_host_runtime=True, + use_megacore=False): + """A helper function to initialize multi-client tpu system.""" + + @def_function.function + def _tpu_init_fn(): + return gen_dtensor_ops.configure_and_initialize_global_tpu( + use_tfrt_host_runtime=use_tfrt_host_runtime) + + @def_function.function + def _set_global_tpu_array_fn(topology_proto): + gen_dtensor_ops.d_tensor_set_global_tpu_array(topology_proto) + + with ops.device("/job:" + config.full_job_name() + "/device:TPU_SYSTEM:0"): # pylint: disable=protected-access + my_core_ids = _tpu_init_fn() + + if use_megacore: + logging.info("Using TPU megacore") + my_core_ids = my_core_ids * 2 + logging.info("TPU core IDs: %s", my_core_ids) + + # `my_core_ids` contains the IDs of TPU cores attached to this host. 
+ # + # To generate correct and efficient XLA AllReduce group assignment, we must + # merge these arrays from all hosts and broadcast the result back to all + # hosts, so all hosts can use these mappings in their MLIR passes. + # + # This is essentially doing what WaitForDistributedTpuOp and + # SetGlobalTPUArrayOp do, in our multi-client environment. + num_devices_per_task = int(num_devices / num_tasks) + + # Create a one-time use mesh and layout just for merging core IDs. + mesh = layout_lib.Mesh([_MESH_DIM_X], + *_create_device_array((num_devices,), _TPU_DEVICE_TYPE, + config.client_id())) + layout = layout_lib.Layout([_MESH_DIM_X, layout_lib.UNSHARDED], mesh) + device = dtensor_device.DTensorDevice(meshes=[mesh]) + logging.info("TPU core locations: %s", + device.tpu_core_ids_to_locations(my_core_ids)) + + # At this point, we don't know which cores are attached to other hosts. + # The core ID mappings in the runtime haven't been set yet. + # + # The core ID merging AllReduce below is carefully written so it works + # without needing correct core mappings to be set in the runtime. We will + # use this AllReduce's result to set the core ID mappings, and all future + # user-initiated AllReduces will use the mappings. + # + # The runtime is hard-coded to ignore core ID mappings on this AllReduce. + all_core_ids = np.zeros([num_devices], dtype=np.int32) + for i in range(len(my_core_ids)): + all_core_ids[task_id * num_devices_per_task + i] = my_core_ids[i] + + # Only one local device gets a valid input. To give an example, assume we have + # 2 tasks and each of them has 8 local devices, then `all_core_ids` in task 0 + # will have 8 tensors, where 1 of them may have its value as + # [0,1,2,3,4,5,6,7,0,0,0,0,0,0,0,0] and the other tensors are all zeros. For + # task 1, the case may be one with [0,0,0,0,0,0,0,0,8,9,10,11,12,13,14,15] + # and other 7 are all zeros. 
+ all_core_ids = constant_op.constant([all_core_ids]) + zeros = array_ops.zeros_like(all_core_ids) + all_core_ids = [all_core_ids] + [zeros] * (num_devices_per_task - 1) + + # All devices on all hosts participate in one AllReduce, whose result will be + # core IDs arranged by task-device ordinals. For the above example, the result + # will be [0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15]. + with ops.device(device.name): + all_core_ids = device.pack(all_core_ids, layout) + all_core_ids = math_ops.reduce_sum(all_core_ids, axis=[0]) + unpacked_all_tpu_ids = device.unpack(all_core_ids) + + all_core_ids = list(unpacked_all_tpu_ids[0].numpy()) + logging.info("All TPU core IDs: %s", all_core_ids) + + # Set the default core ID mappings in the runtime for legacy code and tests. + # + # Legacy code and tests create TPU meshes directly without using the + # `create_tpu_mesh` function below. Those meshes have global device IDs + # equal to TF task-device ordinals. The `all_core_ids` array happens to + # arrange core IDs by TF task-device ordinals. Using this array on those + # meshes guarantee correct although inefficient results. + device.set_tpu_core_ids("", all_core_ids) + + # Remember enough global, immutable information to be able to build any ring + # we want prescribed by `create_tpu_mesh` in the future. 
+ global _all_core_ids + _all_core_ids = all_core_ids + + all_core_locations = device.tpu_core_ids_to_locations(all_core_ids) + all_core_locations = [ + _CoreLocation(l[0], l[1], l[2], l[3]) for l in all_core_locations + ] + global _all_core_locations + _all_core_locations = all_core_locations + logging.info("All TPU core locations: %s", all_core_locations) + + tpu_topology = _create_tpu_topology(all_core_locations, num_tasks, + num_devices_per_task) + + _set_global_tpu_array_fn(tpu_topology.serialized()) + return tpu_topology, device + + +def initialize_tpu_system(use_megacore=False): + """Initializes the TPU system.""" + + # Make sure the server change is fully propagated before attempting to run + # the core ID merging logic below. + context.ensure_initialized() + context.async_wait() + context.context()._clear_caches() # pylint: disable=protected-access + + use_tfrt_host_runtime = context.context().use_tfrt + logging.info("Using TFRT host runtime is set to %s", use_tfrt_host_runtime) + try: + task_id = config.client_id() + num_tasks = config.num_clients() + num_devices = config.num_global_devices(_TPU_DEVICE_TYPE) + + tpu_topology, device = tpu_system_init_helper( + task_id, + num_tasks, + num_devices, + use_tfrt_host_runtime=use_tfrt_host_runtime, + use_megacore=use_megacore) + global _tpu_topology + _tpu_topology = tpu_topology + logging.vlog(1, "TPU Topology: %s, %s", tpu_topology.mesh_shape, + tpu_topology.device_coordinates) + + global _dtensor_device + _dtensor_device = device + + context.async_wait() + + except errors.InvalidArgumentError as e: + raise errors.NotFoundError( + None, None, + "Initialization failed, no valid TPUs found. " + str(e)) from e + + except errors.InternalError as e: + logging.error("Hit internal error during TPU system initialization. " + + "It is likely hardware failure. \nPlease check the error " + + "messages above to see whether that's the case. 
\nIf so, " + + "consider to restart the job or try another machine.") + raise e + + # Clear out the eager context caches since the memory is invalid now. + logging.info("Clearing out eager caches") + context.context()._clear_caches() # pylint: disable=protected-access + + +def _enumerate_cores(bounds: List[int], ring_bounds: List[int], + ring_sizes: List[int], host_bounds: List[int], + host_sizes: List[int]) -> List[List[int]]: + """Enumerates cores within `bounds` from fastest to slowest varying axes. + + Args: + bounds: Upper bounds of axes, from fastest to slowest varying. + ring_bounds: Upper bounds of ring size per axis in the same axis order. + ring_sizes: Number of consecutive cores in the ring built so far, cumulatively. + host_bounds: Number of axis values per host in the same axis order. + host_sizes: Number of consecutive cores on one host, cumulatively. + + Returns: + Cores represented as a list of 4 integers in the same axis order. + """ + if not bounds: + return [[]] + + # Recursively enumerate cores under all but the slowest varying axis. + partials = _enumerate_cores(bounds[:-1], ring_bounds[:-1], ring_sizes[:-1], + host_bounds[:-1], host_sizes[:-1]) + + # Append the slowest varying axis to the end of all partial results. + # From ring_i|j to host_i|j to core_i|j, use progressively smaller or equal + # iteration groupings until every one of the bounds[-1] * len(partials) + # combinations is iterated on. + # Despite the six levels of nested loops below, the total time complexity for + # this invocation is O(N), where N is the number of cores in the topology. 
+ results = [] + for ring_i in range(0, bounds[-1], ring_bounds[-1]): + for ring_j in range(0, len(partials), ring_sizes[-1]): + for host_i in range(ring_i, ring_i + ring_bounds[-1], host_bounds[-1]): + for host_j in range(ring_j, ring_j + ring_sizes[-1], host_sizes[-1]): + for i in range(host_i, host_i + host_bounds[-1]): + for j in range(host_j, host_j + host_sizes[-1]): + results.append(partials[j] + [i]) + return results + + +def _enumerate_core_locations(bounds: List[int], ring_bounds: List[int], + axes: List[str], + can_split_host_across_rings: bool, + ring_size: int) -> List[_CoreLocation]: + """Enumerates all possible core locations under the axis iteration order. + + Args: + bounds: A list of 4 positive integers, upper bound values for x, y, z, core. + ring_bounds: A list of 4 positive integers, upper bound values for ring size + in x, y, z, core axes. + axes: A permutation of ["x", "y", "z", "core"], the axis iteration order. + can_split_host_across_rings: If true, devices attached to the same host may + get assigned to different rings. + ring_size: Number of devices in a ring, only for argument validation. + + Returns: + A list of all CoreLocation objects defined in a TPU slice of shape `bounds`, + sorted by axis iteration order specified by `axes`. + + For example, given bounds=[2, 2, 1, 2] and axes=["core", "z", "y", "x"], + return 8 core locations expressed in (x, y, z, core) format but iterated in + core -> z -> y -> x order (fastest to slowest varying): + + [_CoreLocation(0, 0, 0, 0), + _CoreLocation(0, 0, 0, 1), + _CoreLocation(0, 1, 0, 0), + _CoreLocation(0, 1, 0, 1), + _CoreLocation(1, 0, 0, 0), + _CoreLocation(1, 0, 0, 1), + _CoreLocation(1, 1, 0, 0), + _CoreLocation(1, 1, 0, 1)] + + Raises: + ValueError: If ring_size cannot be fulfilled without splitting hosts. 
+ """ + + num_cores_per_chip = bounds[3] + if num_cores_per_chip != 1 and num_cores_per_chip != 2: + raise ValueError("Unsupported TPU slice size: %s" % bounds) + + # Translate `axes` from string to integer format. + axes = [{"x": 0, "y": 1, "z": 2, "core": 3}[axis] for axis in axes] + # Reorder bounds from fastest to slowest varying axes. + bounds = [bounds[i] for i in axes] + + # Set and validate host_bounds. + if can_split_host_across_rings: + # If we can split hosts, shrink every host to effectively contain 1 device. + host_bounds = [1, 1, 1, 1] + elif np.prod(bounds) <= 2: + # We must be running on 1x1 or 1x1x1 Forge. + host_bounds = [[1, 1, 1, num_cores_per_chip][i] for i in axes] + else: + # Other cases including 2x2 Forge and Borg must use a full donut. + host_bounds = [[2, 2, 1, num_cores_per_chip][i] for i in axes] + # host_sizes is the cumulative products of host_bounts. + host_sizes = [1] + for host_bound in host_bounds: + host_sizes.append(host_sizes[-1] * host_bound) + host_size = host_sizes.pop() + # When can_split_host_across_rings is false, a ring must contain at least as + # many devices as a host has. + if ring_size < host_size: + assert not can_split_host_across_rings + raise ValueError( + "Rings too small for can_split_host_across_rings = False: %d" % + ring_size) + + # Reorder ring_bounds and validate it's element-wise >= host_bounds. + ring_bounds = [ring_bounds[i] for i in axes] + if ring_bounds < host_bounds: + raise ValueError("ring_bounds %s should be >= host_bounds %s" % + (ring_bounds, host_bounds)) + ring_sizes = [1] + # ring_sizes is the cumulative products of ring_bounds. + for ring_bound in ring_bounds: + ring_sizes.append(ring_sizes[-1] * ring_bound) + ring_sizes.pop() + + # Enumerate cores in the given iteration order. Each core is represented as a + # list of int, which are offsets from fatest to slowest varying axes. 
+ cores = _enumerate_cores(bounds, ring_bounds, ring_sizes, host_bounds, + host_sizes) + # Reorder offsets of each core back to the x, y, z, core order. + core_locations = [] + for core in cores: + core = [core[axes.index(i)] for i in range(4)] + core_locations.append(_CoreLocation(core[0], core[1], core[2], core[3])) + return core_locations + + +def _build_all_reduce_ring(core_locations: List[_CoreLocation], + rotate: bool = False) -> List[int]: + """Reorders a list of TPU cores to optimize for AllReduce performance. + + This is ported from the C++ tensorflow::BuildAllReduceRing function, + mixed with some logic from TF TPU's device_assignment._ring_3d. + + Args: + core_locations: A list of core locations expressed as [x, y, z, core]. + rotate: If true, scan the cores in a column-major order. False by default. + + Returns: + A permutation of the input list such that neighbors in the sequence are + nearby in the TPU topology. + """ + + permutation = list(range(len(core_locations))) + if not permutation: + return permutation + logging.vlog(2, "Core locations in: %s", core_locations) + + first_column = min([l.x for l in core_locations]) + first_row = min([l.y for l in core_locations]) + same_z = (len(set([l.z for l in core_locations])) == 1) + logging.vlog(2, "first_column: %d", first_column) + logging.vlog(2, "first_row: %d", first_row) + logging.vlog(2, "same_z: %s", same_z) + + def _cmp_2d(ia: int, ib: int) -> int: + if not rotate: + a = core_locations[ia] + b = core_locations[ib] + + # Order the first column last in the sequence, except for the first row. + a_first = (a.x == first_column and a.y != first_row) + b_first = (b.x == first_column and b.y != first_row) + if a_first != b_first: + return -1 if b_first else 1 + + # Order rows in increasing order, unless in the first column. + if a.y != b.y: + return b.y - a.y if a_first else a.y - b.y + + # Order even rows left to right, odd rows right to left. 
+ if a.x != b.x: + return a.x - b.x if a.y % 2 == 0 else b.x - a.x + + # Order cores in increasing order. + return a.core - b.core + else: + a = core_locations[ia] + b = core_locations[ib] + + # Order the first row last in the sequence, except for the first column. + a_first = (a.y == first_row and a.x != first_column) + b_first = (b.y == first_row and b.x != first_column) + if a_first != b_first: + return -1 if b_first else 1 + + # Order columns in increasing order, unless in the first row. + if a.x != b.x: + return b.x - a.x if a_first else a.x - b.x + + # Order even columns top down, odd columns bottom up. + if a.y != b.y: + return a.y - b.y if a.x % 2 == 0 else b.y - a.y + + # Order cores in increasing order. + return a.core - b.core + + def _cmp_3d(ia: int, ib: int) -> int: + a = core_locations[ia] + b = core_locations[ib] + + a_corner = (a.x == first_column and a.y == first_row) + b_corner = (b.x == first_column and b.y == first_row) + + # If both are in the corner, order in reverse z then core order. + if a_corner and b_corner: + return b.z - a.z if a.z != b.z else a.core - b.core + + # Corner cores always go after non-corner cores. + if a_corner != b_corner: + return -1 if b_corner else 1 + + # Both non-corner cores are on the same z-plane. Reverse odd z-planes. + if a.z == b.z: + return _cmp_2d(ia, ib) if a.z % 2 == 0 else -_cmp_2d(ia, ib) + + # Both non-corner cores are on different z-planes. Smaller z goes first. + return a.z - b.z + + # If all cores are on the same z-plane, order as usual. Otherwise, order + # neighbor z-planes in opposite orders. Stack all z-planes along the z axis + # and connect them in one corner. 
+ if same_z: + permutation.sort(key=functools.cmp_to_key(_cmp_2d)) + else: + permutation.sort(key=functools.cmp_to_key(_cmp_3d)) + logging.vlog(2, "Permutation out: %s", permutation) + return permutation + + +def _build_orthogonal_rings( + core_locations: List[_CoreLocation], ring_size: int, + rotate_ring_across_rings: bool) -> List[_CoreLocation]: + """Build two all-reduce rings orthogonal to each other. + + One ring includes every `ring_size` consecutive core locations. It is usually + applied to the model-parallel dimension of a mesh to achieve best 1D + all-reduce performance. The other ring includes core locations separated by + a stride of `ring_size`. It is usually applied to the data-parallel dimension + of a mesh to get predictable strided all-reduce performance. + + Args: + core_locations: A list of core locations expressed as [x, y, z, core]. + ring_size: The number of core locations in the consecutive ring. + rotate_ring_across_rings: Build column-major secondary rings. + + Returns: + A permutation of the input list forming the described rings. + """ + # Build a ring for the first `ring_size` cores, and apply that permutation to + # every group of `ring_size` cores. + num_cores = len(core_locations) + permutation = _build_all_reduce_ring(core_locations[:ring_size]) + for r in range(0, num_cores, ring_size): + core_locations[r:r + ring_size] = [ + core_locations[r + permutation[i]] for i in range(ring_size) + ] + logging.vlog(1, "Permutated core locations: %s", core_locations) + + # Build a "ring" for the collection of devices consisting of the 0th device + # from every group, and apply that permutation to every i-th device group. + # This is achieved by transposing the list and back. 
+ transposed = [] + for i in range(ring_size): + transposed += [ + core_locations[g + i] for g in range(0, num_cores, ring_size) + ] + + num_rings = int(num_cores / ring_size) + permutation = _build_all_reduce_ring( + transposed[:num_rings], rotate=rotate_ring_across_rings) + for r in range(0, num_cores, num_rings): + transposed[r:r + num_rings] = [ + transposed[r + permutation[i]] for i in range(num_rings) + ] + + untransposed = [] + for i in range(num_rings): + untransposed += [transposed[g + i] for g in range(0, num_cores, num_rings)] + logging.vlog(1, "Stride-permutated core locations: %s", untransposed) + + return untransposed + + +@tf_export("experimental.dtensor.create_tpu_mesh", v1=[]) +def create_tpu_mesh( + mesh_dim_names: List[str], + mesh_shape: List[int], + mesh_name: str, + ring_dims: Optional[int] = None, + ring_axes: Optional[List[str]] = None, + ring_bounds: Optional[List[int]] = None, + can_split_host_across_rings: bool = True, + build_ring_across_rings: bool = False, + rotate_ring_across_rings: bool = False, + use_xla_spmd: bool = layout_lib.USE_XLA_SPMD) -> layout_lib.Mesh: + """Returns a distributed TPU mesh optimized for AllReduce ring reductions. + + Only as many as leading axes specified by `ring_axes` as necessary will be + used to build rings, as long as the subslice formed by these axes have enough + cores to contain a ring of the required size. The leftover axes in `ring_axes` + won't affect results. + + This function always uses all TPU devices, and offers more customization than + `tf.experimental.dtensor.create_distributed_mesh`. + + Args: + mesh_dim_names: List of mesh dimension names. + mesh_shape: Shape of the mesh. + mesh_name: A unique name for the mesh. If empty, internally generate one. + ring_dims: Optional; The number of leading (ring_dims > 0) or trailing + (ring_dims < 0) mesh dimensions to build rings for. If unspecified, build + rings for all but the first dimension. 
+ ring_axes: Optional; A permutation of ["x", "y", "z", "core"], specifying + the order of TPU topology axes to build rings in. If unspecified, default + to ["core", "x", "y", "z"]. + ring_bounds: Optional; The maximum number of devices on each axis, in the x, + y, z, core order. If unspecified, default to physical topology limits. + can_split_host_across_rings: Optional; If true, devices attached to the same + host (i.e., DTensor client) may get assigned to different rings. Setting + it to false may cause some combinations of arguments to be infeasible; see + DeviceAssignmentTest.testCreateMesh[No]SplittingHosts* for examples. + build_ring_across_rings: Optional; If true, also build a data-parallel ring + across model-parallel rings. This ring could be strided. + rotate_ring_across_rings: Optional; If true, build the data-parallel ring in + column-major instead of row-major order. + use_xla_spmd: Boolean when True, will use XLA SPMD instead of + DTensor SPMD. + """ + + logging.info("Building a TPU mesh %s of shape %s", mesh_name, mesh_shape) + logging.info("Requested ring_dims: %s", ring_dims) + logging.info("Requested ring_axes: %s", ring_axes) + logging.info("Requested ring_bounds: %s", ring_bounds) + logging.info("Requested can_split_host_across_rings: %s", + can_split_host_across_rings) + if not mesh_name: + mesh_name = "mesh_%f" % time.time() + logging.info("Requested mesh_name: %s", mesh_name) + + # By default, build rings for all but the first (usually batch) dimension. + if ring_dims is None: + ring_dims = 1 - len(mesh_shape) + elif ring_dims < -len(mesh_shape) or ring_dims > len(mesh_shape): + raise ValueError("Invalid ring_dims value: %d" % ring_dims) + logging.info("Actual ring_dims: %s", ring_dims) + + # By default, vary axes in the core -> x -> y -> z order. 
+ if ring_axes is None: + ring_axes = ["core", "x", "y", "z"] + elif len(ring_axes) != 4: + raise ValueError("Expected 4 elements in ring_axes, got %s" % ring_axes) + elif sorted(ring_axes) != ["core", "x", "y", "z"]: + raise ValueError("Invalid ring_axes value: %s" % ring_axes) + logging.info("Actual ring_axes: %s", ring_axes) + + # Validate ring_bounds values. + if _tpu_topology is None: + raise ValueError( + "Invalid TPU topology, run dtensor.initialize_tpu_system() first") + topology_shape = list(_tpu_topology.mesh_shape) + if ring_bounds is None: + ring_bounds = topology_shape + elif len(ring_bounds) != 4: + raise ValueError("Expected 4 elements in ring_bounds, got %s" % ring_bounds) + elif ring_bounds > topology_shape: + raise ValueError("ring_bounds %s should be <= topology sizes %s" % + (ring_bounds, topology_shape)) + logging.info("Actual ring_bounds: %s", ring_bounds) + + # Compute ring_size, the number of cores in a ring. + if ring_dims > 0: + ring_size = np.prod(mesh_shape[:ring_dims]) + elif ring_dims < 0: + ring_size = np.prod(mesh_shape[ring_dims:]) + else: + ring_size = 1 # single-core rings + logging.info("Actual ring_size: %d", ring_size) + + # Rearrange all cores according to the axis iteration order. + global_core_locations = _enumerate_core_locations( + topology_shape, ring_bounds, ring_axes, can_split_host_across_rings, + ring_size) + logging.vlog(1, "Enumerated core locations: %s", global_core_locations) + num_cores = len(global_core_locations) + + # The mesh to be created must use all TPU cores in the system. + mesh_size = np.prod(mesh_shape) + if mesh_size != num_cores: + raise ValueError( + "Invalid mesh size: mesh shape %s cannot 1:1 map to %d TPU cores" % + (mesh_shape, num_cores)) + + # Build a ring for the `ring_size` dimension and, if required, a strided ring + # for the orthogonal dimension. 
+ if build_ring_across_rings: + global_core_locations = _build_orthogonal_rings(global_core_locations, + ring_size, + rotate_ring_across_rings) + else: + permutation = _build_all_reduce_ring(global_core_locations[:ring_size]) + for r in range(0, num_cores, ring_size): + global_core_locations[r:r + ring_size] = [ + global_core_locations[r + permutation[i]] for i in range(ring_size) + ] + logging.vlog(1, "Permutated core locations: %s", global_core_locations) + + # For this point on, change from List[CoreLocation] to List[List[int]] for + # easier interaction with the C++ API. + global_core_locations = [l.to_list() for l in global_core_locations] + if _dtensor_device is None: + raise ValueError("Invalid system device, " + "run dtensor.initialize_accelerator_system() first") + global_core_ids = _dtensor_device.tpu_core_locations_to_ids( + global_core_locations) + + # Store a per-mesh mapping in the runtime. + _dtensor_device.set_tpu_core_ids(mesh_name, global_core_ids) + + # Create the mesh by manually specifying local_device_ids. + local_core_locations = _tpu_topology.device_coordinates[config.client_id()] + indexes = [ + global_core_locations.index(list(local_core_location)) + for local_core_location in local_core_locations + ] + global_device_ids, local_device_ids, local_device_list = _create_device_array( + mesh_shape, _TPU_DEVICE_TYPE, None, local_device_ids=indexes) + return layout_lib.Mesh( + mesh_dim_names, + global_device_ids, + local_device_ids, + local_device_list, + mesh_name, + use_xla_spmd=use_xla_spmd, + ) + + +def get_device_ids(mesh: layout_lib.Mesh, + client_id: Optional[int] = None) -> List[int]: + """Returns the device IDs of all TPU cores local to the given client. + + A device ID is a non-negative integer that uniquely identifies a device in the + mesh. For example, for a 2x2 mesh ('x', 'y'), this function returns a + permutation of [0, 1, 2, 3]. + + Note that device IDs and device locations are equivalent. 
The former is a + linearization of the latter along mesh dimensions. + + Args: + mesh: A TPU mesh. + client_id: Optional; A DTensor client ID. If empty, query this client. + """ + + if mesh.device_type() != _TPU_DEVICE_TYPE: + raise ValueError("The mesh must be a TPU mesh") + + if client_id is None or client_id == config.client_id(): + return mesh.local_device_ids() + + # It's not clear we should ever allow a client to query other clients for + # their device IDs. + raise NotImplementedError( + "Looking up other clients' device IDs is not supported") + + +def get_device_locations( + mesh: layout_lib.Mesh, + client_id: Optional[int] = None) -> List[Dict[str, int]]: + """Returns the device locations of all TPU cores local to the given client. + + A device location is a dictionary from dimension names to indices on those + dimensions. For example, for a 2x2 mesh ('x', 'y'), this function returns a + permutation of this list: + + [{'x': 0, 'y': 0}, + {'x': 0, 'y': 1}, + {'x': 1, 'y': 0}, + {'x': 1, 'y': 1}]. + + Note that device IDs and device locations are equivalent. The former is a + linearization of the latter along mesh dimensions. + + Args: + mesh: A TPU mesh. + client_id: Optional; A DTensor client ID. If empty, query this client. + """ + + if mesh.device_type() != _TPU_DEVICE_TYPE: + raise ValueError("The mesh must be a TPU mesh") + + if client_id is None or client_id == config.client_id(): + return mesh.local_device_locations() + + # It's not clear we should ever allow a client to query other clients for + # their device locations. + raise NotImplementedError( + "Looking up other clients' device locations is not supported") + + +# TODO(b/245589661): Remove dtensor_initialize_tpu_system() and +# dtensor_shutdown_tpu_system() after users stopped using them. +def dtensor_initialize_tpu_system(enable_coordination_service=False): + """Deprecated way to initialize the TPU system.""" + from . 
import accelerator_util # pylint: disable=g-import-not-at-top + accelerator_util.initialize_accelerator_system( + "TPU", enable_coordination_service=enable_coordination_service) + + +def dtensor_shutdown_tpu_system(): + """Deprecated way to shutodwn the TPU system.""" + from . import accelerator_util # pylint: disable=g-import-not-at-top + accelerator_util.shutdown_accelerator_system() diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..06db94f954f145ac32cfc62ba1cfd006a2efca91 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..1b41f3eb7ba300696a42e25e25b48c253b0214cc Binary files /dev/null and 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__init__.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/__init__.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/__init__.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..9fe9ecca9e48263fc848728f7a0ca4891ed357f1 Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/__init__.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/annotation_types.cpython-310.pyc b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/annotation_types.cpython-310.pyc new file mode 100644 index 0000000000000000000000000000000000000000..3f8c06747e068e0f85b0fcc098d673444fb916ba Binary files /dev/null and b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/__pycache__/annotation_types.cpython-310.pyc differ diff --git a/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/annotation_types.py b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/annotation_types.py new file mode 100644 index 0000000000000000000000000000000000000000..4ce6fa3cf85fb33822bf9aef9a0a6d49fb6bace9 --- /dev/null +++ 
b/SwarmUI/dlbackend/ComfyUI/venv/lib/python3.10/site-packages/tensorflow/security/fuzzing/py/annotation_types.py @@ -0,0 +1,54 @@ +# Copyright 2023 The TensorFlow Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# ============================================================================== +"""Library of types used for type annotation.""" +from tensorflow.python.framework import dtypes as _dtypes + + +class DTypeAnnotation: + pass + + +def _create_dtype_wrapper(name, underlying_dtype: _dtypes.DType): + return type(name, (DTypeAnnotation,), {"underlying_dtype": underlying_dtype}) + + +BFloat16 = _create_dtype_wrapper("BFloat16", _dtypes.bfloat16) +Bool = _create_dtype_wrapper("Bool", _dtypes.bool) +Complex128 = _create_dtype_wrapper("Complex128", _dtypes.complex128) +Complex64 = _create_dtype_wrapper("Complex64", _dtypes.complex64) +Float8e4m3fn = _create_dtype_wrapper("Float8e4m3fn", _dtypes.float8_e4m3fn) +Float8e5m2 = _create_dtype_wrapper("Float8e5m2", _dtypes.float8_e5m2) +Float16 = _create_dtype_wrapper("Float16", _dtypes.float16) +Float32 = _create_dtype_wrapper("Float32", _dtypes.float32) +Float64 = _create_dtype_wrapper("Float64", _dtypes.float64) +Half = _create_dtype_wrapper("Half", _dtypes.float16) +Int4 = _create_dtype_wrapper("Int4", _dtypes.int4) +Int8 = _create_dtype_wrapper("Int8", _dtypes.int8) +Int16 = _create_dtype_wrapper("Int16", _dtypes.int16) +Int32 = _create_dtype_wrapper("Int32", _dtypes.int32) +Int64 = 
_create_dtype_wrapper("Int64", _dtypes.int64) +UInt4 = _create_dtype_wrapper("UInt4", _dtypes.uint4) +UInt8 = _create_dtype_wrapper("UInt8", _dtypes.uint8) +UInt16 = _create_dtype_wrapper("UInt16", _dtypes.uint16) +UInt32 = _create_dtype_wrapper("UInt32", _dtypes.uint32) +UInt64 = _create_dtype_wrapper("UInt64", _dtypes.uint64) +QInt8 = _create_dtype_wrapper("QInt8", _dtypes.qint8) +QInt16 = _create_dtype_wrapper("QInt16", _dtypes.qint16) +QInt32 = _create_dtype_wrapper("QInt32", _dtypes.qint32) +QUInt16 = _create_dtype_wrapper("QUInt16", _dtypes.quint16) +QUInt8 = _create_dtype_wrapper("QUInt8", _dtypes.quint8) +Resource = _create_dtype_wrapper("Resource", _dtypes.resource) +String = _create_dtype_wrapper("String", _dtypes.string) +Variant = _create_dtype_wrapper("Variant", _dtypes.variant)